[PATCH], Patch #3 of 10, Add prefixed addressing support
Bill Schmidt
wschmidt@linux.ibm.com
Fri Aug 16 01:59:00 GMT 2019
On 8/14/19 5:06 PM, Michael Meissner wrote:
> This patch adds prefixed memory support to all offsettable instructions.
>
> Unlike previous versions of the patch, this patch combines all of the
> modifications for addressing to one patch. Previously, I had 3 separate
> patches (one for PADDI, one for scalar types, and one for vector types).
>
> 2019-08-14 Michael Meissner <meissner@linux.ibm.com>
>
> * config/rs6000/predicates.md (add_operand): Add support for the
> PADDI instruction.
> (non_add_cint_operand): Add support for the PADDI instruction.
> (lwa_operand): Add support for the prefixed PLWA instruction.
> * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok_uncached):
> Only treat modes < 16 bytes as scalars.
> (rs6000_debug_print_mode): Print whether the mode supports
> prefixed addressing.
> (setup_insn_form): Enable prefixed addressing for all modes whose
> default instruction form includes offset addressing.
> (num_insns_constant_gpr): Add support for the PADDI instruction.
> (quad_address_p): Add support for prefixed addressing.
> (mem_operand_gpr): Add support for prefixed addressing.
> (mem_operand_ds_form): Add support for prefixed addressing.
> (rs6000_legitimate_offset_address_p): Add support for prefixed
> addressing.
> (rs6000_legitimate_address_p): Add support for prefixed
> addressing.
> (rs6000_mode_dependent_address): Add support for prefixed
> addressing.
> (rs6000_rtx_costs): Make PADDI cost the same as ADDI or ADDIS.
> * config/rs6000/rs6000.md (add<mode>3): Add support for PADDI.
> (movsi_internal1): Add support for prefixed addressing, and using
> PADDI to load up large integers.
> (movsi splitter): Do not split up a PADDI instruction.
> (mov<mode>_64bit_dm): Add support for prefixed addressing.
> (movtd_64bit_nodm): Add support for prefixed addressing.
> (movdi_internal64): Add support for prefixed addressing, and using
> PADDI to load up large integers.
> (movdi splitter): Update comment about PADDI.
> (stack_protect_setdi): Add support for prefixed addressing.
> (stack_protect_testdi): Add support for prefixed addressing.
> * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
> prefixed addressing.
> (vsx_extract_<P:mode>_<VSX_D:mode>_load): Add support for prefixed
> addressing.
> (vsx_extract_<P:mode>_<VSX_D:mode>_load): Add support for prefixed
> addressing.
>
> Index: gcc/config/rs6000/predicates.md
> ===================================================================
> --- gcc/config/rs6000/predicates.md (revision 274174)
> +++ gcc/config/rs6000/predicates.md (working copy)
> @@ -839,7 +839,8 @@
> (define_predicate "add_operand"
> (if_then_else (match_code "const_int")
> (match_test "satisfies_constraint_I (op)
> - || satisfies_constraint_L (op)")
> + || satisfies_constraint_L (op)
> + || satisfies_constraint_eI (op)")
> (match_operand 0 "gpc_reg_operand")))
>
> ;; Return 1 if the operand is either a non-special register, or 0, or -1.
> @@ -852,7 +853,8 @@
> (define_predicate "non_add_cint_operand"
> (and (match_code "const_int")
> (match_test "!satisfies_constraint_I (op)
> - && !satisfies_constraint_L (op)")))
> + && !satisfies_constraint_L (op)
> + && !satisfies_constraint_eI (op)")))
>
> ;; Return 1 if the operand is a constant that can be used as the operand
> ;; of an AND, OR or XOR.
> @@ -933,6 +935,13 @@
> return false;
>
> addr = XEXP (inner, 0);
> +
> + /* The LWA instruction uses the DS-form format where the bottom two bits of
> + the offset must be 0. The prefixed PLWA does not have this
> + restriction. */
> + if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> + return true;
> +
> if (GET_CODE (addr) == PRE_INC
> || GET_CODE (addr) == PRE_DEC
> || (GET_CODE (addr) == PRE_MODIFY
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c (revision 274175)
> +++ gcc/config/rs6000/rs6000.c (working copy)
> @@ -1828,7 +1828,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, mac
>
> if (ALTIVEC_REGNO_P (regno))
> {
> - if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
> + if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
> return 0;
Unrelated change? I don't quite understand why it was changed, either.
Is this to do with vector_pair support? If so, maybe it belongs with a
different patch?
>
> return ALTIVEC_REGNO_P (last_regno);
> @@ -2146,6 +2146,11 @@ rs6000_debug_print_mode (ssize_t m)
> rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_FPR]),
> rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_VMX]));
>
> + if (reg_addr[m].prefixed_memory_p)
> + fprintf (stderr, " Prefix");
> + else
> + spaces += sizeof (" Prefix") - 1;
> +
> if ((reg_addr[m].reload_store != CODE_FOR_nothing)
> || (reg_addr[m].reload_load != CODE_FOR_nothing))
> {
> @@ -2838,11 +2843,16 @@ setup_insn_form (void)
> else
> def_rc = RELOAD_REG_GPR;
>
> - reg_addr[m].default_insn_form = reg_addr[m].insn_form[def_rc];
> + enum insn_form def_iform = reg_addr[m].insn_form[def_rc];
> + reg_addr[m].default_insn_form = def_iform;
>
> - /* Don't enable prefixed memory support until all of the infrastructure
> - changes are in. */
> - reg_addr[m].prefixed_memory_p = false;
> + /* Only modes that support offset addressing by default can be
> + prefixed. */
> + reg_addr[m].prefixed_memory_p = (TARGET_PREFIXED_ADDR
> + && (def_iform == INSN_FORM_D
> + || def_iform == INSN_FORM_DS
> + || def_iform == INSN_FORM_DQ));
> +
> }
> }
>
> @@ -5693,7 +5703,7 @@ static int
> num_insns_constant_gpr (HOST_WIDE_INT value)
> {
> /* signed constant loadable with addi */
> - if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
> + if (SIGNED_16BIT_OFFSET_P (value))
> return 1;
>
> /* constant loadable with addis */
> @@ -5701,6 +5711,10 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
> && (value >> 31 == -1 || value >> 31 == 0))
> return 1;
>
> + /* PADDI can support up to 34 bit signed integers. */
> + else if (TARGET_PREFIXED_ADDR && SIGNED_34BIT_OFFSET_P (value))
> + return 1;
> +
> else if (TARGET_POWERPC64)
> {
> HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
> @@ -7411,7 +7425,7 @@ quad_address_p (rtx addr, machine_mode mode, bool
> {
> rtx op0, op1;
>
> - if (GET_MODE_SIZE (mode) != 16)
> + if (GET_MODE_SIZE (mode) < 16)
> return false;
Same question about whether this is an unrelated change, perhaps to do
with vector_pair support?
>
> if (legitimate_indirect_address_p (addr, strict))
> @@ -7420,6 +7434,13 @@ quad_address_p (rtx addr, machine_mode mode, bool
> if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
> return false;
>
> + /* Is this a valid prefixed address? If the bottom four bits of the offset
> + are non-zero, we could use a prefixed instruction (which does not have the
> + DQ-form constraint that the traditional instruction had) instead of
> + forcing the unaligned offset to a GPR. */
> + if (prefixed_local_addr_p (addr, mode, INSN_FORM_DQ))
> + return true;
> +
> if (GET_CODE (addr) != PLUS)
> return false;
>
> @@ -7521,6 +7542,13 @@ mem_operand_gpr (rtx op, machine_mode mode)
> && legitimate_indirect_address_p (XEXP (addr, 0), false))
> return true;
>
> + /* Allow prefixed instructions if supported. If the bottom two bits of the
> + offset are non-zero, we could use a prefixed instruction (which does not
> + have the DS-form constraint that the traditional instruction had) instead
> + of forcing the unaligned offset to a GPR. */
> + if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> + return true;
> +
> /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
> if (!rs6000_offsettable_memref_p (op, mode, false))
> return false;
> @@ -7542,7 +7570,7 @@ mem_operand_gpr (rtx op, machine_mode mode)
> causes a wrap, so test only the low 16 bits. */
> offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
>
> - return offset + 0x8000 < 0x10000u - extra;
> + return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
> }
>
> /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
> @@ -7555,6 +7583,13 @@ mem_operand_ds_form (rtx op, machine_mode mode)
> int extra;
> rtx addr = XEXP (op, 0);
>
> + /* Allow prefixed instructions if supported. If the bottom two bits of the
> + offset are non-zero, we could use a prefixed instruction (which does not
> + have the DS-form constraint that the traditional instruction had) instead
> + of forcing the unaligned offset to a GPR. */
> + if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> + return true;
> +
> if (!offsettable_address_p (false, mode, addr))
> return false;
>
> @@ -7575,7 +7610,7 @@ mem_operand_ds_form (rtx op, machine_mode mode)
> causes a wrap, so test only the low 16 bits. */
> offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
>
> - return offset + 0x8000 < 0x10000u - extra;
> + return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
> }
>
> /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
> @@ -7924,8 +7959,10 @@ rs6000_legitimate_offset_address_p (machine_mode m
> break;
> }
>
> - offset += 0x8000;
> - return offset < 0x10000 - extra;
> + if (TARGET_PREFIXED_ADDR)
> + return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
> + else
> + return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
> }
>
> bool
> @@ -8822,6 +8859,11 @@ rs6000_legitimate_address_p (machine_mode mode, rt
> && mode_supports_pre_incdec_p (mode)
> && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
> return 1;
> +
> + /* Handle prefixed addresses (pc-relative or 34-bit offset). */
> + if (prefixed_local_addr_p (x, mode, INSN_FORM_UNKNOWN))
> + return 1;
> +
> /* Handle restricted vector d-form offsets in ISA 3.0. */
> if (quad_offset_p)
> {
> @@ -8880,7 +8922,10 @@ rs6000_legitimate_address_p (machine_mode mode, rt
> || (!avoiding_indexed_address_p (mode)
> && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
> && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
> - return 1;
> + {
> + /* There is no prefixed version of the load/store with update. */
> + return !prefixed_local_addr_p (XEXP (x, 1), mode, INSN_FORM_UNKNOWN);
> + }
> if (reg_offset_p && !quad_offset_p
> && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
> return 1;
> @@ -8942,8 +8987,12 @@ rs6000_mode_dependent_address (const_rtx addr)
> && XEXP (addr, 0) != arg_pointer_rtx
> && CONST_INT_P (XEXP (addr, 1)))
> {
> - unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
> - return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
> + HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
> + HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
> + if (TARGET_PREFIXED_ADDR)
> + return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
> + else
> + return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
> }
> break;
>
> @@ -20939,7 +20988,8 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int ou
> || outer_code == PLUS
> || outer_code == MINUS)
> && (satisfies_constraint_I (x)
> - || satisfies_constraint_L (x)))
> + || satisfies_constraint_L (x)
> + || satisfies_constraint_eI (x)))
> || (outer_code == AND
> && (satisfies_constraint_K (x)
> || (mode == SImode
> Index: gcc/config/rs6000/rs6000.md
> ===================================================================
> --- gcc/config/rs6000/rs6000.md (revision 274175)
> +++ gcc/config/rs6000/rs6000.md (working copy)
> @@ -1768,15 +1768,17 @@
> })
>
> (define_insn "*add<mode>3"
> - [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r")
> - (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b")
> - (match_operand:GPR 2 "add_operand" "r,I,L")))]
> + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
> + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")
> + (match_operand:GPR 2 "add_operand" "r,I,L,eI")))]
> ""
> "@
> add %0,%1,%2
> addi %0,%1,%2
> - addis %0,%1,%v2"
> - [(set_attr "type" "add")])
> + addis %0,%1,%v2
> + addi %0,%1,%2"
> + [(set_attr "type" "add")
> + (set_attr "isa" "*,*,*,fut")])
>
> (define_insn "*addsi3_high"
> [(set (match_operand:SI 0 "gpc_reg_operand" "=b")
> @@ -6916,22 +6918,22 @@
>
> ;; MR LA LWZ LFIWZX LXSIWZX
> ;; STW STFIWX STXSIWX LI LIS
> -;; # XXLOR XXSPLTIB 0 XXSPLTIB -1 VSPLTISW
> -;; XXLXOR 0 XXLORC -1 P9 const MTVSRWZ MFVSRWZ
> -;; MF%1 MT%0 NOP
> +;; PLI # XXLOR XXSPLTIB 0 XXSPLTIB -1
> +;; VSPLTISW XXLXOR 0 XXLORC -1 P9 const MTVSRWZ
> +;; MFVSRWZ MF%1 MT%0 NOP
> (define_insn "*movsi_internal1"
> [(set (match_operand:SI 0 "nonimmediate_operand"
> "=r, r, r, d, v,
> m, Z, Z, r, r,
> - r, wa, wa, wa, v,
> - wa, v, v, wa, r,
> - r, *h, *h")
> + r, r, wa, wa, wa,
> + v, wa, v, v, wa,
> + r, r, *h, *h")
> (match_operand:SI 1 "input_operand"
> "r, U, m, Z, Z,
> r, d, v, I, L,
> - n, wa, O, wM, wB,
> - O, wM, wS, r, wa,
> - *h, r, 0"))]
> + eI, n, wa, O, wM,
> + wB, O, wM, wS, r,
> + wa, *h, r, 0"))]
> "gpc_reg_operand (operands[0], SImode)
> || gpc_reg_operand (operands[1], SImode)"
> "@
> @@ -6945,6 +6947,7 @@
> stxsiwx %x1,%y0
> li %0,%1
> lis %0,%v1
> + li %0,%1
> #
> xxlor %x0,%x1,%x1
> xxspltib %x0,0
> @@ -6961,21 +6964,21 @@
> [(set_attr "type"
> "*, *, load, fpload, fpload,
> store, fpstore, fpstore, *, *,
> - *, veclogical, vecsimple, vecsimple, vecsimple,
> - veclogical, veclogical, vecsimple, mffgpr, mftgpr,
> - *, *, *")
> + *, *, veclogical, vecsimple, vecsimple,
> + vecsimple, veclogical, veclogical, vecsimple, mffgpr,
> + mftgpr, *, *, *")
> (set_attr "length"
> "*, *, *, *, *,
> *, *, *, *, *,
> - 8, *, *, *, *,
> - *, *, 8, *, *,
> - *, *, *")
> + *, 8, *, *, *,
> + *, *, *, 8, *,
> + *, *, *, *")
> (set_attr "isa"
> "*, *, *, p8v, p8v,
> *, p8v, p8v, *, *,
> - *, p8v, p9v, p9v, p8v,
> - p9v, p8v, p9v, p8v, p8v,
> - *, *, *")])
> + fut, *, p8v, p9v, p9v,
> + p8v, p9v, p8v, p9v, p8v,
> + p8v, *, *, *")])
>
> ;; Like movsi, but adjust a SF value to be used in a SI context, i.e.
> ;; (set (reg:SI ...) (subreg:SI (reg:SF ...) 0))
> @@ -7120,14 +7123,15 @@
> "xscvdpsp %x0,%x1"
> [(set_attr "type" "fp")])
>
> -;; Split a load of a large constant into the appropriate two-insn
> -;; sequence.
> +;; Split a load of a large constant into the appropriate two-insn sequence. On
> +;; systems that support PADDI (PLI), we can use PLI to load any 32-bit constant
> +;; in one instruction.
>
> (define_split
> [(set (match_operand:SI 0 "gpc_reg_operand")
> (match_operand:SI 1 "const_int_operand"))]
> "(unsigned HOST_WIDE_INT) (INTVAL (operands[1]) + 0x8000) >= 0x10000
> - && (INTVAL (operands[1]) & 0xffff) != 0"
> + && (INTVAL (operands[1]) & 0xffff) != 0 && !TARGET_PREFIXED_ADDR"
> [(set (match_dup 0)
> (match_dup 2))
> (set (match_dup 0)
> @@ -7766,9 +7770,18 @@
> ;; not swapped like they are for TImode or TFmode. Subregs therefore are
> ;; problematical. Don't allow direct move for this case.
>
> +;; FPR load FPR store FPR move FPR zero GPR load
> +;; GPR store GPR move GPR zero MFVSRD MTVSRD
> +
> (define_insn_and_split "*mov<mode>_64bit_dm"
> - [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r,r,d")
> - (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,<zero_fp>,r,<zero_fp>Y,r,d,r"))]
> + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand"
> + "=m, d, d, d, Y,
> + r, r, r, r, d")
> +
> + (match_operand:FMOVE128_FPR 1 "input_operand"
> + "d, m, d, <zero_fp>, r,
> + <zero_fp>, Y, r, d, r"))]
> +
> "TARGET_HARD_FLOAT && TARGET_POWERPC64 && FLOAT128_2REG_P (<MODE>mode)
> && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
> && (gpc_reg_operand (operands[0], <MODE>mode)
> @@ -7776,9 +7789,13 @@
> "#"
> "&& reload_completed"
> [(pc)]
> -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
> - [(set_attr "length" "8,8,8,8,12,12,8,8,8")
> - (set_attr "isa" "*,*,*,*,*,*,*,p8v,p8v")])
> +{
> + rs6000_split_multireg_move (operands[0], operands[1]);
> + DONE;
> +}
> + [(set_attr "isa" "*,*,*,*,*,*,*,*,p8v,p8v")
> + (set_attr "non_prefixed_length" "8")
> + (set_attr "prefixed_length" "20")])
>
> (define_insn_and_split "*movtd_64bit_nodm"
> [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
> @@ -7789,8 +7806,12 @@
> "#"
> "&& reload_completed"
> [(pc)]
> -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
> - [(set_attr "length" "8,8,8,12,12,8")])
> +{
> + rs6000_split_multireg_move (operands[0], operands[1]);
> + DONE;
> +}
> + [(set_attr "non_prefixed_length" "8")
> + (set_attr "prefixed_length" "20")])
>
> (define_insn_and_split "*mov<mode>_32bit"
> [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r")
> @@ -8800,24 +8821,24 @@
> [(pc)]
> { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
>
> -;; GPR store GPR load GPR move GPR li GPR lis GPR #
> -;; FPR store FPR load FPR move AVX store AVX store AVX load
> -;; AVX load VSX move P9 0 P9 -1 AVX 0/-1 VSX 0
> -;; VSX -1 P9 const AVX const From SPR To SPR SPR<->SPR
> -;; VSX->GPR GPR->VSX
> +;; GPR store GPR load GPR move GPR li GPR lis GPR pli
> +;; GPR # FPR store FPR load FPR move AVX store AVX store
> +;; AVX load AVX load VSX move P9 0 P9 -1 AVX 0/-1
> +;; VSX 0 VSX -1 P9 const AVX const From SPR To SPR
> +;; SPR<->SPR VSX->GPR GPR->VSX
> (define_insn "*movdi_internal64"
> [(set (match_operand:DI 0 "nonimmediate_operand"
> "=YZ, r, r, r, r, r,
> - m, ^d, ^d, wY, Z, $v,
> - $v, ^wa, wa, wa, v, wa,
> - wa, v, v, r, *h, *h,
> - ?r, ?wa")
> + r, m, ^d, ^d, wY, Z,
> + $v, $v, ^wa, wa, wa, v,
> + wa, wa, v, v, r, *h,
> + *h, ?r, ?wa")
> (match_operand:DI 1 "input_operand"
> - "r, YZ, r, I, L, nF,
> - ^d, m, ^d, ^v, $v, wY,
> - Z, ^wa, Oj, wM, OjwM, Oj,
> - wM, wS, wB, *h, r, 0,
> - wa, r"))]
> + "r, YZ, r, I, L, eI,
> + nF, ^d, m, ^d, ^v, $v,
> + wY, Z, ^wa, Oj, wM, OjwM,
> + Oj, wM, wS, wB, *h, r,
> + 0, wa, r"))]
> "TARGET_POWERPC64
> && (gpc_reg_operand (operands[0], DImode)
> || gpc_reg_operand (operands[1], DImode))"
> @@ -8827,6 +8848,7 @@
> mr %0,%1
> li %0,%1
> lis %0,%v1
> + li %0,%1
> #
> stfd%U0%X0 %1,%0
> lfd%U1%X1 %0,%1
> @@ -8850,26 +8872,28 @@
> mtvsrd %x0,%1"
> [(set_attr "type"
> "store, load, *, *, *, *,
> - fpstore, fpload, fpsimple, fpstore, fpstore, fpload,
> - fpload, veclogical, vecsimple, vecsimple, vecsimple, veclogical,
> - veclogical, vecsimple, vecsimple, mfjmpr, mtjmpr, *,
> - mftgpr, mffgpr")
> + *, fpstore, fpload, fpsimple, fpstore, fpstore,
> + fpload, fpload, veclogical,vecsimple, vecsimple, vecsimple,
> + veclogical, veclogical, vecsimple, vecsimple, mfjmpr, mtjmpr,
> + *, mftgpr, mffgpr")
> (set_attr "size" "64")
> (set_attr "length"
> - "*, *, *, *, *, 20,
> + "*, *, *, *, *, *,
> + 20, *, *, *, *, *,
> *, *, *, *, *, *,
> - *, *, *, *, *, *,
> - *, 8, *, *, *, *,
> - *, *")
> + *, *, 8, *, *, *,
> + *, *, *")
> (set_attr "isa"
> - "*, *, *, *, *, *,
> - *, *, *, p9v, p7v, p9v,
> - p7v, *, p9v, p9v, p7v, *,
> - *, p7v, p7v, *, *, *,
> - p8v, p8v")])
> + "*, *, *, *, *, fut,
> + *, *, *, *, p9v, p7v,
> + p9v, p7v, *, p9v, p9v, p7v,
> + *, *, p7v, p7v, *, *,
> + *, p8v, p8v")])
>
> ; Some DImode loads are best done as a load of -1 followed by a mask
> -; instruction.
> +; instruction. On systems that support the PADDI (PLI) instruction,
> +; num_insns_constant returns 1, so these splitter would not be used for things
> +; that be loaded with PLI.
> (define_split
> [(set (match_operand:DI 0 "int_reg_operand_not_pseudo")
> (match_operand:DI 1 "const_int_operand"))]
> @@ -8987,7 +9011,8 @@
> return rs6000_output_move_128bit (operands);
> }
> [(set_attr "type" "store,store,load,load,*,*")
> - (set_attr "length" "8")])
> + (set_attr "non_prefixed_length" "8,8,8,8,8,40")
> + (set_attr "prefixed_length" "20,20,20,20,8,40")])
>
> (define_split
> [(set (match_operand:TI2 0 "int_reg_operand")
> @@ -11501,15 +11526,43 @@
> [(set_attr "type" "three")
> (set_attr "length" "12")])
>
> +;; We can't use the prefixed attribute here because there are two memory
> +;; instructions, and we can't split the insn due to the fact that this
> +;; operation needs to be done in one piece.
> (define_insn "stack_protect_setdi"
> [(set (match_operand:DI 0 "memory_operand" "=Y")
> (unspec:DI [(match_operand:DI 1 "memory_operand" "Y")] UNSPEC_SP_SET))
> (set (match_scratch:DI 2 "=&r") (const_int 0))]
> "TARGET_64BIT"
> - "ld%U1%X1 %2,%1\;std%U0%X0 %2,%0\;li %2,0"
> +{
> + if (prefixed_mem_operand (operands[1], DImode))
> + output_asm_insn ("pld %2,%1", operands);
> + else
> + output_asm_insn ("ld%U1%X1 %2,%1", operands);
> +
> + if (prefixed_mem_operand (operands[0], DImode))
> + output_asm_insn ("pstd %2,%0", operands);
> + else
> + output_asm_insn ("std%U0%X0 %2,%0", operands);
> +
> + return "li %2,0";
> +}
> [(set_attr "type" "three")
> - (set_attr "length" "12")])
>
> + ;; Back to back prefixed memory instructions take 20 bytes (8 bytes for each
> + ;; prefixed instruction + 4 bytes for the possible NOP).
> + (set_attr "prefixed" "no")
Should "prefixed" be conditional? "no" seems to break rs6000_num_insns
in patch #4.
> + (set (attr "length")
> + (cond [(and (match_operand 0 "prefixed_mem_operand")
> + (match_operand 1 "prefixed_mem_operand"))
> + (const_string "24")
> +
> + (ior (match_operand 0 "prefixed_mem_operand")
> + (match_operand 1 "prefixed_mem_operand"))
> + (const_string "20")]
> +
> + (const_string "12")))])
> +
> (define_expand "stack_protect_test"
> [(match_operand 0 "memory_operand")
> (match_operand 1 "memory_operand")
> @@ -11547,6 +11600,9 @@
> lwz%U1%X1 %3,%1\;lwz%U2%X2 %4,%2\;cmplw %0,%3,%4\;li %3,0\;li %4,0"
> [(set_attr "length" "16,20")])
>
> +;; We can't use the prefixed attribute here because there are two memory
> +;; instructions, and we can't split the insn due to the fact that this
> +;; operation needs to be done in one piece.
> (define_insn "stack_protect_testdi"
> [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y")
> (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "Y,Y")
> @@ -11555,11 +11611,44 @@
> (set (match_scratch:DI 4 "=r,r") (const_int 0))
> (clobber (match_scratch:DI 3 "=&r,&r"))]
> "TARGET_64BIT"
> - "@
> - ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;xor. %3,%3,%4\;li %4,0
> - ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;cmpld %0,%3,%4\;li %3,0\;li %4,0"
> - [(set_attr "length" "16,20")])
> +{
> + if (prefixed_mem_operand (operands[1], DImode))
> + output_asm_insn ("pld %3,%1", operands);
> + else
> + output_asm_insn ("ld%U1%X1 %3,%1", operands);
>
> + if (prefixed_mem_operand (operands[2], DImode))
> + output_asm_insn ("pld %4,%2", operands);
> + else
> + output_asm_insn ("ld%U2%X2 %4,%2", operands);
> +
> + if (which_alternative == 0)
> + output_asm_insn ("xor. %3,%3,%4", operands);
> + else
> + output_asm_insn ("cmpld %0,%3,%4\;li %3,0", operands);
> +
> + return "li %4,0";
> +}
> + ;; Back to back prefixed memory instructions take 20 bytes (8 bytes for each
> + ;; prefixed instruction + 4 bytes for the possible NOP).
> + [(set (attr "length")
> + (cond [(and (match_operand 1 "prefixed_mem_operand")
> + (match_operand 2 "prefixed_mem_operand"))
> + (if_then_else (eq_attr "alternative" "0")
> + (const_string "28")
> + (const_string "32"))
> +
> + (ior (match_operand 1 "prefixed_mem_operand")
> + (match_operand 2 "prefixed_mem_operand"))
> + (if_then_else (eq_attr "alternative" "0")
> + (const_string "20")
> + (const_string "24"))]
> +
> + (if_then_else (eq_attr "alternative" "0")
> + (const_string "16")
> + (const_string "20"))))
> + (set_attr "prefixed" "no")])
Same question about "prefixed" being conditional; again seems to break
patch #4.
Thanks,
Bill
> +
>
> ;; Here are the actual compare insns.
> (define_insn "*cmp<mode>_signed"
> Index: gcc/config/rs6000/vsx.md
> ===================================================================
> --- gcc/config/rs6000/vsx.md (revision 274173)
> +++ gcc/config/rs6000/vsx.md (working copy)
> @@ -1149,10 +1149,30 @@
> "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
> store, load, store, *, vecsimple, vecsimple,
> vecsimple, *, *, vecstore, vecload")
> - (set_attr "length"
> - "*, *, *, 8, *, 8,
> - 8, 8, 8, 8, *, *,
> - *, 20, 8, *, *")
> + (set (attr "non_prefixed_length")
> + (cond [(and (eq_attr "alternative" "4") ;; MTVSRDD
> + (match_test "TARGET_P9_VECTOR"))
> + (const_string "4")
> +
> + (eq_attr "alternative" "3,4") ;; GPR <-> VSX
> + (const_string "8")
> +
> + (eq_attr "alternative" "5,6,7,8") ;; GPR load/store
> + (const_string "8")]
> + (const_string "*")))
> +
> + (set (attr "prefixed_length")
> + (cond [(and (eq_attr "alternative" "4") ;; MTVSRDD
> + (match_test "TARGET_P9_VECTOR"))
> + (const_string "4")
> +
> + (eq_attr "alternative" "3,4") ;; GPR <-> VSX
> + (const_string "8")
> +
> + (eq_attr "alternative" "5,6,7,8") ;; GPR load/store
> + (const_string "20")]
> + (const_string "*")))
> +
> (set_attr "isa"
> "<VSisa>, <VSisa>, <VSisa>, *, *, *,
> *, *, *, *, p9v, *,
> @@ -3199,7 +3219,12 @@
> operands[3], <VSX_D:VS_scalar>mode);
> }
> [(set_attr "type" "fpload,load")
> - (set_attr "length" "8")])
> + (set (attr "prefixed")
> + (if_then_else (match_operand 1 "prefixed_mem_operand")
> + (const_string "yes")
> + (const_string "no")))
> + (set_attr "non_prefixed_length" "8")
> + (set_attr "prefixed_length" "16")])
>
> ;; Optimize storing a single scalar element that is the right location to
> ;; memory
> @@ -3294,6 +3319,8 @@
> }
> [(set_attr "type" "fpload,fpload,fpload,load")
> (set_attr "length" "8")
> + (set_attr "non_prefixed_length" "8")
> + (set_attr "prefixed_length" "16")
> (set_attr "isa" "*,p7v,p9v,*")])
>
> ;; Variable V4SF extract
>
More information about the Gcc-patches
mailing list