[PATCH], Patch #3 of 10, Add prefixed addressing support

Bill Schmidt wschmidt@linux.ibm.com
Fri Aug 16 01:59:00 GMT 2019


On 8/14/19 5:06 PM, Michael Meissner wrote:
> This patch adds prefixed memory support to all offsettable instructions.
>
> Unlike previous versions of the patch, this patch combines all of the
> modifications for addressing to one patch.  Previously, I had 3 separate
> patches (one for PADDI, one for scalar types, and one for vector types).
>
> 2019-08-14   Michael Meissner  <meissner@linux.ibm.com>
>
> 	* config/rs6000/predicates.md (add_operand): Add support for the
> 	PADDI instruction.
> 	(non_add_cint_operand): Add support for the PADDI instruction.
> 	(lwa_operand): Add support for the prefixed PLWA instruction.
> 	* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok_uncached):
> 	Only treat modes < 16 bytes as scalars.
> 	(rs6000_debug_print_mode): Print whether the mode supports
> 	prefixed addressing.
> 	(setup_insn_form): Enable prefixed addressing for all modes whose
> 	default instruction form includes offset addressing.
> 	(num_insns_constant_gpr): Add support for the PADDI instruction.
> 	(quad_address_p): Add support for prefixed addressing.
> 	(mem_operand_gpr): Add support for prefixed addressing.
> 	(mem_operand_ds_form): Add support for prefixed addressing.
> 	(rs6000_legitimate_offset_address_p): Add support for prefixed
> 	addressing.
> 	(rs6000_legitimate_address_p): Add support for prefixed
> 	addressing.
> 	(rs6000_mode_dependent_address): Add support for prefixed
> 	addressing.
> 	(rs6000_rtx_costs): Make PADDI cost the same as ADDI or ADDIS.
> 	* config/rs6000/rs6000.md (add<mode>3): Add support for PADDI.
> 	(movsi_internal1): Add support for prefixed addressing, and using
> 	PADDI to load up large integers.
> 	(movsi splitter): Do not split up a PADDI instruction.
> 	(mov<mode>_64bit_dm): Add support for prefixed addressing.
> 	(movtd_64bit_nodm): Add support for prefixed addressing.
> 	(movdi_internal64): Add support for prefixed addressing, and using
> 	PADDI to load up large integers.
> 	(movdi splitter): Update comment about PADDI.
> 	(stack_protect_setdi): Add support for prefixed addressing.
> 	(stack_protect_testdi): Add support for prefixed addressing.
> 	* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
> 	prefixed addressing.
> 	(vsx_extract_<P:mode>_<VSX_D:mode>_load): Add support for prefixed
> 	addressing.
> 	(vsx_extract_<P:mode>_<VSX_D:mode>_load): Add support for prefixed
> 	addressing.
>
> Index: gcc/config/rs6000/predicates.md
> ===================================================================
> --- gcc/config/rs6000/predicates.md	(revision 274174)
> +++ gcc/config/rs6000/predicates.md	(working copy)
> @@ -839,7 +839,8 @@
>  (define_predicate "add_operand"
>    (if_then_else (match_code "const_int")
>      (match_test "satisfies_constraint_I (op)
> -		 || satisfies_constraint_L (op)")
> +		 || satisfies_constraint_L (op)
> +		 || satisfies_constraint_eI (op)")
>      (match_operand 0 "gpc_reg_operand")))
>
>  ;; Return 1 if the operand is either a non-special register, or 0, or -1.
> @@ -852,7 +853,8 @@
>  (define_predicate "non_add_cint_operand"
>    (and (match_code "const_int")
>         (match_test "!satisfies_constraint_I (op)
> -		    && !satisfies_constraint_L (op)")))
> +		    && !satisfies_constraint_L (op)
> +		    && !satisfies_constraint_eI (op)")))
>
>  ;; Return 1 if the operand is a constant that can be used as the operand
>  ;; of an AND, OR or XOR.
> @@ -933,6 +935,13 @@
>      return false;
>
>    addr = XEXP (inner, 0);
> +
> +  /* The LWA instruction uses the DS-form format where the bottom two bits of
> +     the offset must be 0.  The prefixed PLWA does not have this
> +     restriction.  */
> +  if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> +    return true;
> +
>    if (GET_CODE (addr) == PRE_INC
>        || GET_CODE (addr) == PRE_DEC
>        || (GET_CODE (addr) == PRE_MODIFY
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c	(revision 274175)
> +++ gcc/config/rs6000/rs6000.c	(working copy)
> @@ -1828,7 +1828,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, mac
>
>        if (ALTIVEC_REGNO_P (regno))
>  	{
> -	  if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
> +	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
>  	    return 0;

Is this an unrelated change?  I don't quite understand why it was changed,
either.  Does this have to do with vector_pair support?  If so, maybe it
belongs in a different patch?
>
>  	  return ALTIVEC_REGNO_P (last_regno);
> @@ -2146,6 +2146,11 @@ rs6000_debug_print_mode (ssize_t m)
>            rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_FPR]),
>            rs6000_debug_insn_form (reg_addr[m].insn_form[RELOAD_REG_VMX]));
>
> +  if (reg_addr[m].prefixed_memory_p)
> +    fprintf (stderr, "  Prefix");
> +  else
> +    spaces += sizeof ("  Prefix") - 1;
> +
>    if ((reg_addr[m].reload_store != CODE_FOR_nothing)
>        || (reg_addr[m].reload_load != CODE_FOR_nothing))
>      {
> @@ -2838,11 +2843,16 @@ setup_insn_form (void)
>        else
>  	def_rc = RELOAD_REG_GPR;
>
> -      reg_addr[m].default_insn_form = reg_addr[m].insn_form[def_rc];
> +      enum insn_form def_iform = reg_addr[m].insn_form[def_rc];
> +      reg_addr[m].default_insn_form = def_iform;
>
> -      /* Don't enable prefixed memory support until all of the infrastructure
> -	 changes are in.  */
> -      reg_addr[m].prefixed_memory_p = false;
> +      /* Only modes that support offset addressing by default can be
> +	 prefixed.  */
> +      reg_addr[m].prefixed_memory_p = (TARGET_PREFIXED_ADDR
> +				       && (def_iform == INSN_FORM_D
> +					   || def_iform == INSN_FORM_DS
> +					   || def_iform == INSN_FORM_DQ));
> +
>      }
>  }
>
> @@ -5693,7 +5703,7 @@ static int
>  num_insns_constant_gpr (HOST_WIDE_INT value)
>  {
>    /* signed constant loadable with addi */
> -  if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
> +  if (SIGNED_16BIT_OFFSET_P (value))
>      return 1;
>
>    /* constant loadable with addis */
> @@ -5701,6 +5711,10 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>  	   && (value >> 31 == -1 || value >> 31 == 0))
>      return 1;
>
> +  /* PADDI can support up to 34 bit signed integers.  */
> +  else if (TARGET_PREFIXED_ADDR && SIGNED_34BIT_OFFSET_P (value))
> +    return 1;
> +
>    else if (TARGET_POWERPC64)
>      {
>        HOST_WIDE_INT low  = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
> @@ -7411,7 +7425,7 @@ quad_address_p (rtx addr, machine_mode mode, bool
>  {
>    rtx op0, op1;
>
> -  if (GET_MODE_SIZE (mode) != 16)
> +  if (GET_MODE_SIZE (mode) < 16)
>      return false;

Same question about whether this is an unrelated change, perhaps to do
with vector_pair support?
>
>    if (legitimate_indirect_address_p (addr, strict))
> @@ -7420,6 +7434,13 @@ quad_address_p (rtx addr, machine_mode mode, bool
>    if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
>      return false;
>
> +  /* Is this a valid prefixed address?  If the bottom four bits of the offset
> +     are non-zero, we could use a prefixed instruction (which does not have the
> +     DQ-form constraint that the traditional instruction had) instead of
> +     forcing the unaligned offset to a GPR.  */
> +  if (prefixed_local_addr_p (addr, mode, INSN_FORM_DQ))
> +    return true;
> +
>    if (GET_CODE (addr) != PLUS)
>      return false;
>
> @@ -7521,6 +7542,13 @@ mem_operand_gpr (rtx op, machine_mode mode)
>        && legitimate_indirect_address_p (XEXP (addr, 0), false))
>      return true;
>
> +  /* Allow prefixed instructions if supported.  If the bottom two bits of the
> +     offset are non-zero, we could use a prefixed instruction (which does not
> +     have the DS-form constraint that the traditional instruction had) instead
> +     of forcing the unaligned offset to a GPR.  */
> +  if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> +    return true;
> +
>    /* Don't allow non-offsettable addresses.  See PRs 83969 and 84279.  */
>    if (!rs6000_offsettable_memref_p (op, mode, false))
>      return false;
> @@ -7542,7 +7570,7 @@ mem_operand_gpr (rtx op, machine_mode mode)
>         causes a wrap, so test only the low 16 bits.  */
>      offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
>
> -  return offset + 0x8000 < 0x10000u - extra;
> +  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
>  }
>
>  /* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
> @@ -7555,6 +7583,13 @@ mem_operand_ds_form (rtx op, machine_mode mode)
>    int extra;
>    rtx addr = XEXP (op, 0);
>
> +  /* Allow prefixed instructions if supported.  If the bottom two bits of the
> +     offset are non-zero, we could use a prefixed instruction (which does not
> +     have the DS-form constraint that the traditional instruction had) instead
> +     of forcing the unaligned offset to a GPR.  */
> +  if (prefixed_local_addr_p (addr, mode, INSN_FORM_DS))
> +    return true;
> +
>    if (!offsettable_address_p (false, mode, addr))
>      return false;
>
> @@ -7575,7 +7610,7 @@ mem_operand_ds_form (rtx op, machine_mode mode)
>         causes a wrap, so test only the low 16 bits.  */
>      offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
>
> -  return offset + 0x8000 < 0x10000u - extra;
> +  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
>  }
>  

>  /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */
> @@ -7924,8 +7959,10 @@ rs6000_legitimate_offset_address_p (machine_mode m
>        break;
>      }
>
> -  offset += 0x8000;
> -  return offset < 0x10000 - extra;
> +  if (TARGET_PREFIXED_ADDR)
> +    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
> +  else
> +    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
>  }
>
>  bool
> @@ -8822,6 +8859,11 @@ rs6000_legitimate_address_p (machine_mode mode, rt
>        && mode_supports_pre_incdec_p (mode)
>        && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
>      return 1;
> +
> +  /* Handle prefixed addresses (pc-relative or 34-bit offset).  */
> +  if (prefixed_local_addr_p (x, mode, INSN_FORM_UNKNOWN))
> +    return 1;
> +
>    /* Handle restricted vector d-form offsets in ISA 3.0.  */
>    if (quad_offset_p)
>      {
> @@ -8880,7 +8922,10 @@ rs6000_legitimate_address_p (machine_mode mode, rt
>  	  || (!avoiding_indexed_address_p (mode)
>  	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
>        && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
> -    return 1;
> +    {
> +      /* There is no prefixed version of the load/store with update.  */
> +      return !prefixed_local_addr_p (XEXP (x, 1), mode, INSN_FORM_UNKNOWN);
> +    }
>    if (reg_offset_p && !quad_offset_p
>        && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
>      return 1;
> @@ -8942,8 +8987,12 @@ rs6000_mode_dependent_address (const_rtx addr)
>  	  && XEXP (addr, 0) != arg_pointer_rtx
>  	  && CONST_INT_P (XEXP (addr, 1)))
>  	{
> -	  unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
> -	  return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
> +	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
> +	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
> +	  if (TARGET_PREFIXED_ADDR)
> +	    return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
> +	  else
> +	    return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
>  	}
>        break;
>
> @@ -20939,7 +20988,8 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int ou
>  	    || outer_code == PLUS
>  	    || outer_code == MINUS)
>  	   && (satisfies_constraint_I (x)
> -	       || satisfies_constraint_L (x)))
> +	       || satisfies_constraint_L (x)
> +	       || satisfies_constraint_eI (x)))
>  	  || (outer_code == AND
>  	      && (satisfies_constraint_K (x)
>  		  || (mode == SImode
> Index: gcc/config/rs6000/rs6000.md
> ===================================================================
> --- gcc/config/rs6000/rs6000.md	(revision 274175)
> +++ gcc/config/rs6000/rs6000.md	(working copy)
> @@ -1768,15 +1768,17 @@
>  })
>
>  (define_insn "*add<mode>3"
> -  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r")
> -	(plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b")
> -		  (match_operand:GPR 2 "add_operand" "r,I,L")))]
> +  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
> +	(plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")
> +		  (match_operand:GPR 2 "add_operand" "r,I,L,eI")))]
>    ""
>    "@
>     add %0,%1,%2
>     addi %0,%1,%2
> -   addis %0,%1,%v2"
> -  [(set_attr "type" "add")])
> +   addis %0,%1,%v2
> +   addi %0,%1,%2"
> +  [(set_attr "type" "add")
> +   (set_attr "isa" "*,*,*,fut")])
>
>  (define_insn "*addsi3_high"
>    [(set (match_operand:SI 0 "gpc_reg_operand" "=b")
> @@ -6916,22 +6918,22 @@
>
>  ;;		MR           LA           LWZ          LFIWZX       LXSIWZX
>  ;;		STW          STFIWX       STXSIWX      LI           LIS
> -;;		#            XXLOR        XXSPLTIB 0   XXSPLTIB -1  VSPLTISW
> -;;		XXLXOR 0     XXLORC -1    P9 const     MTVSRWZ      MFVSRWZ
> -;;		MF%1         MT%0         NOP
> +;;		PLI          #            XXLOR        XXSPLTIB 0   XXSPLTIB -1
> +;;		VSPLTISW     XXLXOR 0     XXLORC -1    P9 const     MTVSRWZ
> +;;		MFVSRWZ      MF%1         MT%0         NOP
>  (define_insn "*movsi_internal1"
>    [(set (match_operand:SI 0 "nonimmediate_operand"
>  		"=r,         r,           r,           d,           v,
>  		 m,          Z,           Z,           r,           r,
> -		 r,          wa,          wa,          wa,          v,
> -		 wa,         v,           v,           wa,          r,
> -		 r,          *h,          *h")
> +		 r,          r,           wa,          wa,          wa,
> +		 v,          wa,          v,           v,           wa,
> +		 r,          r,           *h,          *h")
>  	(match_operand:SI 1 "input_operand"
>  		"r,          U,           m,           Z,           Z,
>  		 r,          d,           v,           I,           L,
> -		 n,          wa,          O,           wM,          wB,
> -		 O,          wM,          wS,          r,           wa,
> -		 *h,         r,           0"))]
> +		 eI,         n,           wa,          O,           wM,
> +		 wB,         O,           wM,          wS,          r,
> +		 wa,         *h,          r,           0"))]
>    "gpc_reg_operand (operands[0], SImode)
>     || gpc_reg_operand (operands[1], SImode)"
>    "@
> @@ -6945,6 +6947,7 @@
>     stxsiwx %x1,%y0
>     li %0,%1
>     lis %0,%v1
> +   li %0,%1
>     #
>     xxlor %x0,%x1,%x1
>     xxspltib %x0,0
> @@ -6961,21 +6964,21 @@
>    [(set_attr "type"
>  		"*,          *,           load,        fpload,      fpload,
>  		 store,      fpstore,     fpstore,     *,           *,
> -		 *,          veclogical,  vecsimple,   vecsimple,   vecsimple,
> -		 veclogical, veclogical,  vecsimple,   mffgpr,      mftgpr,
> -		 *,          *,           *")
> +		 *,          *,           veclogical,  vecsimple,   vecsimple,
> +		 vecsimple,  veclogical,  veclogical,  vecsimple,   mffgpr,
> +		 mftgpr,     *,           *,           *")
>     (set_attr "length"
>  		"*,          *,           *,           *,           *,
>  		 *,          *,           *,           *,           *,
> -		 8,          *,           *,           *,           *,
> -		 *,          *,           8,           *,           *,
> -		 *,          *,           *")
> +		 *,          8,           *,           *,           *,
> +		 *,          *,           *,           8,           *,
> +		 *,          *,           *,           *")
>     (set_attr "isa"
>  		"*,          *,           *,           p8v,         p8v,
>  		 *,          p8v,         p8v,         *,           *,
> -		 *,          p8v,         p9v,         p9v,         p8v,
> -		 p9v,        p8v,         p9v,         p8v,         p8v,
> -		 *,          *,           *")])
> +		 fut,        *,           p8v,         p9v,         p9v,
> +		 p8v,        p9v,         p8v,         p9v,         p8v,
> +		 p8v,        *,           *,           *")])
>
>  ;; Like movsi, but adjust a SF value to be used in a SI context, i.e.
>  ;; (set (reg:SI ...) (subreg:SI (reg:SF ...) 0))
> @@ -7120,14 +7123,15 @@
>    "xscvdpsp %x0,%x1"
>    [(set_attr "type" "fp")])
>
> -;; Split a load of a large constant into the appropriate two-insn
> -;; sequence.
> +;; Split a load of a large constant into the appropriate two-insn sequence.  On
> +;; systems that support PADDI (PLI), we can use PLI to load any 32-bit constant
> +;; in one instruction.
>
>  (define_split
>    [(set (match_operand:SI 0 "gpc_reg_operand")
>  	(match_operand:SI 1 "const_int_operand"))]
>    "(unsigned HOST_WIDE_INT) (INTVAL (operands[1]) + 0x8000) >= 0x10000
> -   && (INTVAL (operands[1]) & 0xffff) != 0"
> +   && (INTVAL (operands[1]) & 0xffff) != 0 && !TARGET_PREFIXED_ADDR"
>    [(set (match_dup 0)
>  	(match_dup 2))
>     (set (match_dup 0)
> @@ -7766,9 +7770,18 @@
>  ;; not swapped like they are for TImode or TFmode.  Subregs therefore are
>  ;; problematical.  Don't allow direct move for this case.
>
> +;;		FPR load    FPR store   FPR move    FPR zero    GPR load
> +;;		GPR store   GPR move    GPR zero    MFVSRD      MTVSRD
> +
>  (define_insn_and_split "*mov<mode>_64bit_dm"
> -  [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r,r,d")
> -	(match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,<zero_fp>,r,<zero_fp>Y,r,d,r"))]
> +  [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand"
> +		"=m,        d,          d,          d,          Y,
> +		 r,         r,          r,          r,          d")
> +
> +	(match_operand:FMOVE128_FPR 1 "input_operand"
> +		"d,         m,          d,          <zero_fp>,  r,
> +		 <zero_fp>, Y,          r,          d,          r"))]
> +
>    "TARGET_HARD_FLOAT && TARGET_POWERPC64 && FLOAT128_2REG_P (<MODE>mode)
>     && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
>     && (gpc_reg_operand (operands[0], <MODE>mode)
> @@ -7776,9 +7789,13 @@
>    "#"
>    "&& reload_completed"
>    [(pc)]
> -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
> -  [(set_attr "length" "8,8,8,8,12,12,8,8,8")
> -   (set_attr "isa" "*,*,*,*,*,*,*,p8v,p8v")])
> +{
> +  rs6000_split_multireg_move (operands[0], operands[1]);
> +  DONE;
> +}
> +  [(set_attr "isa" "*,*,*,*,*,*,*,*,p8v,p8v")
> +   (set_attr "non_prefixed_length" "8")
> +   (set_attr "prefixed_length" "20")])
>
>  (define_insn_and_split "*movtd_64bit_nodm"
>    [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
> @@ -7789,8 +7806,12 @@
>    "#"
>    "&& reload_completed"
>    [(pc)]
> -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
> -  [(set_attr "length" "8,8,8,12,12,8")])
> +{
> +  rs6000_split_multireg_move (operands[0], operands[1]);
> +  DONE;
> +}
> +  [(set_attr "non_prefixed_length" "8")
> +   (set_attr "prefixed_length" "20")])
>
>  (define_insn_and_split "*mov<mode>_32bit"
>    [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,d,Y,r,r")
> @@ -8800,24 +8821,24 @@
>    [(pc)]
>  { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
>
> -;;              GPR store  GPR load   GPR move   GPR li     GPR lis     GPR #
> -;;              FPR store  FPR load   FPR move   AVX store  AVX store   AVX load
> -;;              AVX load   VSX move   P9 0       P9 -1      AVX 0/-1    VSX 0
> -;;              VSX -1     P9 const   AVX const  From SPR   To SPR      SPR<->SPR
> -;;              VSX->GPR   GPR->VSX
> +;;              GPR store  GPR load   GPR move   GPR li     GPR lis     GPR pli
> +;;              GPR #      FPR store  FPR load   FPR move   AVX store   AVX store
> +;;              AVX load   AVX load   VSX move   P9 0       P9 -1       AVX 0/-1
> +;;              VSX 0      VSX -1     P9 const   AVX const  From SPR    To SPR
> +;;              SPR<->SPR  VSX->GPR   GPR->VSX
>  (define_insn "*movdi_internal64"
>    [(set (match_operand:DI 0 "nonimmediate_operand"
>                 "=YZ,       r,         r,         r,         r,          r,
> -                m,         ^d,        ^d,        wY,        Z,          $v,
> -                $v,        ^wa,       wa,        wa,        v,          wa,
> -                wa,        v,         v,         r,         *h,         *h,
> -                ?r,        ?wa")
> +                r,         m,         ^d,        ^d,        wY,         Z,
> +                $v,        $v,        ^wa,       wa,        wa,         v,
> +                wa,        wa,        v,         v,         r,          *h,
> +                *h,        ?r,        ?wa")
>  	(match_operand:DI 1 "input_operand"
> -               "r,         YZ,        r,         I,         L,          nF,
> -                ^d,        m,         ^d,        ^v,        $v,         wY,
> -                Z,         ^wa,       Oj,        wM,        OjwM,       Oj,
> -                wM,        wS,        wB,        *h,        r,          0,
> -                wa,        r"))]
> +               "r,         YZ,        r,         I,         L,          eI,
> +                nF,        ^d,        m,         ^d,        ^v,         $v,
> +                wY,        Z,         ^wa,       Oj,        wM,         OjwM,
> +                Oj,        wM,        wS,        wB,        *h,         r,
> +                0,         wa,        r"))]
>    "TARGET_POWERPC64
>     && (gpc_reg_operand (operands[0], DImode)
>         || gpc_reg_operand (operands[1], DImode))"
> @@ -8827,6 +8848,7 @@
>     mr %0,%1
>     li %0,%1
>     lis %0,%v1
> +   li %0,%1
>     #
>     stfd%U0%X0 %1,%0
>     lfd%U1%X1 %0,%1
> @@ -8850,26 +8872,28 @@
>     mtvsrd %x0,%1"
>    [(set_attr "type"
>                 "store,      load,	*,         *,         *,         *,
> -                fpstore,    fpload,     fpsimple,  fpstore,   fpstore,   fpload,
> -                fpload,     veclogical, vecsimple, vecsimple, vecsimple, veclogical,
> -                veclogical, vecsimple,  vecsimple, mfjmpr,    mtjmpr,    *,
> -                mftgpr,    mffgpr")
> +                *,          fpstore,    fpload,    fpsimple,  fpstore,   fpstore,
> +                fpload,     fpload,     veclogical,vecsimple, vecsimple, vecsimple,
> +                veclogical, veclogical, vecsimple,  vecsimple, mfjmpr,   mtjmpr,
> +                *,          mftgpr,    mffgpr")
>     (set_attr "size" "64")
>     (set_attr "length"
> -               "*,         *,         *,         *,         *,          20,
> +               "*,         *,         *,         *,         *,          *,
> +                20,        *,         *,         *,         *,          *,
>                  *,         *,         *,         *,         *,          *,
> -                *,         *,         *,         *,         *,          *,
> -                *,         8,         *,         *,         *,          *,
> -                *,         *")
> +                *,         *,         8,         *,         *,          *,
> +                *,         *,         *")
>     (set_attr "isa"
> -               "*,         *,         *,         *,         *,          *,
> -                *,         *,         *,         p9v,       p7v,        p9v,
> -                p7v,       *,         p9v,       p9v,       p7v,        *,
> -                *,         p7v,       p7v,       *,         *,          *,
> -                p8v,       p8v")])
> +               "*,         *,         *,         *,         *,          fut,
> +                *,         *,         *,         *,         p9v,        p7v,
> +                p9v,       p7v,       *,         p9v,       p9v,        p7v,
> +                *,         *,         p7v,       p7v,       *,          *,
> +                *,         p8v,       p8v")])
>
>  ; Some DImode loads are best done as a load of -1 followed by a mask
> -; instruction.
> +; instruction.  On systems that support the PADDI (PLI) instruction,
> +; num_insns_constant returns 1, so this splitter would not be used for
> +; constants that can be loaded with PLI.
>  (define_split
>    [(set (match_operand:DI 0 "int_reg_operand_not_pseudo")
>  	(match_operand:DI 1 "const_int_operand"))]
> @@ -8987,7 +9011,8 @@
>    return rs6000_output_move_128bit (operands);
>  }
>    [(set_attr "type" "store,store,load,load,*,*")
> -   (set_attr "length" "8")])
> +   (set_attr "non_prefixed_length" "8,8,8,8,8,40")
> +   (set_attr "prefixed_length" "20,20,20,20,8,40")])
>
>  (define_split
>    [(set (match_operand:TI2 0 "int_reg_operand")
> @@ -11501,15 +11526,43 @@
>    [(set_attr "type" "three")
>     (set_attr "length" "12")])
>
> +;; We can't use the prefixed attribute here because there are two memory
> +;; instructions, and we can't split the insn due to the fact that this
> +;; operation needs to be done in one piece.
>  (define_insn "stack_protect_setdi"
>    [(set (match_operand:DI 0 "memory_operand" "=Y")
>  	(unspec:DI [(match_operand:DI 1 "memory_operand" "Y")] UNSPEC_SP_SET))
>     (set (match_scratch:DI 2 "=&r") (const_int 0))]
>    "TARGET_64BIT"
> -  "ld%U1%X1 %2,%1\;std%U0%X0 %2,%0\;li %2,0"
> +{
> +  if (prefixed_mem_operand (operands[1], DImode))
> +    output_asm_insn ("pld %2,%1", operands);
> +  else
> +    output_asm_insn ("ld%U1%X1 %2,%1", operands);
> +
> +  if (prefixed_mem_operand (operands[0], DImode))
> +    output_asm_insn ("pstd %2,%0", operands);
> +  else
> +    output_asm_insn ("std%U0%X0 %2,%0", operands);
> +
> +  return "li %2,0";
> +}
>    [(set_attr "type" "three")
> -   (set_attr "length" "12")])
>
> +  ;; Back to back prefixed memory instructions take 20 bytes (8 bytes for each
> +  ;; prefixed instruction + 4 bytes for the possible NOP).
> +   (set_attr "prefixed" "no")

Should "prefixed" be conditional?  "no" seems to break rs6000_num_insns
in patch #4.
> +   (set (attr "length")
> +	(cond [(and (match_operand 0 "prefixed_mem_operand")
> +		    (match_operand 1 "prefixed_mem_operand"))
> +	       (const_string "24")
> +
> +	       (ior (match_operand 0 "prefixed_mem_operand")
> +		    (match_operand 1 "prefixed_mem_operand"))
> +	       (const_string "20")]
> +
> +	      (const_string "12")))])
> +
>  (define_expand "stack_protect_test"
>    [(match_operand 0 "memory_operand")
>     (match_operand 1 "memory_operand")
> @@ -11547,6 +11600,9 @@
>     lwz%U1%X1 %3,%1\;lwz%U2%X2 %4,%2\;cmplw %0,%3,%4\;li %3,0\;li %4,0"
>    [(set_attr "length" "16,20")])
>
> +;; We can't use the prefixed attribute here because there are two memory
> +;; instructions, and we can't split the insn due to the fact that this
> +;; operation needs to be done in one piece.
>  (define_insn "stack_protect_testdi"
>    [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y")
>          (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "Y,Y")
> @@ -11555,11 +11611,44 @@
>     (set (match_scratch:DI 4 "=r,r") (const_int 0))
>     (clobber (match_scratch:DI 3 "=&r,&r"))]
>    "TARGET_64BIT"
> -  "@
> -   ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;xor. %3,%3,%4\;li %4,0
> -   ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;cmpld %0,%3,%4\;li %3,0\;li %4,0"
> -  [(set_attr "length" "16,20")])
> +{
> +  if (prefixed_mem_operand (operands[1], DImode))
> +    output_asm_insn ("pld %3,%1", operands);
> +  else
> +    output_asm_insn ("ld%U1%X1 %3,%1", operands);
>
> +  if (prefixed_mem_operand (operands[2], DImode))
> +    output_asm_insn ("pld %4,%2", operands);
> +  else
> +    output_asm_insn ("ld%U2%X2 %4,%2", operands);
> +
> +  if (which_alternative == 0)
> +    output_asm_insn ("xor. %3,%3,%4", operands);
> +  else
> +    output_asm_insn ("cmpld %0,%3,%4\;li %3,0", operands);
> +
> +  return "li %4,0";
> +}
> +  ;; Back to back prefixed memory instructions take 20 bytes (8 bytes for each
> +  ;; prefixed instruction + 4 bytes for the possible NOP).
> +  [(set (attr "length")
> +	(cond [(and (match_operand 1 "prefixed_mem_operand")
> +		    (match_operand 2 "prefixed_mem_operand"))
> +	       (if_then_else (eq_attr "alternative" "0")
> +			     (const_string "28")
> +			     (const_string "32"))
> +
> +	       (ior (match_operand 1 "prefixed_mem_operand")
> +		    (match_operand 2 "prefixed_mem_operand"))
> +	       (if_then_else (eq_attr "alternative" "0")
> +			     (const_string "20")
> +			     (const_string "24"))]
> +
> +	      (if_then_else (eq_attr "alternative" "0")
> +			    (const_string "16")
> +			    (const_string "20"))))
> +   (set_attr "prefixed" "no")])

Same question about "prefixed" being conditional; again, "no" seems to
break rs6000_num_insns in patch #4.

Thanks,
Bill
> +
>  

>  ;; Here are the actual compare insns.
>  (define_insn "*cmp<mode>_signed"
> Index: gcc/config/rs6000/vsx.md
> ===================================================================
> --- gcc/config/rs6000/vsx.md	(revision 274173)
> +++ gcc/config/rs6000/vsx.md	(working copy)
> @@ -1149,10 +1149,30 @@
>                 "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
>                  store,     load,      store,     *,         vecsimple, vecsimple,
>                  vecsimple, *,         *,         vecstore,  vecload")
> -   (set_attr "length"
> -               "*,         *,         *,         8,         *,         8,
> -                8,         8,         8,         8,         *,         *,
> -                *,         20,        8,         *,         *")
> +   (set (attr "non_prefixed_length")
> +	(cond [(and (eq_attr "alternative" "4")		;; MTVSRDD
> +		    (match_test "TARGET_P9_VECTOR"))
> +	       (const_string "4")
> +
> +	       (eq_attr "alternative" "3,4")		;; GPR <-> VSX
> +	       (const_string "8")
> +
> +	       (eq_attr "alternative" "5,6,7,8")	;; GPR load/store
> +	       (const_string "8")]
> +	      (const_string "*")))
> +
> +   (set (attr "prefixed_length")
> +	(cond [(and (eq_attr "alternative" "4")		;; MTVSRDD
> +		    (match_test "TARGET_P9_VECTOR"))
> +	       (const_string "4")
> +
> +	       (eq_attr "alternative" "3,4")		;; GPR <-> VSX
> +	       (const_string "8")
> +
> +	       (eq_attr "alternative" "5,6,7,8")	;; GPR load/store
> +	       (const_string "20")]
> +	      (const_string "*")))
> +
>     (set_attr "isa"
>                 "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
>                  *,         *,         *,         *,         p9v,       *,
> @@ -3199,7 +3219,12 @@
>  					   operands[3], <VSX_D:VS_scalar>mode);
>  }
>    [(set_attr "type" "fpload,load")
> -   (set_attr "length" "8")])
> +   (set (attr "prefixed")
> +	(if_then_else (match_operand 1 "prefixed_mem_operand")
> +		      (const_string "yes")
> +		      (const_string "no")))
> +   (set_attr "non_prefixed_length" "8")
> +   (set_attr "prefixed_length" "16")])
>
>  ;; Optimize storing a single scalar element that is the right location to
>  ;; memory
> @@ -3294,6 +3319,8 @@
>  }
>    [(set_attr "type" "fpload,fpload,fpload,load")
>     (set_attr "length" "8")
> +   (set_attr "non_prefixed_length" "8")
> +   (set_attr "prefixed_length" "16")
>     (set_attr "isa" "*,p7v,p9v,*")])
>
>  ;; Variable V4SF extract
>



More information about the Gcc-patches mailing list