


Re: [AArch64][PR65375] Fix RTX cost for vector SET



On 17/04/15 12:19, Kugan wrote:
Hi James,

Here is an attempt along these lines. Is this what you have in mind?
I'm trying to keep the functionality as before so that we can tune the
parameters later. It is not fully tested yet.

Hi Kugan,
I'm not doing a full review here, just a couple of comments inline.

Thanks,
Kyrill


Thanks,
Kugan

cost.txt


diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index ae2b547..ed9432e 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)	/* Alu.  */
+    COSTS_N_INSNS (1),	/* Alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
    }

What about the other CPU vector costs?
Also, tune_params would need updating.
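
As a concrete (hypothetical) sketch, each remaining cpu_cost_table would need
the same two extra initializers in its vector entry, e.g.:

    /* Vector */
    {
      COSTS_N_INSNS (1),	/* alu.  */
      COSTS_N_INSNS (1),	/* load.  */
      COSTS_N_INSNS (1)	/* store.  */
    }

with the actual per-CPU costs to be tuned later, as you say.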

  };
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cba3c1a..c2d4a53 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
       above this default.  */
    *cost = COSTS_N_INSNS (1);
- /* TODO: The cost infrastructure currently does not handle
-     vector operations.  Assume that all vector operations
-     are equally expensive.  */
-  if (VECTOR_MODE_P (mode))
-    {
-      if (speed)
-	*cost += extra_cost->vect.alu;
-      return true;
-    }
-
    switch (code)
      {
      case SET:
@@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
  	  if (speed)
  	    {
  	      rtx address = XEXP (op0, 0);
-	      if (GET_MODE_CLASS (mode) == MODE_INT)
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.store;
+	      else if (GET_MODE_CLASS (mode) == MODE_INT)
  		*cost += extra_cost->ldst.store;
  	      else if (mode == SFmode)
  		*cost += extra_cost->ldst.storef;
@@ -5544,10 +5536,17 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
/* Fall through. */
  	case REG:
+	  if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+	    {
+              /* The cost is 1 per vector-register copied.  */
+              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+			      / GET_MODE_SIZE (V4SImode);
+              *cost = COSTS_N_INSNS (n_minus_1 + 1);
+	    }
  	  /* const0_rtx is in general free, but we will use an
  	     instruction to set a register to 0.  */
-          if (REG_P (op1) || op1 == const0_rtx)
-            {
+	  else if (REG_P (op1) || op1 == const0_rtx)
+	    {
                /* The cost is 1 per register copied.  */
                int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
  			      / UNITS_PER_WORD;
@@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
  	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
  		  >= INTVAL (XEXP (op0, 1))))
  	    op1 = XEXP (op1, 0);
+	  gcc_assert (!VECTOR_MODE_P (mode));

We shouldn't assert in rtx costs. If some control flow logic gets buggy,
at worst we'd return a wrong rtx cost and make a suboptimal decision.
This shouldn't ever lead to a crash, IMO.
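
Something like the below would degrade gracefully instead (just a sketch,
reusing the generic vector fallback pattern from elsewhere in this patch):

      if (VECTOR_MODE_P (mode))
	{
	  if (speed)
	    *cost += extra_cost->vect.alu;
	  return true;
	}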

if (CONST_INT_P (op1))
              {
@@ -5621,8 +5621,10 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
      case CONST_DOUBLE:
        if (speed)
  	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
  	  /* mov[df,sf]_aarch64.  */
-	  if (aarch64_float_const_representable_p (x))
+	  else if (aarch64_float_const_representable_p (x))
  	    /* FMOV (scalar immediate).  */
  	    *cost += extra_cost->fp[mode == DFmode].fpconst;
  	  else if (!aarch64_float_const_zero_rtx_p (x))
@@ -5650,7 +5652,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
  	     approximation for the additional cost of the addressing
  	     mode.  */
  	  rtx address = XEXP (x, 0);
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.load;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
  	    *cost += extra_cost->ldst.load;
  	  else if (mode == SFmode)
  	    *cost += extra_cost->ldst.loadf;
@@ -5705,7 +5709,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
      case CLRSB:
      case CLZ:
        if (speed)
-        *cost += extra_cost->alu.clz;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.clz;
+	}
      return false;

@@ -5790,6 +5799,13 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
            return false;
          }
+ /* VCMP. */
+      if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	  return true;
+	}
        return false;
case MINUS:
@@ -5808,8 +5824,13 @@ cost_minus:
  	    *cost += rtx_cost (op0, MINUS, 0, speed);
if (speed)
-	      /* SUB(S) (immediate).  */
-	      *cost += extra_cost->alu.arith;
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		/* SUB(S) (immediate).  */
+		else
+		  *cost += extra_cost->alu.arith;
+	      }
  	    return true;
}
@@ -5818,8 +5839,12 @@ cost_minus:
          if (aarch64_rtx_arith_op_extract_p (op1, mode))
  	  {
  	    if (speed)
-	      *cost += extra_cost->alu.arith_shift;
-
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		else
+		  *cost += extra_cost->alu.arith_shift;
+	      }
  	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
  			       (enum rtx_code) GET_CODE (op1),
  			       0, speed);
@@ -5844,7 +5869,10 @@ cost_minus:
if (speed)
  	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	    if (VECTOR_MODE_P (mode))
+	      /* Vector SUB.  */
+	      *cost += extra_cost->vect.alu;
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
  	      /* SUB(S).  */
  	      *cost += extra_cost->alu.arith;
  	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5878,8 +5906,13 @@ cost_plus:
  	    *cost += rtx_cost (op0, PLUS, 0, speed);
if (speed)
-	      /* ADD (immediate).  */
-	      *cost += extra_cost->alu.arith;
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		/* ADD (immediate).  */
+		else
+		  *cost += extra_cost->alu.arith;
+	      }
  	    return true;
  	  }
@@ -5887,8 +5920,12 @@ cost_plus:
          if (aarch64_rtx_arith_op_extract_p (op0, mode))
  	  {
  	    if (speed)
-	      *cost += extra_cost->alu.arith_shift;
-
+	      {
+		if (VECTOR_MODE_P (mode))
+		  *cost += extra_cost->vect.alu;
+		else
+		  *cost += extra_cost->alu.arith_shift;
+	      }
  	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
  			       (enum rtx_code) GET_CODE (op0),
  			       0, speed);
@@ -5913,7 +5950,10 @@ cost_plus:
if (speed)
  	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	    if (VECTOR_MODE_P (mode))
+	      /* Vector ADD.  */
+	      *cost += extra_cost->vect.alu;
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
  	      /* ADD.  */
  	      *cost += extra_cost->alu.arith;
  	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5927,8 +5967,12 @@ cost_plus:
        *cost = COSTS_N_INSNS (1);
if (speed)
-        *cost += extra_cost->alu.rev;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.rev;
+	}
        return false;
case IOR:
@@ -5936,10 +5980,14 @@ cost_plus:
          {
            *cost = COSTS_N_INSNS (1);
- if (speed)
-            *cost += extra_cost->alu.rev;
-
-          return true;
+	  if (speed)
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.rev;
+	    }
+	  return true;
          }
      /* Fall through.  */
      case XOR:
@@ -5948,6 +5996,13 @@ cost_plus:
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);
+ if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	  return true;
+	}
+
        if (code == AND
            && GET_CODE (op0) == MULT
            && CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6068,15 @@ cost_plus:
        return false;
case NOT:
-      /* MVN.  */
        if (speed)
-	*cost += extra_cost->alu.logical;
-
+	{
+	  /* VNEG.  */
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  /* MVN.  */
+	  else
+	    *cost += extra_cost->alu.logical;
+	}
        /* The logical instruction could have the shifted register form,
           but the cost is the same if the shift is processed as a separate
           instruction, so we don't bother with it here.  */
@@ -6057,13 +6117,18 @@ cost_plus:
/* UXTB/UXTH. */
        if (speed)
-	*cost += extra_cost->alu.extend;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.extend;
+	}
        return false;
case SIGN_EXTEND:
        if (MEM_P (XEXP (x, 0)))
  	{
+	  gcc_assert (!VECTOR_MODE_P (mode));

Same here.

  	  /* LDRSH.  */
  	  if (speed)
  	    {
@@ -6078,7 +6143,12 @@ cost_plus:
  	}
if (speed)
-	*cost += extra_cost->alu.extend;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.extend;
+	}
        return false;
case ASHIFT:
@@ -6087,10 +6157,16 @@ cost_plus:
if (CONST_INT_P (op1))
          {
-	  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
-	     aliases.  */
  	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      /* VSHL (immediate).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
+		 aliases.  */
+	      else
+		*cost += extra_cost->alu.shift;
+	    }
/* We can incorporate zero/sign extend for free. */
            if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6178,15 @@ cost_plus:
          }
        else
          {
-	  /* LSLV.  */
  	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      /* VSHL (register).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* LSLV.  */
+	      else
+		*cost += extra_cost->alu.shift_reg;
+	    }
  	  return false;  /* All arguments need to be in registers.  */
          }
@@ -6120,18 +6201,27 @@ cost_plus:
  	{
  	  /* ASR (immediate) and friends.  */
  	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.shift;
+	    }
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
  	  return true;
  	}
        else
  	{
-
-	  /* ASR (register) and friends.  */
  	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      /* VSHR (register).  */
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      /* ASR (register) and friends.  */
+	      else
+		*cost += extra_cost->alu.shift_reg;
+	    }
  	  return false;  /* All arguments need to be in registers.  */
  	}
@@ -6179,7 +6269,12 @@ cost_plus:
      case SIGN_EXTRACT:
        /* UBFX/SBFX.  */
        if (speed)
-	*cost += extra_cost->alu.bfx;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.bfx;
+	}
/* We can trust that the immediates used will be correct (there
  	 are no by-register forms), so we need only cost op0.  */
@@ -6196,7 +6291,9 @@ cost_plus:
      case UMOD:
        if (speed)
  	{
-	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
  	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
  		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
  	  else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6310,9 @@ cost_plus:
      case SQRT:
        if (speed)
  	{
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
  	    /* There is no integer SQRT, so only DIV and UDIV can get
  	       here.  */
  	    *cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6344,12 @@ cost_plus:
        op2 = XEXP (x, 2);
if (speed)
-	*cost += extra_cost->fp[mode == DFmode].fma;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].fma;
+	}
/* FMSUB, FNMADD, and FNMSUB are free. */
        if (GET_CODE (op0) == NEG)
@@ -6285,7 +6389,13 @@ cost_plus:
case FLOAT_EXTEND:
        if (speed)
-	*cost += extra_cost->fp[mode == DFmode].widen;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    /* Vector conversion.  */
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].widen;
+	}
        return false;
case FLOAT_TRUNCATE:
@@ -6311,8 +6421,13 @@ cost_plus:
          }
if (speed)
-        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+	{
+	  /* FCVT.  */
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+	}
        *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
        return true;
@@ -6321,7 +6436,12 @@ cost_plus:
  	{
  	  /* FABS and FNEG are analogous.  */
  	  if (speed)
-	    *cost += extra_cost->fp[mode == DFmode].neg;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->fp[mode == DFmode].neg;
+	    }
  	}
        else
  	{
@@ -6338,10 +6458,13 @@ cost_plus:
      case SMIN:
        if (speed)
  	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
  	  /* FMAXNM/FMINNM/FMAX/FMIN.
  	     TODO: This may not be accurate for all implementations, but
  	     we do not model this in the cost tables.  */
-	  *cost += extra_cost->fp[mode == DFmode].addsub;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].addsub;
  	}
        return false;
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 3ee7ebf..c8e1d2e 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -124,6 +124,8 @@ struct fp_cost_table
  struct vector_cost_table
  {
    const int alu;
+  const int load;
+  const int store;
  };
struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index 05e96a9..257902c 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -119,7 +119,9 @@ const struct cpu_cost_table generic_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)	/* alu.  */
+    COSTS_N_INSNS (1),	/* alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
    }
  };
@@ -220,7 +222,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)	/* alu.  */
+    COSTS_N_INSNS (1),	/* alu.  */
+    COSTS_N_INSNS (1),	/* Load.  */
+    COSTS_N_INSNS (1)	/* Store.  */
    }
  };
@@ -321,7 +325,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (1)  /* alu.  */
+    COSTS_N_INSNS (1),  /* alu.  */
+    COSTS_N_INSNS (1),  /* Load.  */
+    COSTS_N_INSNS (1)   /* Store.  */
    }
  };
@@ -422,7 +428,9 @@ const struct cpu_cost_table xgene1_extra_costs =
    },
    /* Vector */
    {
-    COSTS_N_INSNS (2)  /* alu.  */
+    COSTS_N_INSNS (2),  /* alu.  */
+    COSTS_N_INSNS (1),  /* Load.  */
+    COSTS_N_INSNS (1)   /* Store.  */
    }
  };

