This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [AArch64][PR65375] Fix RTX cost for vector SET
- From: Kyrill Tkachov <kyrylo dot tkachov at arm dot com>
- To: Kugan <kugan dot vivekanandarajah at linaro dot org>, James Greenhalgh <James dot Greenhalgh at arm dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Marcus Shawcroft <Marcus dot Shawcroft at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>, Jim Wilson <jim dot wilson at linaro dot org>
- Date: Fri, 17 Apr 2015 12:25:01 +0100
- Subject: Re: [AArch64][PR65375] Fix RTX cost for vector SET
- Authentication-results: sourceware.org; auth=none
- References: <55066BCC dot 4010900 at linaro dot org> <5506AA24 dot 3050108 at arm dot com> <5506CD7A dot 7030109 at linaro dot org> <5506D77B dot 5060909 at linaro dot org> <55070972 dot 3000800 at arm dot com> <5507813E dot 3060106 at linaro dot org> <5513B390 dot 2030201 at linaro dot org> <552D8FF7 dot 5000105 at linaro dot org> <20150415092509 dot GA20852 at arm dot com> <552E4150 dot 3020403 at linaro dot org> <20150415111854 dot GB22143 at arm dot com> <552E4C90 dot 4070208 at linaro dot org> <5530EC32 dot 4030806 at linaro dot org>
On 17/04/15 12:19, Kugan wrote:
Hi James,
Here is an attempt along this line. Is this what you have in mind?
Trying to keep functionality as before so that we can tune the
parameters later. Not fully tested yet.
Hi Kugan,
I'm not doing a full review here, just have a comment inline.
Thanks,
Kyrill
Thanks,
Kugan
cost.txt
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index ae2b547..ed9432e 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
What about the other CPU vector costs?
Also, tune_params would need updating.
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cba3c1a..c2d4a53 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
above this default. */
*cost = COSTS_N_INSNS (1);
- /* TODO: The cost infrastructure currently does not handle
- vector operations. Assume that all vector operations
- are equally expensive. */
- if (VECTOR_MODE_P (mode))
- {
- if (speed)
- *cost += extra_cost->vect.alu;
- return true;
- }
-
switch (code)
{
case SET:
@@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
if (speed)
{
rtx address = XEXP (op0, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.store;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.store;
else if (mode == SFmode)
*cost += extra_cost->ldst.storef;
@@ -5544,10 +5536,17 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
/* Fall through. */
case REG:
+ if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+ {
+ /* The cost is 1 per vector-register copied. */
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+ / GET_MODE_SIZE (V4SImode);
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
+ }
/* const0_rtx is in general free, but we will use an
instruction to set a register to 0. */
- if (REG_P (op1) || op1 == const0_rtx)
- {
+ else if (REG_P (op1) || op1 == const0_rtx)
+ {
/* The cost is 1 per register copied. */
int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
/ UNITS_PER_WORD;
@@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
&& (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
>= INTVAL (XEXP (op0, 1))))
op1 = XEXP (op1, 0);
+ gcc_assert (!VECTOR_MODE_P (mode));
We shouldn't assert in rtx costs. If some control flow logic gets buggy,
at worst we'd return a wrong rtx cost and make a suboptimal decision.
This shouldn't ever lead to a crash, IMO.
if (CONST_INT_P (op1))
{
@@ -5621,8 +5621,10 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case CONST_DOUBLE:
if (speed)
{
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
/* mov[df,sf]_aarch64. */
- if (aarch64_float_const_representable_p (x))
+ else if (aarch64_float_const_representable_p (x))
/* FMOV (scalar immediate). */
*cost += extra_cost->fp[mode == DFmode].fpconst;
else if (!aarch64_float_const_zero_rtx_p (x))
@@ -5650,7 +5652,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
approximation for the additional cost of the addressing
mode. */
rtx address = XEXP (x, 0);
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.load;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.load;
else if (mode == SFmode)
*cost += extra_cost->ldst.loadf;
@@ -5705,7 +5709,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case CLRSB:
case CLZ:
if (speed)
- *cost += extra_cost->alu.clz;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.clz;
+ }
return false;
@@ -5790,6 +5799,13 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
return false;
}
+ /* VCMP. */
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
return false;
case MINUS:
@@ -5808,8 +5824,13 @@ cost_minus:
*cost += rtx_cost (op0, MINUS, 0, speed);
if (speed)
- /* SUB(S) (immediate). */
- *cost += extra_cost->alu.arith;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* SUB(S) (immediate). */
+ else
+ *cost += extra_cost->alu.arith;
+ }
return true;
}
@@ -5818,8 +5839,12 @@ cost_minus:
if (aarch64_rtx_arith_op_extract_p (op1, mode))
{
if (speed)
- *cost += extra_cost->alu.arith_shift;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.arith_shift;
+ }
*cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
(enum rtx_code) GET_CODE (op1),
0, speed);
@@ -5844,7 +5869,10 @@ cost_minus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ /* Vector SUB. */
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* SUB(S). */
*cost += extra_cost->alu.arith;
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5878,8 +5906,13 @@ cost_plus:
*cost += rtx_cost (op0, PLUS, 0, speed);
if (speed)
- /* ADD (immediate). */
- *cost += extra_cost->alu.arith;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* ADD (immediate). */
+ else
+ *cost += extra_cost->alu.arith;
+ }
return true;
}
@@ -5887,8 +5920,12 @@ cost_plus:
if (aarch64_rtx_arith_op_extract_p (op0, mode))
{
if (speed)
- *cost += extra_cost->alu.arith_shift;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.arith_shift;
+ }
*cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
(enum rtx_code) GET_CODE (op0),
0, speed);
@@ -5913,7 +5950,10 @@ cost_plus:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ /* Vector ADD. */
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* ADD. */
*cost += extra_cost->alu.arith;
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -5927,8 +5967,12 @@ cost_plus:
*cost = COSTS_N_INSNS (1);
if (speed)
- *cost += extra_cost->alu.rev;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
return false;
case IOR:
@@ -5936,10 +5980,14 @@ cost_plus:
{
*cost = COSTS_N_INSNS (1);
- if (speed)
- *cost += extra_cost->alu.rev;
-
- return true;
+ if (speed)
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.rev;
+ }
+ return true;
}
/* Fall through. */
case XOR:
@@ -5948,6 +5996,13 @@ cost_plus:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+
if (code == AND
&& GET_CODE (op0) == MULT
&& CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6068,15 @@ cost_plus:
return false;
case NOT:
- /* MVN. */
if (speed)
- *cost += extra_cost->alu.logical;
-
+ {
+ /* VNEG. */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* MVN. */
+ else
+ *cost += extra_cost->alu.logical;
+ }
/* The logical instruction could have the shifted register form,
but the cost is the same if the shift is processed as a separate
instruction, so we don't bother with it here. */
@@ -6057,13 +6117,18 @@ cost_plus:
/* UXTB/UXTH. */
if (speed)
- *cost += extra_cost->alu.extend;
-
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.extend;
+ }
return false;
case SIGN_EXTEND:
if (MEM_P (XEXP (x, 0)))
{
+ gcc_assert (!VECTOR_MODE_P (mode));
Same here.
/* LDRSH. */
if (speed)
{
@@ -6078,7 +6143,12 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->alu.extend;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.extend;
+ }
return false;
case ASHIFT:
@@ -6087,10 +6157,16 @@ cost_plus:
if (CONST_INT_P (op1))
{
- /* LSL (immediate), UBMF, UBFIZ and friends. These are all
- aliases. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ /* VSHL (immediate). */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all
+ aliases. */
+ else
+ *cost += extra_cost->alu.shift;
+ }
/* We can incorporate zero/sign extend for free. */
if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6178,15 @@ cost_plus:
}
else
{
- /* LSLV. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+ /* VSHL (register). */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* LSLV. */
+ else
+ *cost += extra_cost->alu.shift_reg;
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6120,18 +6201,27 @@ cost_plus:
{
/* ASR (immediate) and friends. */
if (speed)
- *cost += extra_cost->alu.shift;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.shift;
+ }
*cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
return true;
}
else
{
-
- /* ASR (register) and friends. */
if (speed)
- *cost += extra_cost->alu.shift_reg;
-
+ {
+ /* VAHR (register). */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ /* ASR (register) and friends. */
+ else
+ *cost += extra_cost->alu.shift_reg;
+ }
return false; /* All arguments need to be in registers. */
}
@@ -6179,7 +6269,12 @@ cost_plus:
case SIGN_EXTRACT:
/* UBFX/SBFX. */
if (speed)
- *cost += extra_cost->alu.bfx;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->alu.bfx;
+ }
/* We can trust that the immediates used will be correct (there
are no by-register forms), so we need only cost op0. */
@@ -6196,7 +6291,9 @@ cost_plus:
case UMOD:
if (speed)
{
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
+ extra_cost->mult[GET_MODE (x) == DImode].idiv);
else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6310,9 @@ cost_plus:
case SQRT:
if (speed)
{
- if (GET_MODE_CLASS (mode) == MODE_INT)
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
*cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6344,12 @@ cost_plus:
op2 = XEXP (x, 2);
if (speed)
- *cost += extra_cost->fp[mode == DFmode].fma;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].fma;
+ }
/* FMSUB, FNMADD, and FNMSUB are free. */
if (GET_CODE (op0) == NEG)
@@ -6285,7 +6389,13 @@ cost_plus:
case FLOAT_EXTEND:
if (speed)
- *cost += extra_cost->fp[mode == DFmode].widen;
+ {
+ if (VECTOR_MODE_P (mode))
+ /*Vector convertion. */
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].widen;
+ }
return false;
case FLOAT_TRUNCATE:
@@ -6311,8 +6421,13 @@ cost_plus:
}
if (speed)
- *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+ {
+ /* FCVT. */
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+ }
*cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
return true;
@@ -6321,7 +6436,12 @@ cost_plus:
{
/* FABS and FNEG are analogous. */
if (speed)
- *cost += extra_cost->fp[mode == DFmode].neg;
+ {
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
+ else
+ *cost += extra_cost->fp[mode == DFmode].neg;
+ }
}
else
{
@@ -6338,10 +6458,13 @@ cost_plus:
case SMIN:
if (speed)
{
+ if (VECTOR_MODE_P (mode))
+ *cost += extra_cost->vect.alu;
/* FMAXNM/FMINNM/FMAX/FMIN.
TODO: This may not be accurate for all implementations, but
we do not model this in the cost tables. */
- *cost += extra_cost->fp[mode == DFmode].addsub;
+ else
+ *cost += extra_cost->fp[mode == DFmode].addsub;
}
return false;
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 3ee7ebf..c8e1d2e 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -124,6 +124,8 @@ struct fp_cost_table
struct vector_cost_table
{
const int alu;
+ const int load;
+ const int store;
};
struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index 05e96a9..257902c 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -119,7 +119,9 @@ const struct cpu_cost_table generic_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -220,7 +222,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -321,7 +325,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1) /* Store. */
}
};
@@ -422,7 +428,9 @@ const struct cpu_cost_table xgene1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (2) /* alu. */
+ COSTS_N_INSNS (2), /* alu. */
+ COSTS_N_INSNS (1), /* Load. */
+ COSTS_N_INSNS (1), /* Store. */
}
};