This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] [AARCH64] Improve vector generation cost model
- From: <apinski at marvell dot com>
- To: <gcc-patches at gcc dot gnu dot org>
- Cc: Andrew Pinski <apinski at marvell dot com>
- Date: Thu, 14 Mar 2019 18:18:34 -0700
- Subject: [PATCH] [AARCH64] Improve vector generation cost model
From: Andrew Pinski <apinski@marvell.com>
Hi,
On OcteonTX2, ld1r and ld1 (with a single lane) are split
into two different micro-ops unlike most other targets.
This adds three extra costs to the cost table:
ld1_dup: used for "ld1r {v0.4s}, [x0]"
merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
ld1_merge: used fir "ld1 {v0.4s}[0], [x0]"
OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
ChangeLog:
* config/arm/aarch-common-protos.h (vector_cost_table):
Add merge_dup, ld1_merge, and ld1_dup.
* config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
Update for the new fields.
(thunderx_extra_costs): Likewise.
(thunderx2t99_extra_costs): Likewise.
(tsv110_extra_costs): Likewise.
* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
(cortexa53_extra_costs): Likewise.
(cortexa57_extra_costs): Likewise.
(exynosm1_extra_costs): Likewise.
(xgene1_extra_costs): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
Hanlde vec_merge of a memory.
Signed-off-by: Andrew Pinski <apinski@marvell.com>
---
gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++----
gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++
gcc/config/arm/aarch-common-protos.h | 3 +++
gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++-----
4 files changed, 61 insertions(+), 9 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 5c9442e1b89..9a7c70ba595 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* Alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* Alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b38505b0872..dc4d3d39af8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10568,6 +10568,28 @@ cost_plus:
}
break;
+ case VEC_DUPLICATE:
+ if (!speed)
+ return false;
+
+ if (GET_CODE (XEXP (x, 0)) == MEM)
+ *cost += extra_cost->vect.ld1_dup;
+ else
+ *cost += extra_cost->vect.merge_dup;
+ return true;
+
+ case VEC_MERGE:
+ if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+ {
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
+ *cost += extra_cost->vect.ld1_merge;
+ else
+ *cost += extra_cost->vect.merge_dup;
+ return true;
+ }
+ break;
+
+
case TRUNCATE:
/* Decompose <su>muldi3_highpart. */
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 11cd5145bbc..dbc1282402a 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -131,6 +131,9 @@ struct fp_cost_table
struct vector_cost_table
{
const int alu;
+ const int merge_dup;
+ const int ld1_merge;
+ const int ld1_dup;
};
struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index bc33efadc6c..a51bc668f56 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (1) /* alu. */
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (1), /* dup_merge. */
+ COSTS_N_INSNS (1), /* ld1_merge. */
+ COSTS_N_INSNS (1) /* ld1_dup. */
}
};
@@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (0) /* alu. */
+ COSTS_N_INSNS (0), /* alu. */
+ COSTS_N_INSNS (0), /* dup_merge. */
+ COSTS_N_INSNS (0), /* ld1_merge. */
+ COSTS_N_INSNS (0) /* ld1_dup. */
}
};
@@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs =
},
/* Vector */
{
- COSTS_N_INSNS (2) /* alu. */
+ COSTS_N_INSNS (2), /* alu. */
+ COSTS_N_INSNS (2), /* dup_merge. */
+ COSTS_N_INSNS (2), /* ld1_merge. */
+ COSTS_N_INSNS (2) /* ld1_dup. */
}
};
--
2.17.1