This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] i386: move alignment defaults to processor_costs.
- From: Martin Liška <mliska at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Jan Hubicka <jh at suse dot cz>
- Date: Wed, 12 Sep 2018 14:44:55 +0200
- Subject: [PATCH] i386: move alignment defaults to processor_costs.
Hi.
Honza asked me to do this follow-up. It moves the definitions of the
CPU-specific alignments into the *_cost structures. The advantage is that
there is no redundant definition for CPUs that share the same cost table.
I verified that it produces the same output for all valid -march options:
gcc --help=common -Q -O2 -march=*
I still need to bootstrap and regression-test the patch. Is it OK to install once testing passes?
Thanks,
Martin
gcc/ChangeLog:
2018-09-12 Martin Liska <mliska@suse.cz>
* common/config/i386/i386-common.c (ix86_get_valid_option_values):
Use processor_names table.
* config/i386/i386.c (ix86_default_align): Use
processor_cost_table for alignment values.
(ix86_option_override_internal): Use processor_names.
(ix86_function_specific_print): Likewise.
* config/i386/i386.h (struct processor_costs):
Add alignment values.
(struct ptt): Remove and replace with const char *.
* config/i386/x86-tune-costs.h (struct processor_costs):
Declare default alignments for all costs.
---
gcc/common/config/i386/i386-common.c | 82 ++++++++++-----------
gcc/config/i386/i386.c | 15 ++--
gcc/config/i386/i386.h | 22 +++---
gcc/config/i386/x86-tune-costs.h | 104 +++++++++++++++++++++++++++
4 files changed, 159 insertions(+), 64 deletions(-)
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index c7eb859e1c1..3b5312d7250 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -1461,49 +1461,45 @@ i386_except_unwind_info (struct gcc_options *opts)
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
/* This table must be in sync with enum processor_type in i386.h. */
-const struct ptt processor_target_table[PROCESSOR_max] =
+const char *const processor_names[PROCESSOR_max] =
{
- /* The "0:0:8" label alignment specified for some processors generates
- secondary 8-byte alignment only for those label/jump/loop targets
- which have primary alignment. */
-
- {"generic", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"i386", "4", "4", NULL, "4" },
- {"i486", "16", "16", "0:0:8", "16"},
- {"pentium", "16:8:8", "16:8:8", "0:0:8", "16"},
- {"lakemont", "16:8:8", "16:8:8", "0:0:8", "16"},
- {"pentiumpro", "16", "16:11:8", "0:0:8", "16"},
- {"pentium4", NULL, NULL, NULL, NULL},
- {"nocona", NULL, NULL, NULL, NULL},
- {"core2", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"nehalem", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"sandybridge", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"haswell", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"bonnell", "16", "16:8:8", "0:0:8", "16"},
- {"silvermont", "16", "16:8:8", "0:0:8", "16"},
- {"goldmont", "16", "16:8:8", "0:0:8", "16"},
- {"goldmont-plus", "16", "16:8:8", "0:0:8", "16"},
- {"tremont", "16", "16:8:8", "0:0:8", "16"},
- {"knl", "16", "16:8:8", "0:0:8", "16"},
- {"knm", "16", "16:8:8", "0:0:8", "16"},
- {"skylake", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"skylake-avx512", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"cannonlake", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"icelake-client", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"icelake-server", "16:11:8", "16:11:8", "0:0:8", "16"},
- {"intel", "16", "16:8:8", "0:0:8", "16"},
- {"geode", NULL, NULL, NULL, NULL},
- {"k6", "32:8:8", "32:8:8", "0:0:8", "32"},
- {"athlon", "16:8:8", "16:8:8", "0:0:8", "16"},
- {"k8", "16:8:8", "16:8:8", "0:0:8", "16"},
- {"amdfam10", "32:25:8", "32:8:8", "0:0:8", "32"},
- {"bdver1", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"bdver2", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"bdver3", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"bdver4", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"btver1", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"btver2", "16:11:8", "16:8:8", "0:0:8", "11"},
- {"znver1", "16", "16", "0:0:8", "16"}
+ "generic",
+ "i386",
+ "i486",
+ "pentium",
+ "lakemont",
+ "pentiumpro",
+ "pentium4",
+ "nocona",
+ "core2",
+ "nehalem",
+ "sandybridge",
+ "haswell",
+ "bonnell",
+ "silvermont",
+ "goldmont",
+ "goldmont-plus",
+ "tremont",
+ "knl",
+ "knm",
+ "skylake",
+ "skylake-avx512",
+ "cannonlake",
+ "icelake-client",
+ "icelake-server",
+ "intel",
+ "geode",
+ "k6",
+ "athlon",
+ "k8",
+ "amdfam10",
+ "bdver1",
+ "bdver2",
+ "bdver3",
+ "bdver4",
+ "btver1",
+ "btver2",
+ "znver1"
};
const pta processor_alias_table[] =
@@ -1715,7 +1711,7 @@ ix86_get_valid_option_values (int option_code,
break;
case OPT_mtune_:
for (unsigned i = 0; i < PROCESSOR_max; i++)
- v.safe_push (processor_target_table[i].name);
+ v.safe_push (processor_names[i]);
break;
default:
break;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 48e484b3d62..96759de49bc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3352,13 +3352,13 @@ ix86_default_align (struct gcc_options *opts)
{
/* -falign-foo without argument: supply one. */
if (opts->x_flag_align_loops && !opts->x_str_align_loops)
- opts->x_str_align_loops = processor_target_table[ix86_tune].align_loop;
+ opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop;
if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
- opts->x_str_align_jumps = processor_target_table[ix86_tune].align_jump;
+ opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump;
if (opts->x_flag_align_labels && !opts->x_str_align_labels)
- opts->x_str_align_labels = processor_target_table[ix86_tune].align_label;
+ opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label;
if (opts->x_flag_align_functions && !opts->x_str_align_functions)
- opts->x_str_align_functions = processor_target_table[ix86_tune].align_func;
+ opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func;
}
/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
@@ -3488,8 +3488,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_tune_string = opts->x_ix86_arch_string;
if (!opts->x_ix86_tune_string)
{
- opts->x_ix86_tune_string
- = processor_target_table[TARGET_CPU_DEFAULT].name;
+ opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT];
ix86_tune_defaulted = 1;
}
@@ -4940,12 +4939,12 @@ ix86_function_specific_print (FILE *file, int indent,
gcc_assert (ptr->arch < PROCESSOR_max);
fprintf (file, "%*sarch = %d (%s)\n",
indent, "",
- ptr->arch, processor_target_table[ptr->arch].name);
+ ptr->arch, processor_names[ptr->arch]);
gcc_assert (ptr->tune < PROCESSOR_max);
fprintf (file, "%*stune = %d (%s)\n",
indent, "",
- ptr->tune, processor_target_table[ptr->tune].name);
+ ptr->tune, processor_names[ptr->tune]);
fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 382323d385b..01eba5dd01f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -311,6 +311,14 @@ struct processor_costs {
cost model. */
const int cond_not_taken_branch_cost;/* Cost of not taken branch for
vectorizer cost model. */
+
+ /* The "0:0:8" label alignment specified for some processors generates
+ secondary 8-byte alignment only for those label/jump/loop targets
+ which have primary alignment. */
+ const char *const align_loop; /* Loop alignment. */
+ const char *const align_jump; /* Jump alignment. */
+ const char *const align_label; /* Label alignment. */
+ const char *const align_func; /* Function alignment. */
};
extern const struct processor_costs *ix86_cost;
@@ -2278,19 +2286,7 @@ enum processor_type
};
#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
-/* Processor target table, indexed by processor number */
-struct ptt
-{
- const char *const name; /* processor name */
-
- /* Default alignments. */
- const char *const align_loop;
- const char *const align_jump;
- const char *const align_label;
- const char *const align_func;
-};
-
-extern const struct ptt processor_target_table[PROCESSOR_max];
+extern const char *const processor_names[PROCESSOR_max];
#include "wide-int-bitmask.h"
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index ff289342e4f..71a5854c09a 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -111,6 +111,10 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
+ NULL, /* Loop alignment. */
+ NULL, /* Jump alignment. */
+ NULL, /* Label alignment. */
+ NULL, /* Func alignment. */
};
/* Processor costs (relative to an add) */
@@ -197,6 +201,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "4", /* Loop alignment. */
+ "4", /* Jump alignment. */
+ NULL, /* Label alignment. */
+ "4", /* Func alignment. */
};
static stringop_algs i486_memcpy[2] = {
@@ -284,6 +292,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
static stringop_algs pentium_memcpy[2] = {
@@ -369,6 +381,10 @@ struct processor_costs pentium_cost = {
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:8:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
static const
@@ -447,6 +463,10 @@ struct processor_costs lakemont_cost = {
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:8:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
@@ -540,6 +560,10 @@ struct processor_costs pentiumpro_cost = {
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16:11:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
static stringop_algs geode_memcpy[2] = {
@@ -625,6 +649,10 @@ struct processor_costs geode_cost = {
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ NULL, /* Loop alignment. */
+ NULL, /* Jump alignment. */
+ NULL, /* Label alignment. */
+ NULL, /* Func alignment. */
};
static stringop_algs k6_memcpy[2] = {
@@ -712,6 +740,10 @@ struct processor_costs k6_cost = {
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "32:8:8", /* Loop alignment. */
+ "32:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "32", /* Func alignment. */
};
/* For some reason, Athlon deals better with REP prefix (relative to loops)
@@ -800,6 +832,10 @@ struct processor_costs athlon_cost = {
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:8:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* K8 has optimized REP instruction for medium sized blocks, but for very
@@ -897,6 +933,10 @@ struct processor_costs k8_cost = {
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:8:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
@@ -1001,6 +1041,10 @@ struct processor_costs amdfam10_cost = {
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "32:25:8", /* Loop alignment. */
+ "32:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "32", /* Func alignment. */
};
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1099,6 +1143,10 @@ const struct processor_costs bdver1_cost = {
bdver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
/* BDVER2 has optimized REP instruction for medium sized blocks, but for
@@ -1198,6 +1246,10 @@ const struct processor_costs bdver2_cost = {
bdver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
@@ -1296,6 +1348,10 @@ struct processor_costs bdver3_cost = {
bdver3_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
/* BDVER4 has optimized REP instruction for medium sized blocks, but for
@@ -1393,6 +1449,10 @@ struct processor_costs bdver4_cost = {
bdver4_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
@@ -1513,6 +1573,10 @@ struct processor_costs znver1_cost = {
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* skylake_cost should produce code tuned for Skylake familly of CPUs. */
@@ -1605,6 +1669,10 @@ struct processor_costs skylake_cost = {
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:11:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
very small blocks it is better to use loop. For large blocks, libcall can
@@ -1694,6 +1762,10 @@ const struct processor_costs btver1_cost = {
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
static stringop_algs btver2_memcpy[2] = {
@@ -1781,6 +1853,10 @@ const struct processor_costs btver2_cost = {
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "11", /* Func alignment. */
};
static stringop_algs pentium4_memcpy[2] = {
@@ -1867,6 +1943,10 @@ struct processor_costs pentium4_cost = {
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ NULL, /* Loop alignment. */
+ NULL, /* Jump alignment. */
+ NULL, /* Label alignment. */
+ NULL, /* Func alignment. */
};
static stringop_algs nocona_memcpy[2] = {
@@ -1956,6 +2036,10 @@ struct processor_costs nocona_cost = {
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ NULL, /* Loop alignment. */
+ NULL, /* Jump alignment. */
+ NULL, /* Label alignment. */
+ NULL, /* Func alignment. */
};
static stringop_algs atom_memcpy[2] = {
@@ -2043,6 +2127,10 @@ struct processor_costs atom_cost = {
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
static stringop_algs slm_memcpy[2] = {
@@ -2130,6 +2218,10 @@ struct processor_costs slm_cost = {
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
static stringop_algs intel_memcpy[2] = {
@@ -2217,6 +2309,10 @@ struct processor_costs intel_cost = {
intel_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16", /* Loop alignment. */
+ "16:8:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* Generic should produce code tuned for Core-i7 (and newer chips)
@@ -2313,6 +2409,10 @@ struct processor_costs generic_cost = {
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:11:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};
/* core_cost should produce code tuned for Core familly of CPUs. */
@@ -2416,5 +2516,9 @@ struct processor_costs core_cost = {
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
+ "16:11:8", /* Loop alignment. */
+ "16:11:8", /* Jump alignment. */
+ "0:0:8", /* Label alignment. */
+ "16", /* Func alignment. */
};