This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: Separate processor model from ix86_tune
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: ubizjak at gmail dot com, jh at suse dot cz
- Date: Thu, 25 Sep 2008 17:51:56 -0700
- Subject: PATCH: Separate processor model from ix86_tune
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
This patch adds ix86_cpu to separate the processor model from ix86_tune.
ix86_tune continues to control the x86 optimization features, while the
new ix86_cpu controls the processor model used by the scheduler. With
this split we can add a new tuning option, -mtune=xxxx, that reuses an
existing processor model. We can even simplify
(define_attr "cpu"
"i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
nocona,core2,generic32,generic64,amdfam10"
(const (symbol_ref "ix86_cpu")))
to make the scheduler run faster. OK for trunk?
Thanks.
H.J.
---
2008-09-25 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_cpu): New.
* config/i386/i386.c (ix86_cpu): New.
(override_options): Add cpu to processor_alias_table. Set
ix86_cpu from the cpu field in processor_alias_table.
(ix86_function_specific_save): Save ix86_cpu.
(ix86_function_specific_restore): Restore ix86_cpu.
* config/i386/i386.md (cpu): Map to ix86_cpu instead of
ix86_tune.
* config/i386/i386.opt: Add cpu.
--- gcc/config/i386/i386-protos.h.sched 2008-09-02 17:10:49.000000000 -0700
+++ gcc/config/i386/i386-protos.h 2008-09-25 17:12:09.000000000 -0700
@@ -272,3 +272,7 @@ extern enum rtx_code ix86_fp_compare_cod
extern rtx construct_plt_address (rtx);
#endif
extern int asm_preferred_eh_data_format (int, int);
+
+#ifdef HAVE_ATTR_cpu
+extern enum attr_cpu ix86_cpu;
+#endif
--- gcc/config/i386/i386.c.sched 2008-09-24 08:43:15.000000000 -0700
+++ gcc/config/i386/i386.c 2008-09-25 17:36:34.000000000 -0700
@@ -1693,6 +1693,9 @@ enum tls_dialect ix86_tls_dialect = TLS_
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for. */
+enum attr_cpu ix86_cpu;
+
+/* Which cpu are we optimizing for. */
enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
@@ -2490,93 +2493,89 @@ override_options (bool main_args_p)
{
const char *const name; /* processor name or nickname. */
const enum processor_type processor;
+ const enum attr_cpu cpu;
const unsigned /*enum pta_flags*/ flags;
}
const processor_alias_table[] =
{
- {"i386", PROCESSOR_I386, 0},
- {"i486", PROCESSOR_I486, 0},
- {"i586", PROCESSOR_PENTIUM, 0},
- {"pentium", PROCESSOR_PENTIUM, 0},
- {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
- {"winchip-c6", PROCESSOR_I486, PTA_MMX},
- {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"i686", PROCESSOR_PENTIUMPRO, 0},
- {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
- {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
- {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
- {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
- {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
- {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
- {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_CX16 | PTA_NO_SAHF)},
- {"core2", PROCESSOR_CORE2, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3
- | PTA_CX16)},
- {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- |PTA_PREFETCH_SSE)},
- {"k6", PROCESSOR_K6, PTA_MMX},
- {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_PREFETCH_SSE)},
- {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_PREFETCH_SSE)},
- {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"x86-64", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"k8", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"opteron", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"athlon64", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A
- | PTA_CX16 | PTA_ABM)},
- {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A
- | PTA_CX16 | PTA_ABM)},
- {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
- {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
+ {"i386", PROCESSOR_I386, CPU_I386, 0},
+ {"i486", PROCESSOR_I486, CPU_I486, 0},
+ {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, CPU_I486, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, CPU_I486, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, CPU_I486, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, CPU_PENTIUM4,
+ PTA_MMX |PTA_SSE | PTA_SSE2},
+ {"pentium4m", PROCESSOR_PENTIUM4, CPU_PENTIUM4,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"prescott", PROCESSOR_NOCONA, CPU_NOCONA,
+ PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
+ {"nocona", PROCESSOR_NOCONA, CPU_NOCONA,
+ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_CX16 | PTA_NO_SAHF)},
+ {"core2", PROCESSOR_CORE2, CPU_CORE2,
+ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16)},
+ {"geode", PROCESSOR_GEODE, CPU_GEODE,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE)},
+ {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE)},
+ {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE)},
+ {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE)},
+ {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE)},
+ {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
+ (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE)},
+ {"x86-64", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF)},
+ {"k8", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF)},
+ {"k8-sse3", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF)},
+ {"opteron", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF)},
+ {"opteron-sse3", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF)},
+ {"athlon64", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF)},
+ {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF)},
+ {"athlon-fx", PROCESSOR_K8, CPU_K8,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF)},
+ {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM)},
+ {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ (PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM)},
+ {"generic32", PROCESSOR_GENERIC32, CPU_GENERIC32,
+ 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
+ PTA_64BIT /* flags are only used for -march switch. */ },
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
@@ -2766,6 +2765,7 @@ override_options (bool main_args_p)
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
{
+ ix86_cpu = processor_alias_table[i].cpu;
ix86_arch = processor_alias_table[i].processor;
/* Default cpu tuning to the architecture. */
ix86_tune = ix86_arch;
@@ -2848,6 +2848,7 @@ override_options (bool main_args_p)
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
{
+ ix86_cpu = processor_alias_table[i].cpu;
ix86_tune = processor_alias_table[i].processor;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
{
@@ -2858,6 +2859,7 @@ override_options (bool main_args_p)
if (! strcmp (ix86_tune_string,
processor_alias_table[i].name))
break;
+ ix86_cpu = processor_alias_table[i].cpu;
ix86_tune = processor_alias_table[i].processor;
}
else
@@ -3276,11 +3278,13 @@ static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
gcc_assert (IN_RANGE (ix86_arch, 0, 255));
+ gcc_assert (IN_RANGE (ix86_cpu, 0, 255));
gcc_assert (IN_RANGE (ix86_tune, 0, 255));
gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
ptr->arch = ix86_arch;
+ ptr->cpu = ix86_cpu;
ptr->tune = ix86_tune;
ptr->fpmath = ix86_fpmath;
ptr->branch_cost = ix86_branch_cost;
@@ -3301,6 +3305,7 @@ ix86_function_specific_restore (struct c
int i;
ix86_arch = ptr->arch;
+ ix86_cpu = ptr->cpu;
ix86_tune = ptr->tune;
ix86_fpmath = ptr->fpmath;
ix86_branch_cost = ptr->branch_cost;
--- gcc/config/i386/i386.md.sched 2008-09-25 17:09:36.000000000 -0700
+++ gcc/config/i386/i386.md 2008-09-25 17:42:22.000000000 -0700
@@ -295,11 +295,10 @@
;; "reload_completed && TARGET_64BIT".
-;; Processor type. This attribute must exactly match the processor_type
-;; enumeration in i386.h.
+;; Processor type.
(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
nocona,core2,generic32,generic64,amdfam10"
- (const (symbol_ref "ix86_tune")))
+ (const (symbol_ref "ix86_cpu")))
;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
--- gcc/config/i386/i386.opt.sched 2008-09-02 17:10:49.000000000 -0700
+++ gcc/config/i386/i386.opt 2008-09-25 17:35:23.000000000 -0700
@@ -31,6 +31,10 @@ unsigned char tune
TargetSave
unsigned char fpmath
+;; CPU model
+TargetSave
+unsigned char cpu
+
;; branch cost
TargetSave
unsigned char branch_cost