This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RE: [Patch, i386] PR 59422 - Support more targets for function multi versioning
- From: "Gopalasubramanian, Ganesh" <Ganesh dot Gopalasubramanian at amd dot com>
- To: Allan Sandfeld Jensen <carewolf at gmail dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Uros Bizjak <ubizjak at gmail dot com>
- Date: Thu, 19 Dec 2013 10:13:17 +0000
- Subject: RE: [Patch, i386] PR 59422 - Support more targets for function multi versioning
- Authentication-results: sourceware.org; auth=none
- References: <201312151954 dot 38590 dot linux at carewolf dot com> <201312181836 dot 07898 dot linux at carewolf dot com> <EB4625145972F94C9680D8CADD6516155E790579 at SATLEXDAG02 dot amd dot com> <201312191054 dot 25953 dot linux at carewolf dot com>
> Sorry, I must have been looking at an older version, but as I said I already did enable it in the latest patch. (see http://gcc.gnu.org/ml/gcc-patches/2013-12/msg01577.html )
Sorry for causing another revision, but we would like to stick with the names "btver1" and "btver2" rather than "BOBCAT" and "JAGUAR".
Therefore, the changes would look like this:
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 206065)
+++ gcc/config/i386/i386.c (working copy)
@@ -29965,9 +29965,14 @@
P_PROC_SSE4_2,
P_POPCNT,
P_AVX,
+ P_PROC_AVX,
+ P_FMA4,
+ P_XOP,
+ P_PROC_XOP,
+ P_FMA,
+ P_PROC_FMA,
P_AVX2,
- P_FMA,
- P_PROC_FMA
+ P_PROC_AVX2
};
enum feature_priority priority = P_ZERO;
@@ -29986,11 +29991,15 @@
{"sse", P_SSE},
{"sse2", P_SSE2},
{"sse3", P_SSE3},
+ {"sse4a", P_SSE4_a},
{"ssse3", P_SSSE3},
{"sse4.1", P_SSE4_1},
{"sse4.2", P_SSE4_2},
{"popcnt", P_POPCNT},
{"avx", P_AVX},
+ {"fma4", P_FMA4},
+ {"xop", P_XOP},
+ {"fma", P_FMA},
{"avx2", P_AVX2}
};
@@ -30044,25 +30053,49 @@
break;
case PROCESSOR_COREI7_AVX:
arg_str = "corei7-avx";
- priority = P_PROC_SSE4_2;
+ priority = P_PROC_AVX;
break;
+ case PROCESSOR_HASWELL:
+ arg_str = "core-avx2";
+ priority = P_PROC_AVX2;
+ break;
case PROCESSOR_ATOM:
arg_str = "atom";
priority = P_PROC_SSSE3;
break;
+ case PROCESSOR_SLM:
+ arg_str = "slm";
+ priority = P_PROC_SSE4_2;
+ break;
case PROCESSOR_AMDFAM10:
arg_str = "amdfam10h";
priority = P_PROC_SSE4_a;
break;
+ case PROCESSOR_BTVER1:
+ arg_str = "btver1";
+ priority = P_PROC_SSE4_a;
+ break;
+ case PROCESSOR_BTVER2:
+ arg_str = "btver2";
+ priority = P_PROC_AVX;
+ break;
case PROCESSOR_BDVER1:
arg_str = "bdver1";
- priority = P_PROC_FMA;
+ priority = P_PROC_XOP;
break;
case PROCESSOR_BDVER2:
arg_str = "bdver2";
priority = P_PROC_FMA;
break;
- }
+ case PROCESSOR_BDVER3:
+ arg_str = "bdver3";
+ priority = P_PROC_FMA;
+ break;
+ case PROCESSOR_BDVER4:
+ arg_str = "bdver4";
+ priority = P_PROC_AVX2;
+ break;
+ }
}
cl_target_option_restore (&global_options, &cur_target);
@@ -30922,9 +30955,13 @@
F_SSE2,
F_SSE3,
F_SSSE3,
+ F_SSE4_a,
F_SSE4_1,
F_SSE4_2,
F_AVX,
+ F_FMA4,
+ F_XOP,
+ F_FMA,
F_AVX2,
F_MAX
};
@@ -30943,6 +30980,10 @@
M_AMDFAM10H,
M_AMDFAM15H,
M_INTEL_SLM,
+ M_INTEL_COREI7_AVX,
+ M_INTEL_CORE_AVX2,
+ M_AMD_BTVER1,
+ M_AMD_BTVER2,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
@@ -30953,7 +30994,9 @@
M_AMDFAM15H_BDVER1,
M_AMDFAM15H_BDVER2,
M_AMDFAM15H_BDVER3,
- M_AMDFAM15H_BDVER4
+ M_AMDFAM15H_BDVER4,
+ M_INTEL_COREI7_IVYBRIDGE,
+ M_INTEL_CORE_HASWELL
};
static struct _arch_names_table
@@ -30971,11 +31014,17 @@
{"corei7", M_INTEL_COREI7},
{"nehalem", M_INTEL_COREI7_NEHALEM},
{"westmere", M_INTEL_COREI7_WESTMERE},
+ {"corei7-avx", M_INTEL_COREI7_AVX},
{"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
+ {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
+ {"core-avx2", M_INTEL_CORE_AVX2},
+ {"haswell", M_INTEL_CORE_HASWELL},
{"amdfam10h", M_AMDFAM10H},
{"barcelona", M_AMDFAM10H_BARCELONA},
{"shanghai", M_AMDFAM10H_SHANGHAI},
{"istanbul", M_AMDFAM10H_ISTANBUL},
+ {"btver1", M_AMD_BTVER1},
+ {"btver2", M_AMD_BTVER2},
{"amdfam15h", M_AMDFAM15H},
{"bdver1", M_AMDFAM15H_BDVER1},
{"bdver2", M_AMDFAM15H_BDVER2},
@@ -30997,9 +31046,13 @@
{"sse2", F_SSE2},
{"sse3", F_SSE3},
{"ssse3", F_SSSE3},
+ {"sse4a", F_SSE4_a},
{"sse4.1", F_SSE4_1},
{"sse4.2", F_SSE4_2},
{"avx", F_AVX},
+ {"fma4", F_FMA4},
+ {"xop", F_XOP},
+ {"fma", F_FMA},
{"avx2", F_AVX2}
};
Index: libgcc/config/i386/cpuinfo.c
===================================================================
--- libgcc/config/i386/cpuinfo.c (revision 206065)
+++ libgcc/config/i386/cpuinfo.c (working copy)
@@ -62,6 +62,10 @@
AMDFAM10H,
AMDFAM15H,
INTEL_SLM,
+ INTEL_COREI7_AVX,
+ INTEL_CORE_AVX2,
+ AMD_BTVER1,
+ AMD_BTVER2,
CPU_TYPE_MAX
};
@@ -75,6 +79,10 @@
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ INTEL_COREI7_IVYBRIDGE,
+ INTEL_CORE_HASWELL,
CPU_SUBTYPE_MAX
};
@@ -89,9 +97,13 @@
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
+ FEATURE_SSE4_a,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
FEATURE_AVX2
};
@@ -113,37 +125,46 @@
{
/* AMD Family 10h. */
case 0x10:
+ __cpu_model.__cpu_type = AMDFAM10H;
switch (model)
{
case 0x2:
/* Barcelona. */
- __cpu_model.__cpu_type = AMDFAM10H;
__cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
break;
case 0x4:
/* Shanghai. */
- __cpu_model.__cpu_type = AMDFAM10H;
__cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
break;
case 0x8:
/* Istanbul. */
- __cpu_model.__cpu_type = AMDFAM10H;
__cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
break;
default:
break;
}
break;
- /* AMD Family 15h. */
+ /* AMD Family 14h "btver1". */
+ case 0x14:
+ __cpu_model.__cpu_type = AMD_BTVER1;
+ break;
+ /* AMD Family 15h "Bulldozer". */
case 0x15:
__cpu_model.__cpu_type = AMDFAM15H;
/* Bulldozer version 1. */
if ( model <= 0xf)
__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
- /* Bulldozer version 2. */
- if (model >= 0x10 && model <= 0x1f)
- __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+ /* Bulldozer version 2 "Piledriver" */
+ if (model >= 0x10 && model <= 0x2f)
+ __cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+ /* Bulldozer version 3 "Steamroller" */
+ if (model >= 0x30 && model <= 0x4f)
+ __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
break;
+ /* AMD Family 16h "btver2". */
+ case 0x16:
+ __cpu_model.__cpu_type = AMD_BTVER2;
+ break;
default:
break;
}
@@ -193,9 +214,23 @@
case 0x2a:
case 0x2d:
/* Sandy Bridge. */
- __cpu_model.__cpu_type = INTEL_COREI7;
+ __cpu_model.__cpu_type = INTEL_COREI7_AVX;
__cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
break;
+ case 0x3a:
+ case 0x3e:
+ /* Ivy Bridge. */
+ __cpu_model.__cpu_type = INTEL_COREI7_AVX;
+ __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+ break;
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+ /* Haswell. */
+ __cpu_model.__cpu_type = INTEL_CORE_AVX2;
+ __cpu_model.__cpu_subtype = INTEL_CORE_HASWELL;
+ break;
case 0x17:
case 0x1d:
/* Penryn. */
@@ -242,6 +275,8 @@
features |= (1 << FEATURE_SSE4_2);
if (ecx & bit_AVX)
features |= (1 << FEATURE_AVX);
+ if (ecx & bit_FMA)
+ features |= (1 << FEATURE_FMA);
/* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
if (max_cpuid_level >= 7)
@@ -252,6 +287,23 @@
features |= (1 << FEATURE_AVX2);
}
+ unsigned int ext_level;
+ unsigned int eax, ebx;
+ /* Check cpuid level of extended features. */
+ __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+ if (ext_level > 0x80000000)
+ {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (ecx & bit_SSE4a)
+ features |= (1 << FEATURE_SSE4_a);
+ if (ecx & bit_FMA4)
+ features |= (1 << FEATURE_FMA4);
+ if (ecx & bit_XOP)
+ features |= (1 << FEATURE_XOP);
+ }
+
__cpu_model.__cpu_features[0] = features;
}
Regards
Ganesh