This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RE: [Patch, i386] PR 59422 - Support more targets for function multi versioning


> Sorry, I must have been looking at an older version, but as I said I already did enable it in the latest patch. (see http://gcc.gnu.org/ml/gcc-patches/2013-12/msg01577.html )

Sorry for causing another revision, but we would like to stick with "btver1" and "btver2" rather than "BOBCAT" or "JAGUAR".
Therefore, the changes would be as follows:

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 206065)
+++ gcc/config/i386/i386.c	(working copy)
@@ -29965,9 +29965,14 @@
     P_PROC_SSE4_2,
     P_POPCNT,
     P_AVX,
+    P_PROC_AVX,
+    P_FMA4,
+    P_XOP,
+    P_PROC_XOP,
+    P_FMA,    
+    P_PROC_FMA,
     P_AVX2,
-    P_FMA,
-    P_PROC_FMA
+    P_PROC_AVX2
   };
 
  enum feature_priority priority = P_ZERO;
@@ -29986,11 +29991,15 @@
       {"sse", P_SSE},
       {"sse2", P_SSE2},
       {"sse3", P_SSE3},
+      {"sse4a", P_SSE4_a},
       {"ssse3", P_SSSE3},
       {"sse4.1", P_SSE4_1},
       {"sse4.2", P_SSE4_2},
       {"popcnt", P_POPCNT},
       {"avx", P_AVX},
+      {"fma4", P_FMA4},
+      {"xop", P_XOP},
+      {"fma", P_FMA},
       {"avx2", P_AVX2}
     };
 
@@ -30044,25 +30053,49 @@
 	      break;
             case PROCESSOR_COREI7_AVX:
               arg_str = "corei7-avx";
-              priority = P_PROC_SSE4_2;
+              priority = P_PROC_AVX;
               break;
+            case PROCESSOR_HASWELL:
+              arg_str = "core-avx2";
+              priority = P_PROC_AVX2;
+              break;
 	    case PROCESSOR_ATOM:
 	      arg_str = "atom";
 	      priority = P_PROC_SSSE3;
 	      break;
+            case PROCESSOR_SLM:
+              arg_str = "slm";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_AMDFAM10:
 	      arg_str = "amdfam10h";
 	      priority = P_PROC_SSE4_a;
 	      break;
+            case PROCESSOR_BTVER1:
+              arg_str = "btver1";
+              priority = P_PROC_SSE4_a;
+              break;
+            case PROCESSOR_BTVER2:
+              arg_str = "btver2";
+              priority = P_PROC_AVX;
+              break;
 	    case PROCESSOR_BDVER1:
 	      arg_str = "bdver1";
-	      priority = P_PROC_FMA;
+	      priority = P_PROC_XOP;
 	      break;
 	    case PROCESSOR_BDVER2:
 	      arg_str = "bdver2";
 	      priority = P_PROC_FMA;
 	      break;
-	    }  
+            case PROCESSOR_BDVER3:
+              arg_str = "bdver3";
+              priority = P_PROC_FMA;
+              break;
+            case PROCESSOR_BDVER4:
+              arg_str = "bdver4";
+              priority = P_PROC_AVX2;
+              break;
+            }  
 	}    
     
       cl_target_option_restore (&global_options, &cur_target);
@@ -30922,9 +30955,13 @@
     F_SSE2,
     F_SSE3,
     F_SSSE3,
+    F_SSE4_a,
     F_SSE4_1,
     F_SSE4_2,
     F_AVX,
+    F_FMA4,
+    F_XOP,
+    F_FMA,
     F_AVX2,
     F_MAX
   };
@@ -30943,6 +30980,10 @@
     M_AMDFAM10H,
     M_AMDFAM15H,
     M_INTEL_SLM,
+    M_INTEL_COREI7_AVX,
+    M_INTEL_CORE_AVX2,
+    M_AMD_BTVER1,
+    M_AMD_BTVER2,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
@@ -30953,7 +30994,9 @@
     M_AMDFAM15H_BDVER1,
     M_AMDFAM15H_BDVER2,
     M_AMDFAM15H_BDVER3,
-    M_AMDFAM15H_BDVER4
+    M_AMDFAM15H_BDVER4,
+    M_INTEL_COREI7_IVYBRIDGE,
+    M_INTEL_CORE_HASWELL
   };
 
   static struct _arch_names_table
@@ -30971,11 +31014,17 @@
       {"corei7", M_INTEL_COREI7},
       {"nehalem", M_INTEL_COREI7_NEHALEM},
       {"westmere", M_INTEL_COREI7_WESTMERE},
+      {"corei7-avx", M_INTEL_COREI7_AVX},
       {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
+      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
+      {"core-avx2", M_INTEL_CORE_AVX2},
+      {"haswell", M_INTEL_CORE_HASWELL},
       {"amdfam10h", M_AMDFAM10H},
       {"barcelona", M_AMDFAM10H_BARCELONA},
       {"shanghai", M_AMDFAM10H_SHANGHAI},
       {"istanbul", M_AMDFAM10H_ISTANBUL},
+      {"btver1", M_AMD_BTVER1},
+      {"btver2", M_AMD_BTVER2},
       {"amdfam15h", M_AMDFAM15H},
       {"bdver1", M_AMDFAM15H_BDVER1},
       {"bdver2", M_AMDFAM15H_BDVER2},
@@ -30997,9 +31046,13 @@
       {"sse2",   F_SSE2},
       {"sse3",   F_SSE3},
       {"ssse3",  F_SSSE3},
+      {"sse4a",  F_SSE4_a},
       {"sse4.1", F_SSE4_1},
       {"sse4.2", F_SSE4_2},
       {"avx",    F_AVX},
+      {"fma4",   F_FMA4},
+      {"xop",    F_XOP},
+      {"fma",    F_FMA},
       {"avx2",   F_AVX2}
     };

Index: libgcc/config/i386/cpuinfo.c
===================================================================
--- libgcc/config/i386/cpuinfo.c	(revision 206065)
+++ libgcc/config/i386/cpuinfo.c	(working copy)
@@ -62,6 +62,10 @@
   AMDFAM10H,
   AMDFAM15H,
   INTEL_SLM,
+  INTEL_COREI7_AVX,
+  INTEL_CORE_AVX2,
+  AMD_BTVER1,
+  AMD_BTVER2,
   CPU_TYPE_MAX
 };
 
@@ -75,6 +79,10 @@
   AMDFAM10H_ISTANBUL,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_CORE_HASWELL,
   CPU_SUBTYPE_MAX
 };
 
@@ -89,9 +97,13 @@
   FEATURE_SSE2,
   FEATURE_SSE3,
   FEATURE_SSSE3,
+  FEATURE_SSE4_a,
   FEATURE_SSE4_1,
   FEATURE_SSE4_2,
   FEATURE_AVX,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
   FEATURE_AVX2
 };
 
@@ -113,37 +125,46 @@
     {
     /* AMD Family 10h.  */
     case 0x10:
+      __cpu_model.__cpu_type = AMDFAM10H;
       switch (model)
 	{
 	case 0x2:
 	  /* Barcelona.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
 	  break;
 	case 0x4:
 	  /* Shanghai.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
 	  break;
 	case 0x8:
 	  /* Istanbul.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
 	  break;
 	default:
 	  break;
 	}
       break;
-    /* AMD Family 15h.  */
+    /* AMD Family 14h "btver1". */
+    case 0x14:
+      __cpu_model.__cpu_type = AMD_BTVER1;
+      break;
+    /* AMD Family 15h "Bulldozer".  */
     case 0x15:
       __cpu_model.__cpu_type = AMDFAM15H;
       /* Bulldozer version 1.  */
       if ( model <= 0xf)
 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
-      /* Bulldozer version 2.  */
-      if (model >= 0x10 && model <= 0x1f)
-	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+      /* Bulldozer version 2 "Piledriver" */
+      if (model >= 0x10 && model <= 0x2f)
+	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;      
+      /* Bulldozer version 3 "Steamroller"  */
+      if (model >= 0x30 && model <= 0x4f)
+        __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
       break;
+    /* AMD Family 16h "btver2". */
+    case 0x16:
+      __cpu_model.__cpu_type = AMD_BTVER2;
+      break;
     default:
       break;
     }
@@ -193,9 +214,23 @@
 	    case 0x2a:
 	    case 0x2d:
 	      /* Sandy Bridge.  */
-	      __cpu_model.__cpu_type = INTEL_COREI7;
+	      __cpu_model.__cpu_type = INTEL_COREI7_AVX;
 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
 	      break;
+            case 0x3a:
+            case 0x3e:
+              /* Ivy Bridge.  */
+              __cpu_model.__cpu_type = INTEL_COREI7_AVX;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+              break;
+            case 0x3c:
+            case 0x3f:
+            case 0x45:
+            case 0x46:
+              /* Haswell.  */
+              __cpu_model.__cpu_type = INTEL_CORE_AVX2;
+              __cpu_model.__cpu_subtype = INTEL_CORE_HASWELL;
+              break;
 	    case 0x17:
 	    case 0x1d:
 	      /* Penryn.  */
@@ -242,6 +275,8 @@
     features |= (1 << FEATURE_SSE4_2);
   if (ecx & bit_AVX)
     features |= (1 << FEATURE_AVX);
+  if (ecx & bit_FMA)
+    features |= (1 << FEATURE_FMA);
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
@@ -252,6 +287,23 @@
 	features |= (1 << FEATURE_AVX2);
     }
 
+  unsigned int ext_level;
+  unsigned int eax, ebx;
+  /* Check cpuid level of extended features.  */
+  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+  if (ext_level > 0x80000000)
+    {
+      __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (ecx & bit_SSE4a)
+        features |= (1 << FEATURE_SSE4_a);
+      if (ecx & bit_FMA4)
+        features |= (1 << FEATURE_FMA4);
+      if (ecx & bit_XOP)
+        features |= (1 << FEATURE_XOP);
+    }
+    
   __cpu_model.__cpu_features[0] = features;
 }

Regards
Ganesh


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]