This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch, i386] PR 59422 - Support more targets for function multi versioning


Patch updated to keep libgcc enums backwards compatible.

Allan
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 205984)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2013-12-14  Allan Sandfeld Jensen <sandfeld@kde.org>
+
+        PR gcc/59422
+        * config/i386/i386.c: Extend function multiversioning
+        to better support recent Intel and AMD models.
+        
 2013-12-14  Marek Polacek  <polacek@redhat.com>
 
 	PR sanitizer/59503
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 205984)
+++ gcc/config/i386/i386.c	(working copy)
@@ -29962,9 +29962,14 @@
     P_PROC_SSE4_2,
     P_POPCNT,
     P_AVX,
+    P_PROC_AVX,
+    P_FMA4,
+    P_XOP,
+    P_PROC_XOP,
+    P_FMA,    
+    P_PROC_FMA,
     P_AVX2,
-    P_FMA,
-    P_PROC_FMA
+    P_PROC_AVX2
   };
 
  enum feature_priority priority = P_ZERO;
@@ -29983,11 +29988,15 @@
       {"sse", P_SSE},
       {"sse2", P_SSE2},
       {"sse3", P_SSE3},
+      {"sse4a", P_SSE4_a},
       {"ssse3", P_SSSE3},
       {"sse4.1", P_SSE4_1},
       {"sse4.2", P_SSE4_2},
       {"popcnt", P_POPCNT},
       {"avx", P_AVX},
+      {"fma4", P_FMA4},
+      {"xop", P_XOP},
+      {"fma", P_FMA},
       {"avx2", P_AVX2}
     };
 
@@ -30041,25 +30050,49 @@
 	      break;
             case PROCESSOR_COREI7_AVX:
               arg_str = "corei7-avx";
-              priority = P_PROC_SSE4_2;
+              priority = P_PROC_AVX;
               break;
+            case PROCESSOR_HASWELL:
+              arg_str = "core-avx2";
+              priority = P_PROC_AVX2;
+              break;
 	    case PROCESSOR_ATOM:
 	      arg_str = "atom";
 	      priority = P_PROC_SSSE3;
 	      break;
+            case PROCESSOR_SLM:
+              arg_str = "slm";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_AMDFAM10:
 	      arg_str = "amdfam10h";
 	      priority = P_PROC_SSE4_a;
 	      break;
+            case PROCESSOR_BTVER1:
+              arg_str = "btver1";
+              priority = P_PROC_SSE4_a;
+              break;
+            case PROCESSOR_BTVER2:
+              arg_str = "btver2";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_BDVER1:
 	      arg_str = "bdver1";
-	      priority = P_PROC_FMA;
+	      priority = P_PROC_XOP;
 	      break;
 	    case PROCESSOR_BDVER2:
 	      arg_str = "bdver2";
 	      priority = P_PROC_FMA;
 	      break;
-	    }  
+            case PROCESSOR_BDVER3:
+              arg_str = "bdver3";
+              priority = P_PROC_FMA;
+              break;
+            case PROCESSOR_BDVER4:
+              arg_str = "bdver4";
+              priority = P_PROC_AVX2;
+              break;
+            }  
 	}    
     
       cl_target_option_restore (&global_options, &cur_target);
@@ -30919,9 +30952,13 @@
     F_SSE2,
     F_SSE3,
     F_SSSE3,
+    F_SSE4_a,
     F_SSE4_1,
     F_SSE4_2,
     F_AVX,
+    F_FMA4,
+    F_XOP,
+    F_FMA,
     F_AVX2,
     F_MAX
   };
@@ -30940,6 +30977,7 @@
     M_AMDFAM10H,
     M_AMDFAM15H,
     M_INTEL_SLM,
+    M_AMDFAM14H,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
@@ -30950,7 +30988,10 @@
     M_AMDFAM15H_BDVER1,
     M_AMDFAM15H_BDVER2,
     M_AMDFAM15H_BDVER3,
-    M_AMDFAM15H_BDVER4
+    M_AMDFAM15H_BDVER4,
+    M_INTEL_COREI7_IVYBRIDGE,
+    M_INTEL_COREI7_HASWELL,
+    M_AMDFAM14H_BTVER1,
   };
 
   static struct _arch_names_table
@@ -30968,11 +31009,15 @@
       {"corei7", M_INTEL_COREI7},
       {"nehalem", M_INTEL_COREI7_NEHALEM},
       {"westmere", M_INTEL_COREI7_WESTMERE},
-      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
+      {"corei7-avx", M_INTEL_COREI7_SANDYBRIDGE},
+      {"core-avx-i", M_INTEL_COREI7_IVYBRIDGE},
+      {"core-avx2", M_INTEL_COREI7_HASWELL},
       {"amdfam10h", M_AMDFAM10H},
       {"barcelona", M_AMDFAM10H_BARCELONA},
       {"shanghai", M_AMDFAM10H_SHANGHAI},
       {"istanbul", M_AMDFAM10H_ISTANBUL},
+      {"amdfam14h", M_AMDFAM14H},
+      {"btver1", M_AMDFAM14H_BTVER1},
       {"amdfam15h", M_AMDFAM15H},
       {"bdver1", M_AMDFAM15H_BDVER1},
       {"bdver2", M_AMDFAM15H_BDVER2},
@@ -30994,9 +31039,13 @@
       {"sse2",   F_SSE2},
       {"sse3",   F_SSE3},
       {"ssse3",  F_SSSE3},
+      {"sse4a",  F_SSE4_a},
       {"sse4.1", F_SSE4_1},
       {"sse4.2", F_SSE4_2},
       {"avx",    F_AVX},
+      {"fma4",   F_FMA4},
+      {"xop",    F_XOP},
+      {"fma",    F_FMA},
       {"avx2",   F_AVX2}
     };
 
Index: libgcc/ChangeLog
===================================================================
--- libgcc/ChangeLog	(revision 205984)
+++ libgcc/ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2013-12-14  Allan Sandfeld Jensen <sandfeld@kde.org>
+
+        PR gcc/59422
+        * config/i386/cpuinfo.c: Detect sse4a, fma4, xop and fma
+        ISAs and recent Intel and AMD models.
+        
 2013-12-12  Zhenqiang Chen  <zhenqiang.chen@arm.com>
 
 	* config.host (arm*-*-uclinux*): Move t-arm before t-bpabi.
Index: libgcc/config/i386/cpuinfo.c
===================================================================
--- libgcc/config/i386/cpuinfo.c	(revision 205984)
+++ libgcc/config/i386/cpuinfo.c	(working copy)
@@ -62,6 +62,7 @@
   AMDFAM10H,
   AMDFAM15H,
   INTEL_SLM,
+  AMDFAM14H,
   CPU_TYPE_MAX
 };
 
@@ -75,6 +76,11 @@
   AMDFAM10H_ISTANBUL,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
+  AMDFAM14H_BTVER1,
   CPU_SUBTYPE_MAX
 };
 
@@ -89,9 +95,13 @@
   FEATURE_SSE2,
   FEATURE_SSE3,
   FEATURE_SSSE3,
+  FEATURE_SSE4_a,
   FEATURE_SSE4_1,
   FEATURE_SSE4_2,
   FEATURE_AVX,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
   FEATURE_AVX2
 };
 
@@ -113,36 +123,43 @@
     {
     /* AMD Family 10h.  */
     case 0x10:
+      __cpu_model.__cpu_type = AMDFAM10H;
       switch (model)
 	{
 	case 0x2:
 	  /* Barcelona.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
 	  break;
 	case 0x4:
 	  /* Shanghai.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
 	  break;
 	case 0x8:
 	  /* Istanbul.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
 	  break;
 	default:
 	  break;
 	}
       break;
-    /* AMD Family 15h.  */
+    /* AMD Family 14h "Bobcat". */
+    case 0x14:
+      __cpu_model.__cpu_type = AMDFAM14H;
+      if ( model <= 0xf)
+        __cpu_model.__cpu_subtype = AMDFAM14H_BTVER1;
+      break;
+    /* AMD Family 15h "Bulldozer".  */
     case 0x15:
       __cpu_model.__cpu_type = AMDFAM15H;
       /* Bulldozer version 1.  */
       if ( model <= 0xf)
 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
-      /* Bulldozer version 2.  */
-      if (model >= 0x10 && model <= 0x1f)
-	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+      /* Bulldozer version 2 "Piledriver" */
+      if (model >= 0x10 && model <= 0x2f)
+	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;      
+      /* Bulldozer version 3 "Steamroller"  */
+      if (model >= 0x30 && model <= 0x4f)
+        __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
       break;
     default:
       break;
@@ -196,6 +213,20 @@
 	      __cpu_model.__cpu_type = INTEL_COREI7;
 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
 	      break;
+            case 0x3a:
+            case 0x3e:
+              /* Ivy Bridge.  */
+              __cpu_model.__cpu_type = INTEL_COREI7;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+              break;
+            case 0x3c:
+            case 0x3f:
+            case 0x45:
+            case 0x46:
+              /* Haswell.  */
+              __cpu_model.__cpu_type = INTEL_COREI7;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
+              break;
 	    case 0x17:
 	    case 0x1d:
 	      /* Penryn.  */
@@ -242,6 +271,8 @@
     features |= (1 << FEATURE_SSE4_2);
   if (ecx & bit_AVX)
     features |= (1 << FEATURE_AVX);
+  if (ecx & bit_FMA)
+    features |= (1 << FEATURE_FMA);
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
@@ -252,6 +283,23 @@
 	features |= (1 << FEATURE_AVX2);
     }
 
+  unsigned int ext_level;
+  unsigned int eax, ebx;
+  /* Check cpuid level of extended features.  */
+  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+  if (ext_level > 0x80000000)
+    {
+      __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (ecx & bit_SSE4a)
+        features |= (1 << FEATURE_SSE4_a);
+      if (ecx & bit_FMA4)
+        features |= (1 << FEATURE_FMA4);
+      if (ecx & bit_XOP)
+        features |= (1 << FEATURE_XOP);
+    }
+    
   __cpu_model.__cpu_features[0] = features;
 }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]