[PATCH] [3/4] Add L3 cache detection for Intel CPUs

Andi Kleen andi@firstfloor.org
Thu Jan 14 02:21:00 GMT 2010


Various Intel CPUs (Xeon 74xx, Xeon 55xx, Core i3/5/7) have L3
caches. Add proper L3 detection through CPUID4 to driver-i386.c.

Right now this simply replaces the L2 size with L3,
but reports it as L2 to the compiler.

This is important on cores (like 55xx) where the L2 caches
are much smaller than the L3 caches to allow the loop
optimizer to benefit from the larger caches.

This is not quite correct: it assumes a single-threaded program
and inclusive caches
(and should probably be adjusted for parallelization etc.),
but doing all of that properly would be much more complicated,
and this simple heuristic will likely be an improvement.

---
 gcc/config/i386/driver-i386.c |   16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

Index: gcc/gcc/config/i386/driver-i386.c
===================================================================
--- gcc.orig/gcc/config/i386/driver-i386.c
+++ gcc/gcc/config/i386/driver-i386.c
@@ -264,7 +264,8 @@ enum cache_type
 };
 
 static void
-detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2)
+detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
+		      struct cache_desc *level3)
 {
   struct cache_desc *cache;
 
@@ -289,6 +290,9 @@ detect_caches_cpuid4 (struct cache_desc
 	      case 2:
 		cache = level2;
 		break;
+	      case 3:
+		cache = level3;
+		break;
 	      default:
 		cache = NULL;
 	      }
@@ -316,10 +320,11 @@ detect_caches_cpuid4 (struct cache_desc
 static const char *
 detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level)
 {
-  struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0};
+  struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0},
+	            level3 = {0, 0, 0};
 
   if (max_level >= 4)
-    detect_caches_cpuid4 (&level1, &level2);
+    detect_caches_cpuid4 (&level1, &level2, &level3);
   else if (max_level >= 2)
     detect_caches_cpuid2 (xeon_mp, &level1, &level2);
   else
@@ -328,6 +333,11 @@ detect_caches_intel (bool xeon_mp, unsig
   if (level1.sizekb == 0)
     return "";
 
+  /* Let the L3 replace the L2. This assumes inclusive caches
+     and single threaded program for now. */
+  if (level3.sizekb)
+    level2 = level3;
+
   /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
      method if other methods fail to provide L2 cache parameters.  */
   if (level2.sizekb == 0 && max_ext_level >= 0x80000006)



More information about the Gcc-patches mailing list