[patch] Detect cache size with -mtune=native

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Mon Feb 26 16:01:00 GMT 2007


Hello,

this patch extends -mtune=native to detect the size of the l1 cache and
of the cache line, and to pass it to the compiler by params.  I have
tested this on several machines to which I have access, and the results
seem to be correct (but especially the Intel way of describing the cache
sizes is quite ugly).

Bootstrapped & regtested on i686.

Zdenek

	* config/i386/driver-i386.c (describe_cache, detect_caches_amd,
	decode_caches_intel, detect_caches_intel): New functions.
	(host_detect_local_cpu): Use detect_caches_amd and
	detect_caches_intel.

Index: config/i386/driver-i386.c
===================================================================
*** config/i386/driver-i386.c	(revision 122257)
--- config/i386/driver-i386.c	(working copy)
*************** const char *host_detect_local_cpu (int a
*** 47,52 ****
--- 47,181 ----
  #define bit_3DNOWP (1 << 30)
  #define bit_LM (1 << 29)
  
+ /* Returns the --param options describing an L1_ASSOC-way associative cache
+    of L1_SIZEKB kilobytes with lines of size L1_LINE, or "" if L1_LINE is 0.  */
+ 
+ static char *
+ describe_cache (unsigned l1_sizekb, unsigned l1_line, unsigned l1_assoc)
+ {
+   char size[1000], line[1000];
+   unsigned reduced_size, loss, i;
+ 
+   /* L1_LINE may be a raw cpuid field; a zero would make us divide by 0.  */
+   if (l1_line == 0)
+     return (char *) "";
+ 
+   /* We approximate the effects of associativity by considering the cache to
+      have only (1 - 1/2^assoc) times its nominal size.  */
+   reduced_size = l1_sizekb * 1024;
+   loss = reduced_size;
+   for (i = 0; i < l1_assoc && loss > 0; i++)
+     loss /= 2;
+   reduced_size -= loss;
+   sprintf (size, "--param l1-cache-size=%u", reduced_size / l1_line);
+   sprintf (line, "--param l1-cache-line-size=%u", l1_line);
+   return concat (size, " ", line, " ", NULL);
+ }
+ 
+ /* Returns the cache description for an AMD processor, or "" if unknown.  */
+ 
+ static char *
+ detect_caches_amd (unsigned max_ext_level)
+ {
+   unsigned eax, ebx, ecx, edx;
+   unsigned l1_sizekb, l1_line, l1_assoc;
+ 
+   /* Must not be NULL: the caller passes the result as the first argument
+      of concat, where NULL would end the list and drop the -m option.  */
+   if (max_ext_level < 0x80000005)
+     return (char *) "";
+ 
+   cpuid (0x80000005, eax, ebx, ecx, edx);
+   l1_line = ecx & 0xff;
+   l1_assoc = (ecx >> 16) & 0xff;
+   l1_sizekb = (ecx >> 24) & 0xff;
+   return describe_cache (l1_sizekb, l1_line, l1_assoc);
+ }
+ 
+ /* Decodes the four descriptor bytes in REG; for each known L1 cache descriptor
+    stores size (kB), line size and associativity to the L1_* output pointers.  */
+ 
+ static void
+ decode_caches_intel (unsigned reg, unsigned *l1_sizekb, unsigned *l1_line,
+ 		     unsigned *l1_assoc)
+ {
+   unsigned i, val;
+   /* A register with bit 31 set is skipped as a whole.  */
+   if (((reg >> 31) & 1) != 0)
+     return;
+   /* Walk the four bytes of REG from least to most significant.  */
+   for (i = 0; i < 4; i++)
+     {
+       val = reg & 0xff;
+       reg >>= 8;
+       /* A later recognized byte overwrites the values of an earlier one.  */
+       switch (val)
+ 	{
+ 	case 0xa:
+ 	  *l1_sizekb = 8;
+ 	  *l1_line = 32;
+ 	  *l1_assoc = 2;
+ 	  break;
+ 	case 0xc:
+ 	  *l1_sizekb = 16;
+ 	  *l1_line = 32;
+ 	  *l1_assoc = 4;
+ 	  break;
+ 	case 0x2c:
+ 	  *l1_sizekb = 32;
+ 	  *l1_line = 64;
+ 	  *l1_assoc = 8;
+ 	  break;
+ 	case 0x60:
+ 	  *l1_sizekb = 16;
+ 	  *l1_line = 64;
+ 	  *l1_assoc = 8;
+ 	  break;
+ 	case 0x66:
+ 	  *l1_sizekb = 8;
+ 	  *l1_line = 64;
+ 	  *l1_assoc = 4;
+ 	  break;
+ 	case 0x67:
+ 	  *l1_sizekb = 16;
+ 	  *l1_line = 64;
+ 	  *l1_assoc = 4;
+ 	  break;
+ 	case 0x68:
+ 	  *l1_sizekb = 32;
+ 	  *l1_line = 64;
+ 	  *l1_assoc = 4;
+ 	  break;
+ 	/* Any other descriptor byte leaves the outputs untouched.  */
+ 	default:
+ 	  break;
+ 	}
+     }
+ }
+ 
+ /* Returns the cache description for an Intel processor, or "" if unknown.  */
+ 
+ static char *
+ detect_caches_intel (unsigned max_level)
+ {
+   unsigned eax, ebx, ecx, edx;
+   unsigned l1_sizekb = 0, l1_line = 0, assoc = 0;
+ 
+   /* Must not be NULL: the caller passes the result as the first argument
+      of concat, where NULL would end the list and drop the -m option.  */
+   if (max_level < 2)
+     return (char *) "";
+ 
+   cpuid (2, eax, ebx, ecx, edx);
+   decode_caches_intel (eax, &l1_sizekb, &l1_line, &assoc);
+   decode_caches_intel (ebx, &l1_sizekb, &l1_line, &assoc);
+   decode_caches_intel (ecx, &l1_sizekb, &l1_line, &assoc);
+   decode_caches_intel (edx, &l1_sizekb, &l1_line, &assoc);
+   if (!l1_sizekb)
+     return (char *) "";
+   return describe_cache (l1_sizekb, l1_line, assoc);
+ }
+ 
  /* This will be called by the spec parser in gcc.c when it sees
     a %:local_cpu_detect(args) construct.  Currently it will be called
     with either "arch" or "tune" as argument depending on if -march=native
*************** const char *host_detect_local_cpu (int a
*** 62,67 ****
--- 191,197 ----
  const char *host_detect_local_cpu (int argc, const char **argv)
  {
    const char *cpu = NULL;
+   char *cache = (char *) "";
    enum processor_type processor = PROCESSOR_I386;
    unsigned int eax, ebx, ecx, edx;
    unsigned int max_level;
*************** const char *host_detect_local_cpu (int a
*** 126,131 ****
--- 256,269 ----
  
    is_amd = vendor == *(unsigned int*)"Auth";
  
+   if (!arch)
+     {
+       if (is_amd)
+ 	cache = detect_caches_amd (ext_level);
+       else if (vendor == *(unsigned int*)"Genu")
+ 	cache = detect_caches_intel (max_level);
+     }
+ 
    if (is_amd)
      {
        if (has_mmx)
*************** const char *host_detect_local_cpu (int a
*** 283,289 ****
      }
  
  done:
!   return concat ("-m", argv[0], "=", cpu, NULL);
  }
  #else
  /* If we aren't compiling with GCC we just provide a minimal
--- 421,427 ----
      }
  
  done:
!   return concat (cache, "-m", argv[0], "=", cpu, NULL);
  }
  #else
  /* If we aren't compiling with GCC we just provide a minimal



More information about the Gcc-patches mailing list