This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

AMD K6 support


Hi everybody,

I found that my AMD K6 performs best with -O3 -mpentiumpro -malign-functions=4
 -fschedule-insns2 -ffast-math on the numerical calculations I run.

As I don't like specifying -mpentiumpro -malign-functions=4 all the time, I
decided to create a -mamdk6 option that selects the correct values for
scheduling etc.
I looked into the K6 optimization docs, played a little bit with the
parameters in config/i386/i386.{md,c,h} and created a patch. With it I get
somewhat better performance than with -mpentiumpro -malign-functions=4.

The patch is appended. Enjoy it if you have an AMD K6.
More experiments with non-numerical code might yield even better results,
but so far I haven't been able to find parameters that produce faster
code.

Jeff: I'd be really happy to see this in egcs.
I hereby declare that this little patch was written entirely by me, and I
release it to the public under the terms of the GNU GPL.

Regards,
-- 
+--------------------------------------------------------------+
|  cand.phys. Kurt Garloff        C1-O4-101 (Chemiegeb"aude)   |
|Lehrstuhl Hochfrequenztechnik  (Fakult"at f"ur Elektrotechnik)|
|   Universit"at Dortmund             D - 44221 Dortmund       |
|   Tel.: (0231) 755-3947            Fax: (0231) 755-4631      |
|            garloff@hft.e-technik.uni-dortmund.de             |
|                 Public PGP key available !                   | 
+--------------------------------------------------------------+

P.S.:
Just for your information:
I also hand-coded some routines when experimenting. Contrary to what the
AMD docs say, you can use at least one of the new AMD 3DNow! instructions
without incurring a penalty for the switch between FPU and MMX/3DNow!:
prefetch. Prefetching the next cache line before using it sped up my
double-precision(!) matrix-vector multiplication considerably (by a factor
of 1.4).
--- gcc/ChangeLog.orig	Wed Aug 12 10:56:17 1998
+++ gcc/ChangeLog	Mon Aug 17 09:06:01 1998
@@ -1,3 +1,10 @@
+Mon Aug 17 11:03:50 1998 Kurt Garloff (K.Garloff@ping.de)
+
+	* config/i386/i386.md: Added description and scheduling params
+	for AMD K6
+	* config/i386/i386.h: Likewise
+	* config/i386/i386.c: Likewise
+
 Mon Aug 3 23:43:55 PDT 1998 Jeff Law  (law@cygnus.com)
 
 	* version.c: Bump for snapshot.
--- gcc/config/i386/i386.h.orig	Wed Aug 12 10:56:06 1998
+++ gcc/config/i386/i386.h	Mon Aug 17 08:50:06 1998
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler for Intel X86
-   (386, 486, Pentium).
+   (386, 486, Pentium, PentiumPro, AmdK6).
    Copyright (C) 1988, 92, 94, 95, 96, 97, 1998 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -155,20 +155,23 @@
 #define TARGET_486 (ix86_cpu == PROCESSOR_I486)
 #define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM)
 #define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
+#define TARGET_AMDK6 (ix86_cpu == PROCESSOR_AMDK6)
 #define TARGET_USE_LEAVE (ix86_cpu == PROCESSOR_I386)
-#define TARGET_PUSH_MEMORY (ix86_cpu == PROCESSOR_I386)
+#define TARGET_PUSH_MEMORY (ix86_cpu == PROCESSOR_I386 || ix86_cpu == PROCESSOR_AMDK6)
 #define TARGET_ZERO_EXTEND_WITH_AND (ix86_cpu != PROCESSOR_I386 \
-				     && ix86_cpu != PROCESSOR_PENTIUMPRO)
+				     && ix86_cpu != PROCESSOR_PENTIUMPRO && ix86_cpu != PROCESSOR_AMDK6)
 #define TARGET_DOUBLE_WITH_ADD (ix86_cpu != PROCESSOR_I386)
-#define TARGET_USE_BIT_TEST (ix86_cpu == PROCESSOR_I386)
+#define TARGET_USE_BIT_TEST (ix86_cpu == PROCESSOR_I386) /* || ix86_cpu == PROCESSOR_AMDK5) */
 #define TARGET_UNROLL_STRLEN (ix86_cpu != PROCESSOR_I386)
 #define TARGET_USE_Q_REG (ix86_cpu == PROCESSOR_PENTIUM \
-			  || ix86_cpu == PROCESSOR_PENTIUMPRO)
+			  || ix86_cpu == PROCESSOR_PENTIUMPRO || ix86_cpu == PROCESSOR_AMDK6)
 #define TARGET_USE_ANY_REG (ix86_cpu == PROCESSOR_I486)
 #define TARGET_CMOVE (ix86_arch == PROCESSOR_PENTIUMPRO)
-#define TARGET_DEEP_BRANCH_PREDICTION (ix86_cpu == PROCESSOR_PENTIUMPRO)
+#define TARGET_DEEP_BRANCH_PREDICTION (ix86_cpu == PROCESSOR_PENTIUMPRO || ix86_cpu == PROCESSOR_AMDK6)
 #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
 
+#define ISSUE_RATE (TARGET_AMDK6 ? 2 : 1)
+
 #define TARGET_SWITCHES							\
 { { "80387",			 MASK_80387 },				\
   { "no-80387",			-MASK_80387 },				\
@@ -181,6 +184,7 @@
   { "no-486",			 0 },					\
   { "pentium",			 0 },					\
   { "pentiumpro",		 0 },					\
+  { "amdk6",			 0 },					\
   { "rtd",			 MASK_RTD },				\
   { "no-rtd",			-MASK_RTD },				\
   { "align-double",		 MASK_ALIGN_DOUBLE },			\
@@ -219,7 +223,8 @@
  {PROCESSOR_I386,			/* 80386 */
   PROCESSOR_I486,			/* 80486DX, 80486SX, 80486DX[24] */
   PROCESSOR_PENTIUM,
-  PROCESSOR_PENTIUMPRO};
+  PROCESSOR_PENTIUMPRO,
+  PROCESSOR_AMDK6};
 
 #define PROCESSOR_I386_STRING "i386"
 #define PROCESSOR_I486_STRING "i486"
@@ -227,6 +232,7 @@
 #define PROCESSOR_PENTIUM_STRING "pentium"
 #define PROCESSOR_I686_STRING "i686"
 #define PROCESSOR_PENTIUMPRO_STRING "pentiumpro"
+#define PROCESSOR_AMDK6_STRING "amdk6"
 
 extern enum processor_type ix86_cpu;
 
@@ -240,6 +246,8 @@
 					       ? PROCESSOR_PENTIUM  \
   : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \
 					       ? PROCESSOR_PENTIUMPRO  \
+  : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_AMDK6) \
+					       ? PROCESSOR_AMDK6  \
   : PROCESSOR_I386
 #define PROCESSOR_DEFAULT_STRING \
   ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_I486) \
@@ -248,6 +256,8 @@
 					       ? PROCESSOR_PENTIUM_STRING  \
   : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \
 					       ? PROCESSOR_PENTIUMPRO_STRING  \
+  : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_AMDK6_STRING) \
+					       ? PROCESSOR_AMDK6_STRING  \
   : PROCESSOR_I386_STRING
 
 /* This macro is similar to `TARGET_SWITCHES' but defines names of
@@ -301,7 +311,9 @@
 %{mno-pentium:-mcpu=i486 -march=i486} \
 %{mpentium:-mcpu=pentium} \
 %{mno-pentiumpro:-mcpu=pentium} \
-%{mpentiumpro:-mcpu=pentiumpro}}"
+%{mpentiumpro:-mcpu=pentiumpro} \
+%{mnoamdk6:-mcpu=pentium} \
+%{mamdk6:-mcpu=amdk6}}"
 #endif
 
 #define CPP_486_SPEC "%{!ansi:-Di486} -D__i486 -D__i486__"
@@ -309,6 +321,8 @@
 	-D__i586 -D__i586__ -D__pentium -D__pentium__"
 #define CPP_686_SPEC "%{!ansi:-Di686 -Dpentiumpro} \
 	-D__i686 -D__i686__ -D__pentiumpro -D__pentiumpro__"
+#define CPP_AMDK6_SPEC "%{!ansi:-Di686 -Damdk6} \
+	-D__i586 -D__i586__ -D__amd6 -D__amdk6__"
 
 #ifndef CPP_CPU_DEFAULT_SPEC
 #if TARGET_CPU_DEFAULT == 1
@@ -320,10 +334,14 @@
 #if TARGET_CPU_DEFAULT == 3
 #define CPP_CPU_DEFAULT_SPEC "%(cpp_686)"
 #else
+#if TARGET_CPU_DEFAULT == 4
+#define CPP_CPU_DEFAULT_SPEC "%(cpp_amdk6)"
+#else
 #define CPP_CPU_DEFAULT_SPEC ""
 #endif
 #endif
 #endif
+#endif
 #endif /* CPP_CPU_DEFAULT_SPEC */
 
 #ifndef CPP_CPU_SPEC
@@ -333,6 +351,7 @@
 %{mcpu=i486:%(cpp_486)} %{m486:%(cpp_486)} \
 %{mpentium:%(cpp_586)} %{mcpu=pentium:%(cpp_586)} \
 %{mpentiumpro:%(cpp_686)} %{mcpu=pentiumpro:%(cpp_686)} \
+%{mamdk6:%(cpp_amdk6)} %{mcpu=admk6:%(cpp_amdk6)} \
 %{!mcpu*:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}"
 #endif
 
@@ -358,6 +377,7 @@
   { "cpp_486", CPP_486_SPEC},						\
   { "cpp_586", CPP_586_SPEC},						\
   { "cpp_686", CPP_686_SPEC},						\
+  { "cpp_amdk6", CPP_AMDK6_SPEC},					\
   { "cpp_cpu_default",	CPP_CPU_DEFAULT_SPEC },				\
   { "cpp_cpu",	CPP_CPU_SPEC },						\
   { "cc1_cpu",  CC1_CPU_SPEC },						\
--- gcc/config/i386/i386.c.orig	Wed Aug 12 10:56:17 1998
+++ gcc/config/i386/i386.c	Mon Aug 17 07:50:41 1998
@@ -100,7 +100,17 @@
   17					/* cost of a divide/mod */
 };
 
-struct processor_costs *ix86_cost = &pentium_cost;
+struct processor_costs amdk6_cost = {
+  1,					/* cost of an add instruction */
+  1,					/* cost of a lea instruction */
+  3,					/* variable shift costs */
+  2,					/* constant shift costs */
+  4,					/* cost of starting a multiply */
+  0,					/* cost of multiply per each bit set */
+  17					/* cost of a divide/mod */
+};
+
+struct processor_costs *ix86_cost = &pentiumpro_cost;
 
 #define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
 
@@ -213,7 +223,9 @@
 	   {PROCESSOR_I686_STRING, PROCESSOR_PENTIUMPRO, &pentiumpro_cost,
 	      0, 0},
 	   {PROCESSOR_PENTIUMPRO_STRING, PROCESSOR_PENTIUMPRO,
-	      &pentiumpro_cost, 0, 0}};
+	      &pentiumpro_cost, 0, 0},
+      	   {PROCESSOR_AMDK6_STRING, PROCESSOR_AMDK6,
+	      &amdk6_cost, 0, 0}};
 
   int ptt_size = sizeof (processor_target_table) / sizeof (struct ptt);
 
@@ -349,7 +361,7 @@
 	       i386_align_funcs, MAX_CODE_ALIGN);
     }
   else
-    i386_align_funcs = def_align;
+     i386_align_funcs = (TARGET_AMDK6 ? 4 : def_align);
 
   /* Validate -mbranch-cost= value, or provide default. */
   if (i386_branch_cost_string)
--- gcc/config/i386/i386.md.orig	Wed Aug 12 10:56:17 1998
+++ gcc/config/i386/i386.md	Wed Aug 19 08:47:31 1998
@@ -97,6 +97,10 @@
  3 0)
 
 (define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpop") (eq_attr "cpu" "amdk6"))
+ 2 0)
+
+(define_function_unit "fp" 1 0
  (and (eq_attr "type" "fpmul") (eq_attr "cpu" "pentium")) 
  7 0)
 
@@ -105,18 +109,38 @@
  5 0)
 
 (define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpmul") (eq_attr "cpu" "pentiumpro")) 
+ 4 0)
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpmul") (eq_attr "cpu" "amdk6")) 
+ 5 0)
+
+(define_function_unit "fp" 1 0
  (and (eq_attr "type" "idiv") (eq_attr "cpu" "pentiumpro")) 
  10 10)
 
 (define_function_unit "fp" 1 0
+ (and (eq_attr "type" "idiv") (eq_attr "cpu" "amdk6"))
+ 10 9)
+
+(define_function_unit "fp" 1 0
  (and (eq_attr "type" "imul") (eq_attr "cpu" "pentiumpro")) 
  6 0)
 
 (define_function_unit "fp" 1 0
- (eq_attr "type" "fpdiv") 
+ (and (eq_attr "type" "imul") (eq_attr "cpu" "amdk6")) 
+ 5 0)
+
+(define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpdiv") (eq_attr "cpu" "!amdk6"))
  10 10)
 
 (define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpdiv") (eq_attr "cpu" "amdk6"))
+ 10 9)
+
+(define_function_unit "fp" 1 0
  (eq_attr "type" "fld") 
  1 0)
 
@@ -140,7 +164,7 @@
 ;; Processor type -- this attribute must exactly match the processor_type
 ;; enumeration in i386.h.
 
-(define_attr "cpu" "i386,i486,pentium,pentiumpro"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,amdk6"
   (const (symbol_ref "ix86_cpu")))
 
 (define_insn "tstsi_1"
@@ -989,7 +1013,7 @@
 	  operands[1] = i386_sext16_if_const (operands[1]);
 	  return AS2 (mov%L0,%k1,%k0);
 	}
-      if (TARGET_PENTIUMPRO)
+      if (TARGET_PENTIUMPRO || TARGET_AMDK6)
 	{
 	  /* movzwl is faster than movw on the Pentium Pro,
 	   * although not as fast as an aligned movl. */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]