This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

patches to gcc for Athlon processor, and the mmx/3dnow instructions




Below is a patch against the 2.95 released source which adds support
for the k7 (AMD Athlon) and the mmx registers.  I have a separate patch
to the binutils which adds the additional 3dnow instructions for the
Athlon.

The only files modified are i386.{c,h,md} and linux.h in config/i386,
plus a fix for a bug in dwarf2out.c.  Also, nothing should change unless
the -mmmx flag is passed to gcc.

I have an include file amd-mmx.h which provides macro wrappers for
the various 3dnow and mmx operations, which ensure that the floating
point regs will be saved should one of these be invoked.  I will mail
this separately.  I am not sure where such a thing fits in the
tree, but it is certainly useful for someone wanting to code using
these instructions.

William Schelter


+ 2000-05-01  Bill Schelter  <wfs@sonia.ma.utexas.edu>
+ 
+ 	* i386.h MMX registers are introduced as a new set of 8 registers
+ 	starting at FIRST_MMX_REG and going to LAST_MMX_REG.  Various
+ 	macros for dealing with these registers.   The symbolic names
+ 	are %mm0,... %mm7
+ 	
+ 	* i386.h The MASK_MMX is inserted for the -mmmx command line switch,
+ 	which controls whether any of the new code is invoked.
+ 
+ 	* i386.h MASK_NO_CLOBBER_MM0 is introduced for -mno-clobber-mm0 switch.
+ 	Normal operation will use mm0 as an auxiliary to do a move
+ 	from a pair of normal regs to an mmx reg.   If that switch
+ 	is given, memory will be used instead.
+ 
+ 	
+ 	*  i386.md add data to describe the Athlon processor (k7)
+ 	from AMD with its special multiple integer and fp units.
+ 	
+ 	*  i386.md fix the movdi define_insn, so as to handle
+ 	moves between MMX registers and all other possible
+ 	ways of storing a DImode quantity, except for floating
+ 	point regs, since that would be illegal.  The gcc
+ 	switch '-mmmx' must be given on the command line before
+ 	the mmx regs, and the mmx instructions involving these
+ 	moves, will be used.  The movdi instruction will use
+ 	either mm0 as a scratch register for moves, or else will
+ 	use memory if -mno-clobber-mm0 is specified on the command line.
+ 
+ 	* i386.c static rtx mmx_move_memory_loc is introduced as a location
+ 	in the current stack frame, which will be allocated if necessary
+ 	to do moves which would not be possible without going through
+ 	memory.   Its value is cleared by clear_386_stack_locals(),
+ 	which is already called once per function to clear the stack
+ 	slot assignments remembered from previous functions.
+ 
+ 
+ 	* dwarf2out.c the DWARF_FRAME_REGNUM sometimes mixes the order
+         up as for the x86 small registers, so we have to actually
+         compute the beg and end of the range.
+ 
  Wed Jul 28 21:39:31 PDT 1999 Jeff Law  (law@cygnus.com)
  
  	* gcc-2.95 Released.
Only in ./gcc/config/i386: #i386.md.~2~#
Only in ./gcc/config/i386: .#i386.md.~2~
Only in /home/wfs/tmp/gcc/gcc/gcc/config/i386: CVS
Only in ./gcc/config/i386: TAGS
diff -rc /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.c ./gcc/config/i386/i386.c
*** /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.c	Thu May 27 07:11:59 1999
--- ./gcc/config/i386/i386.c	Mon May  1 22:15:19 2000
***************
*** 165,171 ****
    FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
    FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
    /* arg pointer */
!   INDEX_REGS
  };
  
  /* Test and compare insns in i386.md store the information needed to
--- 165,174 ----
    FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
    FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
    /* arg pointer */
!   INDEX_REGS,
!   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 
!   MMX_REGS, MMX_REGS
! 
  };
  
  /* Test and compare insns in i386.md store the information needed to
***************
*** 578,583 ****
--- 581,588 ----
       tree identifier ATTRIBUTE_UNUSED;
       tree args ATTRIBUTE_UNUSED;
  {
+   if (is_attribute_p ("prefermmx", identifier))
+     return (args == NULL_TREE);
    return 0;
  }
  
***************
*** 607,612 ****
--- 612,621 ----
    if (is_attribute_p ("cdecl", identifier))
      return (args == NULL_TREE);
  
+   /* says the variable prefers to be in an mmx register */ 
+   if (is_attribute_p ("prefermmx", identifier))
+     return (args == NULL_TREE);
+ 
    /* Regparm attribute specifies how many integer arguments are to be
       passed in registers. */
    if (is_attribute_p ("regparm", identifier))
***************
*** 1625,1630 ****
--- 1634,1640 ----
    return TRUE;
  }
  
+ 
  static rtx pic_label_rtx;
  static char pic_label_name [256];
  static int pic_label_no = 0;
***************
*** 4443,4448 ****
--- 4453,4478 ----
    p->machine = NULL;
  }
  
+ /* if non zero a stack location which holds DIMode, and is aligned
+    correctly, and which can be used to do memory moves.
+ */   
+ static rtx mmx_move_memory_loc;
+ 
+ /* Return the DImode stack temporary used for moves that have to go
+    through memory, creating it on first use.  The same slot is handed
+    back on every later call until mmx_move_memory_loc is reset to
+    NULL_RTX, so callers must not keep a value in it across other
+    users of the slot.  */
+ 
+ rtx get_mmx_move_memory_loc()
+ {
+   if (mmx_move_memory_loc != NULL_RTX)
+     return mmx_move_memory_loc;
+   mmx_move_memory_loc = assign_stack_temp (DImode, 8, 8);
+   return mmx_move_memory_loc;
+ }
+ 
+ 
  /* Clear stack slot assignments remembered from previous functions.
     This is called from INIT_EXPANDERS once before RTL is emitted for each
     function.  */
***************
*** 4453,4458 ****
--- 4483,4490 ----
    enum machine_mode mode;
    int n;
  
+   mmx_move_memory_loc = NULL_RTX;
+ 
    for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
         mode = (enum machine_mode) ((int) mode + 1))
      for (n = 0; n < MAX_386_STACK_LOCALS; n++)
***************
*** 5734,5736 ****
--- 5766,5769 ----
  
    return len;
  }
+ 
Only in ./gcc/config/i386: i386.c.orig
Only in ./gcc/config/i386: i386.c.~1~
Only in ./gcc/config/i386: i386.c.~2~
Only in ./gcc/config/i386: i386.c.~7~
Only in ./gcc/config/i386: i386.c.~8~
diff -rc /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.h ./gcc/config/i386/i386.h
*** /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.h	Sun Apr 25 06:43:49 1999
--- ./gcc/config/i386/i386.h	Sat Apr 22 16:25:58 2000
***************
*** 79,85 ****
  
  /* Masks for the -m switches */
  #define MASK_80387		000000000001	/* Hardware floating point */
! #define MASK_NOTUSED1		000000000002	/* bit not currently used */
  #define MASK_NOTUSED2		000000000004	/* bit not currently used */
  #define MASK_RTD		000000000010	/* Use ret that pops args */
  #define MASK_ALIGN_DOUBLE	000000000020	/* align doubles to 2 word boundary */
--- 79,85 ----
  
  /* Masks for the -m switches */
  #define MASK_80387		000000000001	/* Hardware floating point */
! #define MASK_MMX		000000000002	/* mmx regs and basic mmxops*/
  #define MASK_NOTUSED2		000000000004	/* bit not currently used */
  #define MASK_RTD		000000000010	/* Use ret that pops args */
  #define MASK_ALIGN_DOUBLE	000000000020	/* align doubles to 2 word boundary */
***************
*** 96,105 ****
--- 96,109 ----
  #define MASK_DEBUG_ARG		000020000000	/* Debug function_arg */   
  #define MASK_SCHEDULE_PROLOGUE  000040000000    /* Emit prologue as rtl */
  #define MASK_STACK_PROBE	000100000000	/* Enable stack probing */
+ #define MASK_NO_CLOBBER_MM0	000200000000	/* Use memory rather than reserving mm0 as a scratch register for mmx <--> ordinary reg moves */
  
  /* Use the floating point instructions */
  #define TARGET_80387 (target_flags & MASK_80387)
  
+ /* Use the MMX registers */
+ #define TARGET_MMX (target_flags & MASK_MMX)
+ 
  /* Compile using ret insn that pops args.
     This will not work unless you use prototypes at least
     for all functions that can take varying numbers of args.  */  
***************
*** 176,184 ****
--- 180,194 ----
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  
+ #define TARGET_NO_CLOBBER_MM0 (target_flags & MASK_NO_CLOBBER_MM0)
+ 
+ 
+ 
  #define TARGET_SWITCHES							\
  { { "80387",			 MASK_80387, "Use hardware fp" },	\
    { "no-80387",			-MASK_80387, "Do not use hardware fp" },\
+   { "mmx",			 MASK_MMX, "Use MMX registers" },	\
+   { "no-mmx",			-MASK_MMX, "Dont use mmx registers" },\
    { "hard-float",		 MASK_80387, "Use hardware fp" },	\
    { "soft-float",		-MASK_80387, "Do not use hardware fp" },\
    { "no-soft-float",		 MASK_80387, "Use hardware fp" },	\
***************
*** 212,218 ****
    { "no-debug-arg",		-MASK_DEBUG_ARG, 0 /* intentionally undoc */ },			\
    { "stack-arg-probe",		 MASK_STACK_PROBE, "Enable stack probing" },			\
    { "no-stack-arg-probe",	-MASK_STACK_PROBE, "" },			\
!   { "windows",			0, 0 /* intentionally undoc */ },					\
    { "dll",			0, 0 /* intentionally undoc */ },					\
    SUBTARGET_SWITCHES							\
    { "", MASK_SCHEDULE_PROLOGUE | TARGET_DEFAULT, 0 }}
--- 222,230 ----
    { "no-debug-arg",		-MASK_DEBUG_ARG, 0 /* intentionally undoc */ },			\
    { "stack-arg-probe",		 MASK_STACK_PROBE, "Enable stack probing" },			\
    { "no-stack-arg-probe",	-MASK_STACK_PROBE, "" },			\
!   { "no-clobber-mm0",		 MASK_NO_CLOBBER_MM0, "Mov via memory not mm0" },			\
!   { "clobber-mm0",	-MASK_NO_CLOBBER_MM0, "" },			\
!  { "windows",			0, 0 /* intentionally undoc */ },					\
    { "dll",			0, 0 /* intentionally undoc */ },					\
    SUBTARGET_SWITCHES							\
    { "", MASK_SCHEDULE_PROLOGUE | TARGET_DEFAULT, 0 }}
***************
*** 225,231 ****
    PROCESSOR_I486,			/* 80486DX, 80486SX, 80486DX[24] */
    PROCESSOR_PENTIUM,
    PROCESSOR_PENTIUMPRO,
!   PROCESSOR_K6};
  
  #define PROCESSOR_I386_STRING "i386"
  #define PROCESSOR_I486_STRING "i486"
--- 237,245 ----
    PROCESSOR_I486,			/* 80486DX, 80486SX, 80486DX[24] */
    PROCESSOR_PENTIUM,
    PROCESSOR_PENTIUMPRO,
!   PROCESSOR_K6,
!   PROCESSOR_K7
!  };
  
  #define PROCESSOR_I386_STRING "i386"
  #define PROCESSOR_I486_STRING "i486"
***************
*** 234,239 ****
--- 248,254 ----
  #define PROCESSOR_I686_STRING "i686"
  #define PROCESSOR_PENTIUMPRO_STRING "pentiumpro"
  #define PROCESSOR_K6_STRING "k6"
+ #define PROCESSOR_K7_STRING "k7"
  
  extern enum processor_type ix86_cpu;
  
***************
*** 246,251 ****
--- 261,267 ----
    : PROCESSOR_DEFAULT == PROCESSOR_PENTIUM ? PROCESSOR_PENTIUM_STRING  \
    : PROCESSOR_DEFAULT == PROCESSOR_PENTIUMPRO ? PROCESSOR_PENTIUMPRO_STRING  \
    : PROCESSOR_DEFAULT == PROCESSOR_K6 ? PROCESSOR_K6_STRING  \
+   : PROCESSOR_DEFAULT == PROCESSOR_K7 ? PROCESSOR_K7_STRING  \
    : PROCESSOR_I386_STRING)
  
  /* This macro is similar to `TARGET_SWITCHES' but defines names of
***************
*** 303,309 ****
  #define CPP_586_SPEC "%{!ansi:-Di586 -Dpentium} \
  	-D__i586 -D__i586__ -D__pentium -D__pentium__"
  #define CPP_K6_SPEC "%{!ansi:-Di586 -Dk6} \
! 	-D__i586 -D__i586__ -D__k6 -D__k6__" 
  #define CPP_686_SPEC "%{!ansi:-Di686 -Dpentiumpro} \
  	-D__i686 -D__i686__ -D__pentiumpro -D__pentiumpro__"
  
--- 319,327 ----
  #define CPP_586_SPEC "%{!ansi:-Di586 -Dpentium} \
  	-D__i586 -D__i586__ -D__pentium -D__pentium__"
  #define CPP_K6_SPEC "%{!ansi:-Di586 -Dk6} \
! 	-D__i586 -D__i586__ -D__k6 -D__k6__"
! #define CPP_K7_SPEC "%{!ansi:-Di586 -Dk7} \
! 	-D__i586 -D__i586__ -D__k7 -D__k7__" 
  #define CPP_686_SPEC "%{!ansi:-Di686 -Dpentiumpro} \
  	-D__i686 -D__i686__ -D__pentiumpro -D__pentiumpro__"
  
***************
*** 320,325 ****
--- 338,346 ----
  #if TARGET_CPU_DEFAULT == 4
  #define CPP_CPU_DEFAULT_SPEC "%(cpp_k6)"
  #endif
+ #if TARGET_CPU_DEFAULT == 5
+ #define CPP_CPU_DEFAULT_SPEC "%(cpp_k7)"
+ #endif
  #ifndef CPP_CPU_DEFAULT_SPEC
  #define CPP_CPU_DEFAULT_SPEC ""
  #endif
***************
*** 333,338 ****
--- 354,361 ----
  %{mpentium:%(cpp_586)} %{mcpu=pentium:%(cpp_586)} \
  %{mpentiumpro:%(cpp_686)} %{mcpu=pentiumpro:%(cpp_686)} \
  %{mcpu=k6:%(cpp_k6)} \
+ %{mcpu=k7:%(cpp_k7)} \
+ %{mno-clobber-mm0:-D__NO_CLOBBER_MM0__} \
  %{!mcpu*:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}"
  #endif
  
***************
*** 358,363 ****
--- 381,387 ----
    { "cpp_486", CPP_486_SPEC},						\
    { "cpp_586", CPP_586_SPEC},						\
    { "cpp_k6", CPP_K6_SPEC},						\
+   { "cpp_k7", CPP_K7_SPEC},						\
    { "cpp_686", CPP_686_SPEC},						\
    { "cpp_cpu_default",	CPP_CPU_DEFAULT_SPEC },				\
    { "cpp_cpu",	CPP_CPU_SPEC },						\
***************
*** 594,607 ****
     eliminated during reloading in favor of either the stack or frame
     pointer. */
  
! #define FIRST_PSEUDO_REGISTER 17
  
  /* 1 for registers that have pervasive standard uses
     and are not available for the register allocator.
     On the 80386, the stack pointer is such, as is the arg pointer. */
  #define FIXED_REGISTERS \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/       \
! {  0, 0, 0, 0, 0, 0, 0, 1, 0,  0,  0,  0,  0,  0,  0,  0,  1 }
  
  /* 1 for registers not available across function calls.
     These must include the FIXED_REGISTERS and also any
--- 618,634 ----
     eliminated during reloading in favor of either the stack or frame
     pointer. */
  
! #define FIRST_PSEUDO_REGISTER 25
  
  /* 1 for registers that have pervasive standard uses
     and are not available for the register allocator.
     On the 80386, the stack pointer is such, as is the arg pointer. */
  #define FIXED_REGISTERS \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/       \
! {  0, 0, 0, 0, 0, 0, 0, 1, 0,  0,  0,  0,  0,  0,  0,  0,  1 , \
! /*mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7 */ \
!     0,  0,  0,  0,  0,  0,  0,  0  \
! }
  
  /* 1 for registers not available across function calls.
     These must include the FIXED_REGISTERS and also any
***************
*** 612,618 ****
  
  #define CALL_USED_REGISTERS \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \
! {  1, 1, 1, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1,  1,  1 }
  
  /* Order in which to allocate registers.  Each register must be
     listed once, even those in FIXED_REGISTERS.  List frame pointer
--- 639,648 ----
  
  #define CALL_USED_REGISTERS \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \
! {  1, 1, 1, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1,  1,  1, \
! /*mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7 */ \
!     1,  1,  1,  1,  1,  1,  1,  1  \
! }
  
  /* Order in which to allocate registers.  Each register must be
     listed once, even those in FIXED_REGISTERS.  List frame pointer
***************
*** 635,641 ****
  
  #define REG_ALLOC_ORDER \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \
! {  0, 1, 2, 3, 4, 5, 6, 7, 8,  9, 10, 11, 12, 13, 14, 15, 16 }
  
  /* A C statement (sans semicolon) to choose the order in which to
     allocate hard registers for pseudo-registers local to a basic
--- 665,674 ----
  
  #define REG_ALLOC_ORDER \
  /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg*/ \
! {  0, 1, 2, 3, 4, 5, 6, 7, 8,  9, 10, 11, 12, 13, 14, 15, 16, \
! /*mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7 */ \
!  17, 18, 19, 20, 21, 22, 23,24  \
! }
  
  /* A C statement (sans semicolon) to choose the order in which to
     allocate hard registers for pseudo-registers local to a basic
***************
*** 682,688 ****
     */
  
  #define HARD_REGNO_NREGS(REGNO, MODE)   \
!   (FP_REGNO_P (REGNO) ? 1 \
     : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
  
  /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
--- 715,722 ----
     */
  
  #define HARD_REGNO_NREGS(REGNO, MODE)   \
!   (FP_REGNO_P (REGNO) ? 1  \
!    : (MMX_REGNO_P(REGNO)) ?  ((GET_MODE_SIZE (MODE) + 2*UNITS_PER_WORD - 1) / (2*UNITS_PER_WORD))  \
     : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
  
  /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
***************
*** 699,704 ****
--- 733,740 ----
     ? (((int) GET_MODE_CLASS (MODE) == (int) MODE_FLOAT		\
         || (int) GET_MODE_CLASS (MODE) == (int) MODE_COMPLEX_FLOAT)	\
        && GET_MODE_UNIT_SIZE (MODE) <= (LONG_DOUBLE_TYPE_SIZE == 96 ? 12 : 8))\
+    : MMX_REGNO_P (REGNO) \
+    ? ((GET_MODE_UNIT_SIZE (MODE) == 8)) \
     : (int) (MODE) != (int) QImode ? 1				\
     : (reload_in_progress | reload_completed) == 1)
  
***************
*** 732,737 ****
--- 768,776 ----
  #define FIRST_STACK_REG FIRST_FLOAT_REG
  #define LAST_STACK_REG (FIRST_FLOAT_REG + 7)
  
+ #define FIRST_MMX_REG 17
+ #define LAST_MMX_REG (FIRST_MMX_REG + 7)
+ 
  /* Value should be nonzero if functions must have frame pointers.
     Zero means the frame pointer need not be set up (and parms
     may be accessed via the stack pointer) in functions that seem suitable.
***************
*** 813,824 ****
--- 852,867 ----
    GENERAL_REGS,			/* %eax %ebx %ecx %edx %esi %edi %ebp %esp */
    FP_TOP_REG, FP_SECOND_REG,	/* %st(0) %st(1) */
    FLOAT_REGS,
+   MMX_REGS,                     /* %mm0 %mm1 %mm2 %mm3 %mm4 %mm5 %mm6 %mm7 */
    ALL_REGS, LIM_REG_CLASSES
  };
  
  #define N_REG_CLASSES (int) LIM_REG_CLASSES
  
  #define FLOAT_CLASS_P(CLASS) (reg_class_subset_p (CLASS, FLOAT_REGS))
+ #define MMX_CLASS_P(CLASS) (reg_class_subset_p (CLASS, MMX_REGS))
+ 
+ 
  
  /* Give names of register classes as strings for dump file.   */
  
***************
*** 832,837 ****
--- 875,881 ----
     "GENERAL_REGS",			\
     "FP_TOP_REG", "FP_SECOND_REG",	\
     "FLOAT_REGS",			\
+    "MMX_REGS",			        \
     "ALL_REGS" }
  
  /* Define which registers fit in which classes.
***************
*** 848,854 ****
   {0x100ff},			/* GENERAL_REGS */		\
    {0x0100}, {0x0200},		/* FP_TOP_REG, FP_SECOND_REG */	\
    {0xff00},			/* FLOAT_REGS */		\
!  {0x1ffff}}
  
  /* The same information, inverted:
     Return the class number of the smallest class containing
--- 892,899 ----
   {0x100ff},			/* GENERAL_REGS */		\
    {0x0100}, {0x0200},		/* FP_TOP_REG, FP_SECOND_REG */	\
    {0xff00},			/* FLOAT_REGS */		\
!  {(0xff << 17)},		/* MMX_REGS (8 regs starting at 17) */		\
!  {0x1ffffff}}
  
  /* The same information, inverted:
     Return the class number of the smallest class containing
***************
*** 870,875 ****
--- 915,924 ----
  
  #define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
  #define FP_REGNO_P(n) ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG)
+ 
+ #define MMX_REG_P(X) (REG_P (X) && MMX_REGNO_P (REGNO (X)))
+ #define MMX_REGNO_P(n) ((n) >= FIRST_MMX_REG && (n) <= LAST_MMX_REG)
+ 
    
  #define STACK_REG_P(xop) (REG_P (xop) &&		       	\
  			  REGNO (xop) >= FIRST_STACK_REG &&	\
***************
*** 897,902 ****
--- 946,952 ----
     (C) == 'f' ? (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387	\
  		 ? FLOAT_REGS					\
  		 : NO_REGS) :					\
+    (C) == 'U' ? MMX_REGS :					\
     (C) == 't' ? (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387	\
  		 ? FP_TOP_REG					\
  		 : NO_REGS) :					\
***************
*** 933,938 ****
--- 983,991 ----
     (C) == 'O' ? (VALUE) >= 0 && (VALUE) <= 32 :	\
     0)
  
+ /*#define EXTRA_CONSTRAINT(OP, C) \
+   (((C) == 'U') ? (GET_CODE (op) == CONST_INT ......*/
+ 
  /* Similar, but for floating constants, and defining letters G and H.
     Here VALUE is the CONST_DOUBLE rtx itself.  We allow constants even if
     TARGET_387 isn't set, because the stack register converter may need to
***************
*** 985,990 ****
--- 1038,1044 ----
     except in the FP regs, where a single reg is always enough.  */
  #define CLASS_MAX_NREGS(CLASS, MODE)	\
   (FLOAT_CLASS_P (CLASS) ? 1 :		\
+   MMX_CLASS_P (CLASS) ? 1 :		\
    ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
  
  /* A C expression whose value is nonzero if pseudos that have been
***************
*** 2184,2190 ****
  #define REGISTER_MOVE_COST(CLASS1, CLASS2)				\
    (((FLOAT_CLASS_P (CLASS1) && ! FLOAT_CLASS_P (CLASS2))		\
      || (! FLOAT_CLASS_P (CLASS1) && FLOAT_CLASS_P (CLASS2))) ? 10	\
!    : 2)
  
  
  /* A C expression for the cost of moving data of mode M between a
--- 2238,2245 ----
  #define REGISTER_MOVE_COST(CLASS1, CLASS2)				\
    (((FLOAT_CLASS_P (CLASS1) && ! FLOAT_CLASS_P (CLASS2))		\
      || (! FLOAT_CLASS_P (CLASS1) && FLOAT_CLASS_P (CLASS2))) ? 10	\
!    : ((MMX_CLASS_P (CLASS1) && ! MMX_CLASS_P (CLASS2))		\
!     || (! MMX_CLASS_P (CLASS1) && MMX_CLASS_P (CLASS2))) ? 8 : 2)  
  
  
  /* A C expression for the cost of moving data of mode M between a
***************
*** 2195,2202 ****
     between two registers, you should define this macro to express the
     relative cost.  */
  
! /* #define MEMORY_MOVE_COST(M,C,I) 2  */
! 
  /* A C expression for the cost of a branch instruction.  A value of 1
     is the default; other values are interpreted relative to that.  */
  
--- 2250,2259 ----
     between two registers, you should define this macro to express the
     relative cost.  */
  
! #define MEMORY_MOVE_COST(MODE, CLASS, IN) \
!   ((((CLASS) == GENERAL_REGS) && ((MODE) == DImode)) ? 4 \
!    : 2)  /* IN is unused: DImode with GENERAL_REGS costs 4, anything else 2 */
!    
  /* A C expression for the cost of a branch instruction.  A value of 1
     is the default; other values are interpreted relative to that.  */
  
***************
*** 2381,2387 ****
  
  #define HI_REGISTER_NAMES \
  {"ax","dx","cx","bx","si","di","bp","sp",          \
!  "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","" }
  
  #define REGISTER_NAMES HI_REGISTER_NAMES
  
--- 2438,2446 ----
  
  #define HI_REGISTER_NAMES \
  {"ax","dx","cx","bx","si","di","bp","sp",          \
!  "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","", \
!   "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"  \
! }
  
  #define REGISTER_NAMES HI_REGISTER_NAMES
  
***************
*** 2413,2419 ****
  
  /* How to renumber registers for dbx and gdb.  */
  
! /* {0,2,1,3,6,7,4,5,12,13,14,15,16,17}  */
  #define DBX_REGISTER_NUMBER(n) \
  ((n) == 0 ? 0 : \
   (n) == 1 ? 2 : \
--- 2472,2480 ----
  
  /* How to renumber registers for dbx and gdb.  */
  
! /* {0,2,1,3,6,7,4,5,12,13,14,15,16,17}
!    ???????? for mmx 
!  */
  #define DBX_REGISTER_NUMBER(n) \
  ((n) == 0 ? 0 : \
   (n) == 1 ? 2 : \
***************
*** 2521,2533 ****
     It need not be very fast code.  */
  
  #define ASM_OUTPUT_REG_PUSH(FILE,REGNO)  \
!   fprintf (FILE, "\tpushl %%e%s\n", reg_names[REGNO])
  
  /* This is how to output an insn to pop a register from the stack.
     It need not be very fast code.  */
  
! #define ASM_OUTPUT_REG_POP(FILE,REGNO)  \
!   fprintf (FILE, "\tpopl %%e%s\n", reg_names[REGNO])
  
  /* This is how to output an element of a case-vector that is absolute.
       */
--- 2582,2604 ----
     It need not be very fast code.  */
  
  #define ASM_OUTPUT_REG_PUSH(FILE,REGNO)  \
!   do { if (MMX_REGNO_P(REGNO)) { \
!     fprintf (FILE, "\tadd $-8,%esp\n\tmovq %%%s,0x(%esp)\n", \
!        reg_names[REGNO]);\
!   } else { \
!   fprintf (FILE, "\tpushl %%e%s\n", reg_names[REGNO]); \
!   }} while (0)
  
  /* This is how to output an insn to pop a register from the stack.
     It need not be very fast code.  */
  
! #define ASM_OUTPUT_REG_POP(FILE,REGNO) do { \
!   if (MMX_REGNO_P(REGNO)) { /* MMX reg: movq from the stack top, then release 8 bytes; %% keeps fprintf from reading %e */ \
!     fprintf (FILE, "\tmovq (%%esp),%%%s\n\tadd $8,%%esp\n", \
!  reg_names[REGNO]); \
!   } else { \
!   fprintf (FILE, "\tpopl %%e%s\n", reg_names[REGNO]); \
!   }} while(0)
  
  /* This is how to output an element of a case-vector that is absolute.
       */
***************
*** 2611,2617 ****
  	 case 4:					\
  	 case 8:					\
  	 case 12:					\
! 	   if (! FP_REG_P (X)) fputs ("e", FILE);	\
  	 case 2:					\
  	   fputs (hi_reg_name[REGNO (X)], FILE);	\
  	   break;					\
--- 2682,2688 ----
  	 case 4:					\
  	 case 8:					\
  	 case 12:					\
! 	   if (! (FP_REG_P (X) || MMX_REG_P(X))) fputs ("e", FILE);	\
  	 case 2:					\
  	   fputs (hi_reg_name[REGNO (X)], FILE);	\
  	   break;					\
***************
*** 2643,2649 ****
  	 { fputs ("argp", FILE); break; }		\
         if (STACK_TOP_P (X))				\
  	 { fputs ("st(0)", FILE); break; }		\
!        if (FP_REG_P (X))				\
  	 { fputs (hi_name[REGNO(X)], FILE); break; }	\
         switch (GET_MODE_SIZE (GET_MODE (X)))		\
  	 {						\
--- 2714,2720 ----
  	 { fputs ("argp", FILE); break; }		\
         if (STACK_TOP_P (X))				\
  	 { fputs ("st(0)", FILE); break; }		\
!        if (FP_REG_P (X) || MMX_REG_P(X))		\
  	 { fputs (hi_name[REGNO(X)], FILE); break; }	\
         switch (GET_MODE_SIZE (GET_MODE (X)))		\
  	 {						\
***************
*** 2766,2771 ****
--- 2837,2844 ----
  extern int small_shift_operand ();
  extern char *output_ashl ();
  extern int memory_address_info ();
+ extern struct rtx_def *get_mmx_move_memory_loc();
+ 
  
  #ifdef NOTYET
  extern struct rtx_def *copy_all_rtx ();
***************
*** 2801,2806 ****
--- 2874,2881 ----
  
  /* External functions used */
  extern struct rtx_def *force_operand ();
+ 
+ 
  
  
  /*
Only in ./gcc/config/i386: i386.h.orig
Only in ./gcc/config/i386: i386.h.~1~
Only in ./gcc/config/i386: i386.h.~2~
Only in ./gcc/config/i386: i386.h.~6~
Only in ./gcc/config/i386: i386.h.~7~
diff -rc /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.md ./gcc/config/i386/i386.md
*** /home/wfs/tmp/gcc/gcc/gcc/config/i386/i386.md	Sun Apr 25 06:43:46 1999
--- ./gcc/config/i386/i386.md	Mon May  1 22:14:06 2000
***************
*** 162,167 ****
--- 162,203 ----
    (and (eq_attr "type" "fpop,fpmul,fcompare") (eq_attr "cpu" "k6"))
   2 2)
  
+ ;; K7 has 3 FPU units, capable of different things.   an add unit,
+ ;; a multiply unit and a store unit.   The following does not
+ ;; seem to specify that the add's are in the fpaddk7 unit, nor
+ ;; does it specify they have a latency of 4 rather than 2...
+ (define_function_unit "fpaddk7" 1 0
+   (and (eq_attr "type" "fcompare,fld,fpop" ) (eq_attr "cpu" "k7"))
+ 2 0)
+ 
+ 
+ ;; how to express this..., we need to add the attr later..
+ ;(define_function_unit "fpaddk7" 1 0
+ ;  (and (eq_attr "type" "fsub,fsubb,fadd,faddp,fsubrp,fsubr") (eq_attr "cpu" "k7"))
+ ;4 0)
+ 
+ 
+ (define_function_unit "fpmulk7" 1 0
+   (and (eq_attr "type" "fpmul") (eq_attr "cpu" "k7"))
+ 4 0
+ )
+ 
+ (define_function_unit "fpmulk7" 1 0
+   (and (eq_attr "type" "fpdiv") (eq_attr "cpu" "k7"))
+ 16 0
+ )
+ (define_function_unit "fpmulk7" 1 0
+   (and (eq_attr "type" "fpop,fld") (eq_attr "cpu" "k7"))
+ 2 0
+ )
+ 
+ (define_function_unit "fpstorek7" 1 0
+   (and (eq_attr "type" "fld,fpop") (eq_attr "cpu" "k7"))
+ 2 0)
+ 
+ 
+ 
+ 
  ;; i386 and i486 have one integer unit, which need not be modeled
  
  (define_function_unit "integer" 2 0
***************
*** 199,204 ****
--- 235,266 ----
    (and (eq_attr "cpu" "k6") (eq_attr "type" "idiv"))
    17 17)
  
+ ;; there are three integer units on the k7, but only one of
+ ;; them can perform multiplies.
+ ;; actually the multiply may tie up two units.   It is unclear
+ ;; from the k7 documentations.   If so the following needs to
+ ;; be adjusted somehow
+ 
+ (define_function_unit "integer" 2 0 
+   (and (eq_attr "cpu" "k7")
+     (eq_attr "type" "integer,binary,test,compare,!imul"))
+ 2 0)
+ 
+ (define_function_unit "integerk7" 2 0 
+   (and (eq_attr "cpu" "k7")
+     (eq_attr "type" "integer,binary,test,compare")
+     )
+ 2 0)
+ 
+ (define_function_unit "integerk7" 2 0 
+   (and (eq_attr "cpu" "k7")
+     (eq_attr "type" "imul")
+     )
+ 4 0)
+ 
+ 
+ 
+ 
  ;; Pentium Pro and K6 have a separate load unit.
  (define_function_unit "load" 1 0
    (and (eq_attr "cpu" "pentiumpro") (eq_attr "memory" "load"))
***************
*** 208,213 ****
--- 270,281 ----
    (and (eq_attr "cpu" "k6") (eq_attr "memory" "load"))
    2 0)
  
+ (define_function_unit "loadStore" 2 0
+   (and (eq_attr "cpu" "k7") 
+    (eq_attr "memory" "load,store")
+    )
+   1 0)
+ 
  ;; Pentium Pro and K6 have a separate store unit.
  (define_function_unit "store" 1 0
    (and (eq_attr "cpu" "pentiumpro,k6") (eq_attr "memory" "store"))
***************
*** 218,223 ****
--- 286,293 ----
    (and (eq_attr "cpu" "k6") (eq_attr "type" "lea"))
    1 0)
  
+ 
+ 
  
  ;; "movl MEM,REG / testl REG,REG" is faster on a 486 than "cmpl $0,MEM".
  ;; But restricting MEM here would mean that gcc could not remove a redundant
***************
*** 234,240 ****
  ;; Processor type -- this attribute must exactly match the processor_type
  ;; enumeration in i386.h.
  
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6"
    (const (symbol_ref "ix86_cpu")))
  
  (define_insn "tstsi_1"
--- 304,310 ----
  ;; Processor type -- this attribute must exactly match the processor_type
  ;; enumeration in i386.h.
  
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,k7"
    (const (symbol_ref "ix86_cpu")))
  
  (define_insn "tstsi_1"
***************
*** 1283,1288 ****
--- 1353,1359 ----
    return AS2 (mov%B0,%1,%0);
  }")
  
+ 
  (define_insn "movsf_push"
    [(set (match_operand:SF 0 "push_operand" "=<,<")
  	(match_operand:SF 1 "general_operand" "*rfF,m"))]
***************
*** 1694,1699 ****
--- 1765,1877 ----
  }")
  
  (define_insn ""
+   [(set (match_operand:DI 0 "general_operand" "=U,m,rU,r")
+ 	(match_operand:DI 1 "general_operand" "Umi,Uri,r,Umi"))
+ ]  ;; DImode move; constraint U is the MMX register class
+   "TARGET_MMX && !FP_REG_P(operands[0]) && !FP_REG_P(operands[1])"
+   "* /* Output a DImode move in which either operand may be an MMX register.  */
+   if ((MMX_REG_P(operands[0])
+       &&  (GET_CODE (operands[1]) == MEM || MMX_REG_P(operands[1])))
+      || (MMX_REG_P(operands[1]) && GET_CODE (operands[0]) == MEM))
+      return AS2 (movq,%1,%0);  /* MMX <- MMX/memory, or memory <- MMX: one movq */
+   if ((MMX_REG_P(operands[0]) &&
+ 	GET_CODE (operands[1]) == CONST_INT))
+      {  rtx first,second;  /* constant -> MMX: build it in the stack slot first */
+         rtx mmx_move_memory_loc= get_mmx_move_memory_loc();
+         rtx xops [4];
+        split_double (operands[1], &first, &second);
+         xops[0]=mmx_move_memory_loc;
+ 	xops[1]=first;
+ 	output_asm_insn(singlemove_string(xops),xops);
+ 	xops[0]=adj_offsettable_operand(mmx_move_memory_loc,4);
+ 	xops[1]=second;
+ 	output_asm_insn(singlemove_string(xops),xops);
+ 	xops[1] = mmx_move_memory_loc;
+  	xops[0]= operands[0];
+ 	output_asm_insn(AS2 (movq,%1,%0),xops);  /* movq, not movd: both stored words must be loaded */
+ 	return \"\";
+      }
+      /* Neither operand is an MMX register: ordinary double-word move.  */
+      if (!MMX_REG_P(operands[0]) && !MMX_REG_P(operands[1])) {
+         return output_move_double (operands);
+       }
+ 
+    if(TARGET_NO_CLOBBER_MM0
+      /* can happen that mm0 is the target of a set, even if other
+        ops clobber it */
+       || (MMX_REG_P(operands[0]) && (FIRST_MMX_REG == REGNO(operands[0])))) {
+       if ((MMX_REG_P(operands[0]) && (REGNO(operands[1])) >= FIRST_STACK_REG)
+ 	  || (MMX_REG_P(operands[1]) && (REGNO(operands[0])) >= FIRST_STACK_REG)) { abort();
+       }
+       { rtx scratch = get_mmx_move_memory_loc();
+         rtx scratch1 = adj_offsettable_operand(scratch,4);
+         rtx xops[2];
+       if (MMX_REG_P(operands[0])) {
+ 	xops[0] = scratch;
+ 	xops[1] = operands[1];
+ 	output_asm_insn(singlemove_string(xops),xops);
+         xops[0] = scratch1;
+ 	xops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ 	output_asm_insn(singlemove_string(xops),xops);
+ 	xops[0] = operands[0];
+ 	xops[1] = scratch;
+ 	output_asm_insn(AS2 (movq,%1,%0),xops);
+ 	RET;
+       }
+       else {
+ 	xops[0] = scratch;
+ 	xops[1] = operands[1];
+ 	output_asm_insn(AS2 (movq,%1,%0),xops);
+ 	xops[1] = scratch;
+ 	xops[0] = operands[0];
+ 	output_asm_insn(singlemove_string(xops),xops);
+         xops[1] = scratch1;
+ 	xops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ 	output_asm_insn(singlemove_string(xops),xops);
+ 	RET;
+       }
+       }
+    }
+    /* Otherwise use hard register FIRST_MMX_REG (mm0) as the scratch register.  */
+    { rtx scratch_reg = gen_rtx_REG(DImode,FIRST_MMX_REG);
+     /* scratch_reg = operands[2]; */
+     /* NOTE(review): the pattern carries no clobber of mm0 -- confirm overwriting it here is safe.  */
+   if (MMX_REG_P(operands[0])) {
+     rtx xops[2];
+     if ((REGNO(operands[1])) >= FIRST_STACK_REG)
+       abort();
+     xops[1]= gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+     xops[0]= scratch_reg;
+     output_asm_insn (AS2 (movd,%1,%0),xops);
+     output_asm_insn (AS2 (movd,%1,%0),operands);
+     xops[1]= scratch_reg;
+     xops[0]= GEN_INT(32);
+     output_asm_insn (AS2 (psllq,%0,%1),xops);
+     xops[1]= scratch_reg;
+     xops[0]= operands[0];
+     output_asm_insn (AS2 (por,%1,%0), xops);
+     RET;
+   } else {  /* operands[0] is general reg */
+   rtx xops[2];
+   if ((REGNO(operands[0])) >= FIRST_STACK_REG)
+       abort();
+   xops[0]=operands[1];
+   xops[1]=scratch_reg;
+   output_asm_insn (AS2 (movq,%0,%1),xops);
+   output_asm_insn (AS2 (movd,%1,%0),operands);
+   xops[1]= scratch_reg;
+   xops[0]= GEN_INT(32);
+   output_asm_insn (AS2 (psrlq,%0,%1),xops);
+   xops[0]= gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+   output_asm_insn (AS2 (movd,%1,%0), xops);
+   RET; 
+   }
+  }
+ "
+   [(set_attr "type" "memory,memory,integer,integer")
+    (set_attr "memory" "*,load,*,*")])
+ 
+ (define_insn ""
    [(set (match_operand:DI 0 "general_operand" "=g,r")
  	(match_operand:DI 1 "general_operand" "riF,m"))]
    "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM)
***************
*** 1705,1711 ****
  (define_split
    [(set (match_operand:DI 0 "nonimmediate_operand" "")
  	(match_operand:DI 1 "general_operand" ""))]
!   "reload_completed
     && (offsettable_memref_p (operands[0])
         || nonmemory_operand (operands[0], DImode))
     && (offsettable_memref_p (operands[1])
--- 1883,1890 ----
  (define_split
    [(set (match_operand:DI 0 "nonimmediate_operand" "")
  	(match_operand:DI 1 "general_operand" ""))]
!   "!(MMX_REG_P(operands[0]) || (MMX_REG_P(operands[1])))
!    && reload_completed
     && (offsettable_memref_p (operands[0])
         || nonmemory_operand (operands[0], DImode))
     && (offsettable_memref_p (operands[1])
Only in ./gcc/config/i386: i386.md.orig
Only in ./gcc/config/i386: i386.md.~1~
Only in ./gcc/config/i386: i386.md.~22~
Only in ./gcc/config/i386: i386.md.~23~
Only in ./gcc/config/i386: i386.md.~2~
diff -rc /home/wfs/tmp/gcc/gcc/gcc/config/i386/linux.h ./gcc/config/i386/linux.h
*** /home/wfs/tmp/gcc/gcc/gcc/config/i386/linux.h	Wed Apr  7 19:32:13 1999
--- ./gcc/config/i386/linux.h	Fri Sep 10 19:33:40 1999
***************
*** 117,122 ****
--- 117,123 ----
   : (n) == 6 ? 5 \
   : (n) == 7 ? 4 \
   : ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG) ? (n)+3 \
+  : ((n) >= FIRST_MMX_REG && (n) <= LAST_MMX_REG) ? (n)+2 \
   : (-1))
  
  /* Output assembler code to FILE to increment profiler label # LABELNO
Only in ./gcc/config/i386: linux.h.orig
Only in ./gcc/config/i386: linux.h.~1~
diff -rc /home/wfs/tmp/gcc/gcc/gcc/dwarf2out.c ./gcc/dwarf2out.c
*** /home/wfs/tmp/gcc/gcc/gcc/dwarf2out.c	Tue Jul  6 19:15:29 1999
--- ./gcc/dwarf2out.c	Fri Sep 10 20:07:12 1999
***************
*** 645,653 ****
        --n_ranges;
        t = build_int_2 (ranges[n_ranges].size, 0);
        do
! 	{
! 	  int beg = DWARF_FRAME_REGNUM (ranges[n_ranges].beg);
! 	  int end = DWARF_FRAME_REGNUM (ranges[n_ranges].end);
  	  if (beg < 0)
  	    continue;
  	  if (end >= last_end)
--- 645,662 ----
        --n_ranges;
        t = build_int_2 (ranges[n_ranges].size, 0);
        do
! 	{ int beg,end, i = ranges[n_ranges].beg;
! 	  beg = end =  DWARF_FRAME_REGNUM (i);
! 	  /* the DWARF_FRAME_REGNUM sometimes mixes the order
! 	     up as for the x86 small registers, so we have to
! 	     get the beg and end of the range.
! 	  */
! 	  while(i <= ranges[n_ranges].end)
! 	      { int di = DWARF_FRAME_REGNUM(i);
! 	      beg = (di < beg ? di : beg);
! 	      end = (di > end ? di : end);
! 	      i++;
! 	      }
  	  if (beg < 0)
  	    continue;
  	  if (end >= last_end)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]