This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

recognize x86 CPU variants and default SSE/MMX/3dNOW support


Hi,
this patch adds support for the CPU variants as discussed earlier.  I hope
the list of chips is complete; the names are somewhat messy, but so are the
official ones.

I am not quite sure this is the right solution, because:
1) I am not adding them to the x86_cpu enum as we do with the cores.  This is
   because practically the only differences are the SSE/MMX/3DNOW sets, which
   we want to control separately anyway.
   Adding them to x86_cpu would require updating _many_ more places than we do
   now, for instance the scheduling definitions and friends.
2) It is not possible to configure for these - but again this would make no
   difference, as configuring gcc for a given target affects just the -mcpu
   setting.
3) We are running out of available flags...

But I don't see any considerably better solution.

Honza

Tue Dec 11 11:23:36 CET 2001  Jan Hubicka  <jh@suse.cz>
	* i386.c (override_options): Recognize various CPU variants and set
	SSE/MMX/3dNOW flags accordingly.
	* i386.h (MASK_NOMMX, MASK_NOSSE, MASK_NOSSE2, MASK_NO3DNOW,
	MASK_NO3DNOW_A): New.
	(MASK_*): Renumber.
	(TARGET_FLAGS): Use new masks.
	(CPP_CPU_SPECS): Recognize new CPU variants.
	* invoke.texi (-mcpu): Update documentation.

Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.334.2.3
diff -c -3 -p -r1.334.2.3 i386.c
*** i386.c	2001/11/30 20:59:29	1.334.2.3
--- i386.c	2001/12/11 10:04:59
*************** override_options ()
*** 817,834 ****
      {
        const char *const name;		/* processor name or nickname.  */
        const enum processor_type processor;
      }
    const processor_alias_table[] =
      {
!       {"i386", PROCESSOR_I386},
!       {"i486", PROCESSOR_I486},
!       {"i586", PROCESSOR_PENTIUM},
!       {"pentium", PROCESSOR_PENTIUM},
!       {"i686", PROCESSOR_PENTIUMPRO},
!       {"pentiumpro", PROCESSOR_PENTIUMPRO},
!       {"k6", PROCESSOR_K6},
!       {"athlon", PROCESSOR_ATHLON},
!       {"pentium4", PROCESSOR_PENTIUM4},
      };
  
    int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
--- 817,860 ----
      {
        const char *const name;		/* processor name or nickname.  */
        const enum processor_type processor;
+       const enum pta_flags
+ 	{
+ 	  PTA_SSE = 1,
+ 	  PTA_SSE2 = 2,
+ 	  PTA_MMX = 4,
+ 	  PTA_SSEPREFETCH = 8,
+ 	  PTA_3DNOW = 16,
+ 	  PTA_3DNOW_A = 64,
+ 	} flags;
      }
    const processor_alias_table[] =
      {
!       {"i386", PROCESSOR_I386, 0},
!       {"i486", PROCESSOR_I486, 0},
!       {"i586", PROCESSOR_PENTIUM, 0},
!       {"pentium", PROCESSOR_PENTIUM, 0},
!       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
!       {"i686", PROCESSOR_PENTIUMPRO, 0},
!       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
!       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
!       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSEPREFETCH},
!       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
! 				       PTA_MMX | PTA_SSEPREFETCH},
!       {"k6", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
!       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
!       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
!       {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
! 				   | PTA_3DNOW_A},
!       {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH
! 		       | PTA_3DNOW | PTA_3DNOW_A},
!       {"athlon4", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
! 				    | PTA_3DNOW_A},
!       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
! 				      | PTA_3DNOW_A},
!       {"athlon-mobille", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH
! 					   | PTA_3DNOW | PTA_3DNOW_A},
!       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
! 				      | PTA_3DNOW_A | PTA_SSE},
      };
  
    int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
*************** override_options ()
*** 880,885 ****
--- 906,921 ----
  	    ix86_arch = processor_alias_table[i].processor;
  	    /* Default cpu tuning to the architecture.  */
  	    ix86_cpu = ix86_arch;
+ 	    if (processor_alias_table[i].flags & PTA_MMX)
+ 	      target_flags |= MASK_MMX;
+ 	    if (processor_alias_table[i].flags & PTA_3DNOW)
+ 	      target_flags |= MASK_3DNOW;
+ 	    if (processor_alias_table[i].flags & PTA_3DNOW_A)
+ 	      target_flags |= MASK_3DNOW_A;
+ 	    if (processor_alias_table[i].flags & PTA_SSE)
+ 	      target_flags |= MASK_SSE;
+ 	    if (processor_alias_table[i].flags & PTA_SSE2)
+ 	      target_flags |= MASK_SSE2;
  	    break;
  	  }
  
*************** override_options ()
*** 1019,1024 ****
--- 1055,1071 ----
       wrt NaNs.  This lets us use a shorter comparison sequence.  */
    if (flag_unsafe_math_optimizations)
      target_flags &= ~MASK_IEEE_FP;
+ 
+   if (target_flags & MASK_NOMMX)
+     target_flags &= ~(MASK_MMX);
+   if (target_flags & MASK_NOSSE)
+     target_flags &= ~(MASK_SSE | MASK_SSE2);
+   if (target_flags & MASK_NOSSE2)
+     target_flags &= ~(MASK_SSE2);
+   if (target_flags & MASK_NO3DNOW)
+     target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
+   if (target_flags & MASK_NO3DNOW_A)
+     target_flags &= ~(MASK_3DNOW_A);
  
    if (TARGET_64BIT)
      {
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.217.2.1
diff -c -3 -p -r1.217.2.1 i386.h
*** i386.h	2001/11/22 16:05:06	1.217.2.1
--- i386.h	2001/12/11 10:05:00
*************** extern int target_flags;
*** 112,136 ****
  #define MASK_NO_FANCY_MATH_387	0x00000040	/* Disable sin, cos, sqrt */
  #define MASK_OMIT_LEAF_FRAME_POINTER 0x080      /* omit leaf frame pointers */
  #define MASK_STACK_PROBE	0x00000100	/* Enable stack probing */
! #define MASK_NO_ALIGN_STROPS	0x00001000	/* Enable aligning of string ops.  */
! #define MASK_INLINE_ALL_STROPS	0x00002000	/* Inline stringops in all cases */
! #define MASK_NO_PUSH_ARGS	0x00004000	/* Use push instructions */
! #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00008000/* Accumulate outgoing args */
! #define MASK_NO_ACCUMULATE_OUTGOING_ARGS 0x00010000
! #define MASK_MMX		0x00020000	/* Support MMX regs/builtins */
! #define MASK_SSE		0x00040000	/* Support SSE regs/builtins */
! #define MASK_SSE2		0x00080000	/* Support SSE2 regs/builtins */
  #define MASK_3DNOW		0x00100000	/* Support 3Dnow builtins */
! #define MASK_3DNOW_A		0x00200000	/* Support Athlon 3Dnow builtins */
! #define MASK_128BIT_LONG_DOUBLE 0x00400000	/* long double size is 128bit */
! #define MASK_MIX_SSE_I387	0x00800000	/* Mix SSE and i387 instructions */
! #define MASK_64BIT		0x01000000	/* Produce 64bit code */
! #define MASK_NO_RED_ZONE	0x02000000	/* Do not use red zone */
  
  /* Temporary codegen switches */
! #define MASK_INTEL_SYNTAX	0x00000200
! #define MASK_DEBUG_ARG		0x00000400	/* function_arg */   
! #define MASK_DEBUG_ADDR		0x00000800	/* GO_IF_LEGITIMATE_ADDRESS */
  
  /* Use the floating point instructions */
  #define TARGET_80387 (target_flags & MASK_80387)
--- 112,141 ----
  #define MASK_NO_FANCY_MATH_387	0x00000040	/* Disable sin, cos, sqrt */
  #define MASK_OMIT_LEAF_FRAME_POINTER 0x080      /* omit leaf frame pointers */
  #define MASK_STACK_PROBE	0x00000100	/* Enable stack probing */
! #define MASK_NO_ALIGN_STROPS	0x00000200	/* Enable aligning of string ops.  */
! #define MASK_INLINE_ALL_STROPS	0x00000400	/* Inline stringops in all cases */
! #define MASK_NO_PUSH_ARGS	0x00000800	/* Use push instructions */
! #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00001000/* Accumulate outgoing args */
! #define MASK_NO_ACCUMULATE_OUTGOING_ARGS 0x00002000
! #define MASK_MMX		0x00004000	/* Support MMX regs/builtins */
! #define MASK_NOMMX		0x00008000	/* Support MMX regs/builtins */
! #define MASK_SSE		0x00010000	/* Support SSE regs/builtins */
! #define MASK_NOSSE		0x00020000	/* Support SSE regs/builtins */
! #define MASK_SSE2		0x00040000	/* Support SSE2 regs/builtins */
! #define MASK_NOSSE2		0x00080000	/* Support SSE2 regs/builtins */
  #define MASK_3DNOW		0x00100000	/* Support 3Dnow builtins */
! #define MASK_NO3DNOW		0x00200000	/* Support 3Dnow builtins */
! #define MASK_3DNOW_A		0x00400000	/* Support Athlon 3Dnow builtins */
! #define MASK_NO3DNOW_A		0x00800000	/* Support Athlon 3Dnow builtins */
! #define MASK_128BIT_LONG_DOUBLE 0x01000000	/* long double size is 128bit */
! #define MASK_MIX_SSE_I387	0x02000000	/* Mix SSE and i387 instructions */
! #define MASK_64BIT		0x04000000	/* Produce 64bit code */
! #define MASK_NO_RED_ZONE	0x08000000	/* Do not use red zone */
  
  /* Temporary codegen switches */
! #define MASK_INTEL_SYNTAX	0x10000000
! #define MASK_DEBUG_ARG		0x20000000	/* function_arg */   
! #define MASK_DEBUG_ADDR		0x40000000	/* GO_IF_LEGITIMATE_ADDRESS */
  
  /* Use the floating point instructions */
  #define TARGET_80387 (target_flags & MASK_80387)
*************** extern const int x86_epilogue_using_move
*** 340,358 ****
    { "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
!   { "no-mmx",			-MASK_MMX,				      \
      N_("Do not support MMX builtins") },				      \
    { "3dnow",                     MASK_3DNOW,				      \
      N_("Support 3DNow! builtins") },					      \
!   { "no-3dnow",                 -MASK_3DNOW,				      \
      N_("Do not support 3DNow! builtins") },				      \
    { "sse",			 MASK_SSE,				      \
      N_("Support MMX and SSE builtins and code generation") },		      \
!   { "no-sse",			-MASK_SSE,				      \
      N_("Do not support MMX and SSE builtins and code generation") },	      \
    { "sse2",			 MASK_SSE2,				      \
      N_("Support MMX, SSE and SSE2 builtins and code generation") },	      \
!   { "no-sse2",			-MASK_SSE2,				      \
      N_("Do not support MMX, SSE and SSE2 builtins and code generation") },    \
    { "mix-sse-i387",		 MASK_MIX_SSE_I387,			      \
      N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\
--- 345,363 ----
    { "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
!   { "no-mmx",			 MASK_NOMMX,				      \
      N_("Do not support MMX builtins") },				      \
    { "3dnow",                     MASK_3DNOW,				      \
      N_("Support 3DNow! builtins") },					      \
!   { "no-3dnow",                  MASK_NO3DNOW,				      \
      N_("Do not support 3DNow! builtins") },				      \
    { "sse",			 MASK_SSE,				      \
      N_("Support MMX and SSE builtins and code generation") },		      \
!   { "no-sse",			 MASK_NOSSE,				      \
      N_("Do not support MMX and SSE builtins and code generation") },	      \
    { "sse2",			 MASK_SSE2,				      \
      N_("Support MMX, SSE and SSE2 builtins and code generation") },	      \
!   { "no-sse2",			 MASK_NOSSE2,				      \
      N_("Do not support MMX, SSE and SSE2 builtins and code generation") },    \
    { "mix-sse-i387",		 MASK_MIX_SSE_I387,			      \
      N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\
*************** extern int ix86_arch;
*** 522,532 ****
  %{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\
  %{m386|mcpu=i386:-D__tune_i386__ }\
  %{m486|mcpu=i486:-D__tune_i486__ }\
! %{mpentium|mcpu=pentium|mcpu=i586:-D__tune_i586__ -D__tune_pentium__ }\
! %{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_i686__ -D__tune_pentiumpro__ }\
! %{mcpu=k6:-D__tune_k6__ }\
! %{mcpu=athlon:-D__tune_athlon__ }\
  %{mcpu=pentium4:-D__tune_pentium4__ }\
  %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
  
  #ifndef CPP_CPU_SPEC
--- 527,548 ----
  %{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\
  %{m386|mcpu=i386:-D__tune_i386__ }\
  %{m486|mcpu=i486:-D__tune_i486__ }\
! %{mpentium|mcpu=pentium|mcpu=i586|mcpu=pentium-mmx:-D__tune_i586__ -D__tune_pentium__ }\
! %{mpentiumpro|mcpu=pentiumpro|mcpu=i686|cpu=pentium2|cpu=pentium3:-D__tune_i686__\
! -D__tune_pentiumpro__ }\
! %{mcpu=k6|mcpu=k6-2|mcpu=k6-3:-D__tune_k6__ }\
! %{mcpu=athlon|mcpu=athlon-tbird|mcpu=athlon4|mcpu=athlon-xp|mcpu=athlon-mobille|mcpu=athlon-mp:\
! -D__tune_athlon__ }\
  %{mcpu=pentium4:-D__tune_pentium4__ }\
+ %{march=march=athlon-tbird|march=athlon-xp|march=athlon-mp|march=pentium3|march=pentium4:\
+ -D__SSE__ }\
+ %{march=pentium-mmx|march=k6|march=k6-2|march=k6-3\
+ march=athlon|march=athlon-tbird|march=athlon4|march=athlon-xp|march=athlon-mobille\
+ |march=athlon-mp|march=pentium2|march=pentium3|march=pentium4: -D__MMX__ }\
+ %{march=k6|march=k6-2|march=k6-3\
+ march=athlon|march=athlon-tbird|march=athlon4|march=athlon-xp|march=athlon-mobille\
+ |march=athlon-mp: -D__3dNOW__ }\
+ %{mcpu=mcpu=pentium4: -D__SSE2__ }\
  %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
  
  #ifndef CPP_CPU_SPEC
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/egcs/gcc/doc/invoke.texi,v
retrieving revision 1.75.2.4
diff -c -3 -p -r1.75.2.4 invoke.texi
*** invoke.texi	2001/11/30 21:00:30	1.75.2.4
--- invoke.texi	2001/12/11 10:05:03
*************** computers:
*** 7536,7545 ****
  @table @gcctabopt
  @item -mcpu=@var{cpu-type}
  @opindex mcpu
! Assume the defaults for the machine type @var{cpu-type} when scheduling
! instructions.  The choices for @var{cpu-type} are @samp{i386},
! @samp{i486}, @samp{i586}, @samp{i686}, @samp{pentium},
! @samp{pentiumpro}, @samp{pentium4}, @samp{k6}, and @samp{athlon}
  
  While picking a specific @var{cpu-type} will schedule things appropriately
  for that particular chip, the compiler will not generate any code that
--- 7536,7548 ----
  @table @gcctabopt
  @item -mcpu=@var{cpu-type}
  @opindex mcpu
! Tune to @var{cpu-type} everything applicable about the generated code, except
! for the ABI and the set of available instructions.  The choices for
! @var{cpu-type} are @samp{i386}, @samp{i486}, @samp{i586}, @samp{i686},
! @samp{pentium}, @samp{pentium-mmx}, @samp{pentiumpro}, @samp{pentium2},
! @samp{pentium3}, @samp{pentium4}, @samp{k6}, @samp{k6-2}, @samp{k6-3},
! @samp{athlon}, @samp{athlon-tbird}, @samp{athlon4}, @samp{athlon-xp},
! @samp{athlon-mobille} and @samp{athlon-mp}.
  
  While picking a specific @var{cpu-type} will schedule things appropriately
  for that particular chip, the compiler will not generate any code that


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]