This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[3.4-BIB] -march=k8 support


Hi,
this patch adds the -march=k8 option, which enables optimizations for AMD's
future chips.  Not much is done at the moment, but I will improve it
incrementally.

Honza

Wed Nov 20 13:16:05 CET 2002  Jan Hubicka  <jh@suse.cz>
	* config.gcc: Add k8 target alias support.
	* i386.c (*_cost): Declare costs for various variants of divides and
	multiplies.
	(k8_cost): New.
	(m_K8, m_ATHLON_K8): New macros.
	(x86_use_leave, x86_push_memory, x86_movx, x86_unroll_strlen,
	x86_cmove, x86_3dnow_a, x86_deep_branch, x86_use_fiop,
	x86_promote_QImode, x86_sub_esp_?, x86_add_esp_?,
	x86_integer_DFmode_moves, x86_partial_reg_dependency,
	x86_memory_mismatch_stall, x86_accumulate_outgoing_args,
	x86_prologue_using_move, x86_epilogue_using_move,
	x86_arch_always_fancy_math_387, x86_sse_partial_regs,
	x86_sse_typeless_stores): Set for K8.
	(override_options): Add k8 support; fix athlon alignment;
	complain about non-x86-64 capable CPU being used in x86-64 compilation.
	(ix86_issue_rate): Set for K8.
	(ix86_adjust_cost, ia32_use_dfa_pipeline_interface,
	x86_machine_dependent_reorg): Handle K8 like Athlon.
	* i386.h (struct processor_costs): Change mult_init and divide into
	arrays indexed by mode.
	(TARGET_K8, TARGET_ATHLON_K8): New macros.
	(MODE_INDEX): New macro.
	(RTX_COST): Use new costs.
	(TARGET_CPU_CPP_BUILTINS):  Define __k8__ and __tune_k8__.
	(TARGET_CPU_DEFAULT_NAMES): Add k8.
	(TARGET_CPU_DEFAULT_k8): New constant.
	(enum processor_type): Add PROCESSOR_K8.
	* i386.md (cpu attribute): Add k8.
Index: config.gcc
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config.gcc,v
retrieving revision 1.240.2.13
diff -c -3 -p -r1.240.2.13 config.gcc
*** config.gcc	19 Nov 2002 14:44:57 -0000	1.240.2.13
--- config.gcc	20 Nov 2002 12:12:10 -0000
*************** i586-*-*)
*** 2658,2663 ****
--- 2658,2666 ----
  	;;
  i686-*-* | i786-*-*)
  	case $target_alias in
+ 		k8-*)
+ 			target_cpu_default2=TARGET_CPU_DEFAULT_k8
+ 			;;
  		athlon_xp-*|athlon_mp-*|athlon_4-*)
  			target_cpu_default2=TARGET_CPU_DEFAULT_athlon_sse
  			;;
*************** x86_64-*-*)
*** 2682,2688 ****
  	# We should have hammer chip here, but it does not exist yet and
  	# thus it is not supported.  Athlon_SSE is probably equivalent feature
  	# wise to hammer from our point of view except for 64bit mode.
! 	target_cpu_default2=TARGET_CPU_DEFAULT_athlon_sse
  	;;
  alpha*-*-*)
  	case $machine in
--- 2685,2691 ----
  	# We should have hammer chip here, but it does not exist yet and
  	# thus it is not supported.  Athlon_SSE is probably equivalent feature
  	# wise to hammer from our point of view except for 64bit mode.
! 	target_cpu_default2=TARGET_CPU_DEFAULT_k8
  	;;
  alpha*-*-*)
  	case $machine in
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.447.2.24
diff -c -3 -p -r1.447.2.24 i386.c
*** config/i386/i386.c	17 Nov 2002 17:13:45 -0000	1.447.2.24
--- config/i386/i386.c	20 Nov 2002 12:12:26 -0000
*************** struct processor_costs size_cost = {	/* 
*** 55,63 ****
    3,					/* cost of a lea instruction */
    2,					/* variable shift costs */
    3,					/* constant shift costs */
!   3,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   3,					/* cost of a divide/mod */
    3,					/* cost of movsx */
    3,					/* cost of movzx */
    0,					/* "large" insn */
--- 55,63 ----
    3,					/* cost of a lea instruction */
    2,					/* variable shift costs */
    3,					/* constant shift costs */
!   {3, 3, 3, 3, 5},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {3, 3, 3, 3, 5},			/* cost of a divide/mod */
    3,					/* cost of movsx */
    3,					/* cost of movzx */
    0,					/* "large" insn */
*************** struct processor_costs i386_cost = {	/* 
*** 100,108 ****
    1,					/* cost of a lea instruction */
    3,					/* variable shift costs */
    2,					/* constant shift costs */
!   6,					/* cost of starting a multiply */
    1,					/* cost of multiply per each bit set */
!   23,					/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    15,					/* "large" insn */
--- 100,108 ----
    1,					/* cost of a lea instruction */
    3,					/* variable shift costs */
    2,					/* constant shift costs */
!   {6, 6, 6, 6, 6},			/* cost of starting a multiply */
    1,					/* cost of multiply per each bit set */
!   {23, 23, 23, 23, 23},			/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    15,					/* "large" insn */
*************** struct processor_costs i486_cost = {	/* 
*** 144,152 ****
    1,					/* cost of a lea instruction */
    3,					/* variable shift costs */
    2,					/* constant shift costs */
!   12,					/* cost of starting a multiply */
    1,					/* cost of multiply per each bit set */
!   40,					/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    15,					/* "large" insn */
--- 144,152 ----
    1,					/* cost of a lea instruction */
    3,					/* variable shift costs */
    2,					/* constant shift costs */
!   {12, 12, 12, 12, 12},			/* cost of starting a multiply */
    1,					/* cost of multiply per each bit set */
!   {40, 40, 40, 40, 40},			/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    15,					/* "large" insn */
*************** struct processor_costs pentium_cost = {
*** 188,196 ****
    1,					/* cost of a lea instruction */
    4,					/* variable shift costs */
    1,					/* constant shift costs */
!   11,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   25,					/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    8,					/* "large" insn */
--- 188,196 ----
    1,					/* cost of a lea instruction */
    4,					/* variable shift costs */
    1,					/* constant shift costs */
!   {11, 11, 11, 11, 11},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {25, 25, 25, 25, 25},			/* cost of a divide/mod */
    3,					/* cost of movsx */
    2,					/* cost of movzx */
    8,					/* "large" insn */
*************** struct processor_costs pentiumpro_cost =
*** 232,240 ****
    1,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   4,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   17,					/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    8,					/* "large" insn */
--- 232,240 ----
    1,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   {4, 4, 4, 4, 4},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {17, 17, 17, 17, 17},			/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    8,					/* "large" insn */
*************** struct processor_costs k6_cost = {
*** 276,284 ****
    2,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   3,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   18,					/* cost of a divide/mod */
    2,					/* cost of movsx */
    2,					/* cost of movzx */
    8,					/* "large" insn */
--- 276,284 ----
    2,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   {3, 3, 3, 3, 3},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {18, 18, 18, 18, 18},			/* cost of a divide/mod */
    2,					/* cost of movsx */
    2,					/* cost of movzx */
    8,					/* "large" insn */
*************** struct processor_costs athlon_cost = {
*** 320,328 ****
    2,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   5,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   42,					/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    8,					/* "large" insn */
--- 320,328 ----
    2,					/* cost of a lea instruction */
    1,					/* variable shift costs */
    1,					/* constant shift costs */
!   {5, 5, 5, 5, 5},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    8,					/* "large" insn */
*************** struct processor_costs athlon_cost = {
*** 359,372 ****
  };
  
  static const
  struct processor_costs pentium4_cost = {
    1,					/* cost of an add instruction */
    1,					/* cost of a lea instruction */
    4,					/* variable shift costs */
    4,					/* constant shift costs */
!   15,					/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   56,					/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    16,					/* "large" insn */
--- 359,416 ----
  };
  
  static const
+ struct processor_costs k8_cost = {
+   1,					/* cost of an add instruction */
+   2,					/* cost of a lea instruction */
+   1,					/* variable shift costs */
+   1,					/* constant shift costs */
+   {3, 4, 3, 4, 5},			/* cost of starting a multiply */
+   0,					/* cost of multiply per each bit set */
+   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
+   1,					/* cost of movsx */
+   1,					/* cost of movzx */
+   8,					/* "large" insn */
+   9,					/* MOVE_RATIO */
+   4,					/* cost for loading QImode using movzbl */
+   {3, 4, 3},				/* cost of loading integer registers
+ 					   in QImode, HImode and SImode.
+ 					   Relative to reg-reg move (2).  */
+   {3, 4, 3},				/* cost of storing integer registers */
+   4,					/* cost of reg,reg fld/fst */
+   {4, 4, 12},				/* cost of loading fp registers
+ 					   in SFmode, DFmode and XFmode */
+   {6, 6, 8},				/* cost of loading integer registers */
+   2,					/* cost of moving MMX register */
+   {3, 3},				/* cost of loading MMX registers
+ 					   in SImode and DImode */
+   {4, 4},				/* cost of storing MMX registers
+ 					   in SImode and DImode */
+   2,					/* cost of moving SSE register */
+   {4, 3, 6},				/* cost of loading SSE registers
+ 					   in SImode, DImode and TImode */
+   {4, 4, 5},				/* cost of storing SSE registers
+ 					   in SImode, DImode and TImode */
+   5,					/* MMX or SSE register to integer */
+   64,					/* size of prefetch block */
+   6,					/* number of parallel prefetches */
+   2,					/* Branch cost */
+   4,					/* cost of FADD and FSUB insns.  */
+   4,					/* cost of FMUL instruction.  */
+   19,					/* cost of FDIV instruction.  */
+   2,					/* cost of FABS instruction.  */
+   2,					/* cost of FCHS instruction.  */
+   35,					/* cost of FSQRT instruction.  */
+ };
+ 
+ static const
  struct processor_costs pentium4_cost = {
    1,					/* cost of an add instruction */
    1,					/* cost of a lea instruction */
    4,					/* variable shift costs */
    4,					/* constant shift costs */
!   {15, 15, 15, 15, 15},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {56, 56, 56, 56, 56},			/* cost of a divide/mod */
    1,					/* cost of movsx */
    1,					/* cost of movzx */
    16,					/* "large" insn */
*************** const struct processor_costs *ix86_cost 
*** 412,464 ****
  #define m_K6  (1<<PROCESSOR_K6)
  #define m_ATHLON  (1<<PROCESSOR_ATHLON)
  #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
  
! const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
! const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
  const int x86_zero_extend_with_and = m_486 | m_PENT;
! const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
  const int x86_double_with_add = ~m_386;
  const int x86_use_bit_test = m_386;
! const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
! const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
! const int x86_3dnow_a = m_ATHLON;
! const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
  const int x86_branch_hints = m_PENT4;
  const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
  const int x86_partial_reg_stall = m_PPRO;
  const int x86_use_loop = m_K6;
! const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
  const int x86_use_mov0 = m_K6;
  const int x86_use_cltd = ~(m_PENT | m_K6);
  const int x86_read_modify_write = ~m_PENT;
  const int x86_read_modify = ~(m_PENT | m_PPRO);
  const int x86_split_long_moves = m_PPRO;
! const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
  const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
  const int x86_single_stringop = m_386 | m_PENT4;
  const int x86_qimode_math = ~(0);
  const int x86_promote_qi_regs = 0;
  const int x86_himode_math = ~(m_PPRO);
  const int x86_promote_hi_regs = m_PPRO;
! const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
! const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
! const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
! const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
! const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
! const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
! const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
! const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
! const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
! const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
  const int x86_decompose_lea = m_PENT4;
  const int x86_shift1 = ~m_486;
! const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
  const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
  /* Set for machines where the type and dependencies are resolved on SSE register
     parts insetad of whole registers, so we may maintain just lower part of
     scalar values in proper format leaving the upper part undefined.  */
! const int x86_sse_partial_regs = m_ATHLON;
! const int x86_sse_typeless_stores = m_ATHLON;
  const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
  
  /* In case the avreage insn count for single function invocation is
--- 456,510 ----
  #define m_K6  (1<<PROCESSOR_K6)
  #define m_ATHLON  (1<<PROCESSOR_ATHLON)
  #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
+ #define m_K8  (1<<PROCESSOR_K8)
+ #define m_ATHLON_K8  (m_K8 | m_ATHLON)
  
! const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
! const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
  const int x86_zero_extend_with_and = m_486 | m_PENT;
! const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
  const int x86_double_with_add = ~m_386;
  const int x86_use_bit_test = m_386;
! const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
! const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
! const int x86_3dnow_a = m_ATHLON_K8;
! const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
  const int x86_branch_hints = m_PENT4;
  const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
  const int x86_partial_reg_stall = m_PPRO;
  const int x86_use_loop = m_K6;
! const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
  const int x86_use_mov0 = m_K6;
  const int x86_use_cltd = ~(m_PENT | m_K6);
  const int x86_read_modify_write = ~m_PENT;
  const int x86_read_modify = ~(m_PENT | m_PPRO);
  const int x86_split_long_moves = m_PPRO;
! const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
  const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
  const int x86_single_stringop = m_386 | m_PENT4;
  const int x86_qimode_math = ~(0);
  const int x86_promote_qi_regs = 0;
  const int x86_himode_math = ~(m_PPRO);
  const int x86_promote_hi_regs = m_PPRO;
! const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
! const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
! const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
! const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
! const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
! const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
! const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
! const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
! const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
! const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
  const int x86_decompose_lea = m_PENT4;
  const int x86_shift1 = ~m_486;
! const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
  const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
  /* Set for machines where the type and dependencies are resolved on SSE register
     parts insetad of whole registers, so we may maintain just lower part of
     scalar values in proper format leaving the upper part undefined.  */
! const int x86_sse_partial_regs = m_ATHLON_K8;
! const int x86_sse_typeless_stores = m_ATHLON_K8;
  const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
  
  /* In case the avreage insn count for single function invocation is
*************** override_options ()
*** 965,972 ****
        {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
        {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
        {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
!       {&athlon_cost, 0, 0, 16, 7, 64, 7, 16},
!       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}
      };
  
    static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
--- 1011,1019 ----
        {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
        {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
        {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
!       {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
!       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
!       {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
      };
  
    static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
*************** override_options ()
*** 981,987 ****
  	  PTA_MMX = 4,
  	  PTA_PREFETCH_SSE = 8,
  	  PTA_3DNOW = 16,
! 	  PTA_3DNOW_A = 64
  	} flags;
      }
    const processor_alias_table[] =
--- 1028,1035 ----
  	  PTA_MMX = 4,
  	  PTA_PREFETCH_SSE = 8,
  	  PTA_3DNOW = 16,
! 	  PTA_3DNOW_A = 64,
! 	  PTA_64BIT = 128
  	} flags;
      }
    const processor_alias_table[] =
*************** override_options ()
*** 1013,1018 ****
--- 1061,1068 ----
  				      | PTA_3DNOW_A | PTA_SSE},
        {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
  				      | PTA_3DNOW_A | PTA_SSE},
+       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      };
  
    int const pta_size = ARRAY_SIZE (processor_alias_table);
*************** override_options ()
*** 1052,1058 ****
    if (!ix86_cpu_string)
      ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
    if (!ix86_arch_string)
!     ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
  
    if (ix86_cmodel_string != 0)
      {
--- 1102,1108 ----
    if (!ix86_cpu_string)
      ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
    if (!ix86_arch_string)
!     ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
  
    if (ix86_cmodel_string != 0)
      {
*************** override_options ()
*** 1118,1123 ****
--- 1168,1175 ----
  	  target_flags |= MASK_SSE2;
  	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
  	  x86_prefetch_sse = true;
+ 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_MMX))
+ 	  error ("CPU you sellected does not support x86-64 instruction set");
  	break;
        }
  
*************** ix86_issue_rate ()
*** 11237,11242 ****
--- 11289,11295 ----
      case PROCESSOR_PENTIUMPRO:
      case PROCESSOR_PENTIUM4:
      case PROCESSOR_ATHLON:
+     case PROCESSOR_K8:
        return 3;
  
      default:
*************** ix86_adjust_cost (insn, link, dep_insn, 
*** 11448,11453 ****
--- 11501,11507 ----
        break;
  
      case PROCESSOR_ATHLON:
+     case PROCESSOR_K8:
        memory = get_attr_memory (insn);
        dep_memory = get_attr_memory (dep_insn);
  
*************** ix86_variable_issue (dump, sched_verbose
*** 11724,11730 ****
  static int
  ia32_use_dfa_pipeline_interface ()
  {
!   if (ix86_cpu == PROCESSOR_PENTIUM || ix86_cpu == PROCESSOR_ATHLON)
      return 1;
    return 0;
  }
--- 11778,11784 ----
  static int
  ia32_use_dfa_pipeline_interface ()
  {
!   if (TARGET_PENTIUM || TARGET_ATHLON_K8)
      return 1;
    return 0;
  }
*************** x86_machine_dependent_reorg (first)
*** 14499,14505 ****
  {
    edge e;
  
!   if (!TARGET_ATHLON || !optimize || optimize_size)
      return;
    for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
--- 14553,14559 ----
  {
    edge e;
  
!   if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
      return;
    for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.280.4.17
diff -c -3 -p -r1.280.4.17 i386.h
*** config/i386/i386.h	17 Nov 2002 17:13:47 -0000	1.280.4.17
--- config/i386/i386.h	20 Nov 2002 12:12:28 -0000
*************** struct processor_costs {
*** 41,49 ****
    const int lea;		/* cost of a lea instruction */
    const int shift_var;		/* variable shift costs */
    const int shift_const;	/* constant shift costs */
!   const int mult_init;		/* cost of starting a multiply */
    const int mult_bit;		/* cost of multiply per each bit set */
!   const int divide;		/* cost of a divide/mod */
    int movsx;			/* The cost of movsx operation.  */
    int movzx;			/* The cost of movzx operation.  */
    const int large_insn;		/* insns larger than this cost more */
--- 41,51 ----
    const int lea;		/* cost of a lea instruction */
    const int shift_var;		/* variable shift costs */
    const int shift_const;	/* constant shift costs */
!   const int mult_init[5];	/* cost of starting a multiply 
! 				   in QImode, HImode, SImode, DImode, TImode*/
    const int mult_bit;		/* cost of multiply per each bit set */
!   const int divide[5];		/* cost of a divide/mod 
! 				   in QImode, HImode, SImode, DImode, TImode*/
    int movsx;			/* The cost of movsx operation.  */
    int movzx;			/* The cost of movzx operation.  */
    const int large_insn;		/* insns larger than this cost more */
*************** extern int target_flags;
*** 206,211 ****
--- 208,215 ----
  #define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
  #define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
  #define TARGET_PENTIUM4 (ix86_cpu == PROCESSOR_PENTIUM4)
+ #define TARGET_K8 (ix86_cpu == PROCESSOR_K8)
+ #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
  
  #define CPUMASK (1 << ix86_cpu)
  extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
*************** extern int x86_prefetch_sse;
*** 547,552 ****
--- 551,558 ----
  	  if (last_cpu_char != 'n')				\
  	    builtin_define ("__tune_athlon_sse__");		\
  	}							\
+       else if (TARGET_K8)					\
+ 	builtin_define ("__tune_k8__");				\
        else if (TARGET_PENTIUM4)					\
  	builtin_define ("__tune_pentium4__");			\
  								\
*************** extern int x86_prefetch_sse;
*** 605,610 ****
--- 611,621 ----
  	  if (last_arch_char != 'n')				\
  	    builtin_define ("__athlon_sse__");			\
  	}							\
+       else if (ix86_arch == PROCESSOR_K8)			\
+ 	{							\
+ 	  builtin_define ("__k8");				\
+ 	  builtin_define ("__k8__");				\
+ 	}							\
        else if (ix86_arch == PROCESSOR_PENTIUM4)			\
  	{							\
  	  builtin_define ("__pentium4");			\
*************** extern int x86_prefetch_sse;
*** 626,636 ****
  #define TARGET_CPU_DEFAULT_k6_3 10
  #define TARGET_CPU_DEFAULT_athlon 11
  #define TARGET_CPU_DEFAULT_athlon_sse 12
  
  #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
  				  "pentiumpro", "pentium2", "pentium3", \
  				  "pentium4", "k6", "k6-2", "k6-3",\
! 				  "athlon", "athlon-4"}
  
  #ifndef CC1_SPEC
  #define CC1_SPEC "%(cc1_cpu) "
--- 637,648 ----
  #define TARGET_CPU_DEFAULT_k6_3 10
  #define TARGET_CPU_DEFAULT_athlon 11
  #define TARGET_CPU_DEFAULT_athlon_sse 12
+ #define TARGET_CPU_DEFAULT_k8 13
  
  #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
  				  "pentiumpro", "pentium2", "pentium3", \
  				  "pentium4", "k6", "k6-2", "k6-3",\
! 				  "athlon", "athlon-4", "k8"}
  
  #ifndef CC1_SPEC
  #define CC1_SPEC "%(cc1_cpu) "
*************** do {							\
*** 2591,2596 ****
--- 2603,2616 ----
  #define TOPLEVEL_COSTS_N_INSNS(N) \
    do { total = COSTS_N_INSNS (N); goto egress_rtx_costs; } while (0)
  
+ /* Return index of given mode in mult and division cost tables.  */
+ #define MODE_INDEX(mode)					\
+   ((mode) == QImode ? 0						\
+    : (mode) == HImode ? 1					\
+    : (mode) == SImode ? 2					\
+    : (mode) == DImode ? 3					\
+    : 4)
+ 
  /* Like `CONST_COSTS' but applies to nonconstant RTL expressions.
     This can be used, for example, to indicate how costly a multiply
     instruction is.  In writing this macro, you can use the construct
*************** do {							\
*** 2676,2685 ****
--- 2696,2707 ----
  	  } 								\
  									\
  	TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init			\
+ 				[MODE_INDEX (GET_MODE (X))]		\
  			        + nbits * ix86_cost->mult_bit);		\
        }									\
      else			/* This is arbitrary */			\
        TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init			\
+ 			      [MODE_INDEX (GET_MODE (X))]		\
  			      + 7 * ix86_cost->mult_bit);		\
  									\
    case DIV:								\
*************** do {							\
*** 2689,2695 ****
      if (FLOAT_MODE_P (GET_MODE (X)))					\
        TOPLEVEL_COSTS_N_INSNS (ix86_cost->fdiv);				\
      else								\
!       TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide);			\
      break;								\
  									\
    case PLUS:								\
--- 2711,2718 ----
      if (FLOAT_MODE_P (GET_MODE (X)))					\
        TOPLEVEL_COSTS_N_INSNS (ix86_cost->fdiv);				\
      else								\
!       TOPLEVEL_COSTS_N_INSNS (ix86_cost->divide				\
! 			      [MODE_INDEX (GET_MODE (X))]);		\
      break;								\
  									\
    case PLUS:								\
*************** enum processor_type
*** 3288,3293 ****
--- 3311,3317 ----
    PROCESSOR_K6,
    PROCESSOR_ATHLON,
    PROCESSOR_PENTIUM4,
+   PROCESSOR_K8,
    PROCESSOR_max
  };
  
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.380.4.27
diff -c -3 -p -r1.380.4.27 i386.md
*** config/i386/i386.md	17 Nov 2002 13:02:55 -0000	1.380.4.27
--- config/i386/i386.md	20 Nov 2002 12:12:40 -0000
***************
*** 132,138 ****
  
  ;; Processor type.  This attribute must exactly match the processor_type
  ;; enumeration in i386.h.
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4"
    (const (symbol_ref "ix86_cpu")))
  
  ;; A basic instruction type.  Refinements due to arguments to be
--- 132,138 ----
  
  ;; Processor type.  This attribute must exactly match the processor_type
  ;; enumeration in i386.h.
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8"
    (const (symbol_ref "ix86_cpu")))
  
  ;; A basic instruction type.  Refinements due to arguments to be


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]