This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFC: a patch for tuning gcc to PENTIUM_M


  This is not a patch that I propose to commit.  It could be used for
tuning gcc for Pentium-M.  I tested it two months ago on a Pentium-M
machine with 128MB of memory.  That is not enough memory to run
SPEC2000, so I tested the patch on SPEC95 instead.  Unfortunately, I no
longer have a Pentium-M machine available, so perhaps somebody else
could use this patch for further tuning.

  The patch resulted in 1-2% better SPEC95 scores in comparison with all
other possible tunings.

  One problem with the patch is that a PentiumPro DFA description has
been added since I worked on the patch, and the old code for tuning the
insn scheduler is gone.  I had used the old PentiumPro description and
functions for tuning the scheduler (I found it important to follow the
PentiumPro 4-1-1 micro-ops rule).  Therefore I modified the patch to
reflect this change.  I missed the discussion about the PentiumPro DFA
description; I would have recommended using a Pentium-M machine to
compare it with the old description.

  Another problem with the patch is that it produces bigger code than
tuning for pentiumpro does.  However, I found that it is really
important to align labels to a 16-byte boundary.  We could switch such
alignment off when -Os is used.


Base flags: -O2 -march=i686 -mtune=i686
Peak flags: -O2 -march=i686 -mtune=pentium-m

========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
099.go         4600   71.1   64.7   4600   71.8   64.1
124.m88ksim    1900   35.5   53.5   1900   35.6   53.4
126.gcc        1700    135   12.6   1700    121   14.1
129.compress   1800   29.2   61.6   1800   29.8   60.5
130.li         1900   32.4   58.6   1900   29.9   63.5
132.ijpeg      2400   45.7   52.5   2400   43.6   55.1
134.perl       1900   25.3   75.0   1900   25.5   74.6
147.vortex     2700   44.5   60.6   2700   43.4   62.2
SPECint_base95 (Geom. Mean)   49.8
SPECint95 (Geom. Mean)        51.2


----------------CINT95-----------------
2.589%         192228         197204 099.go
3.045%         113328         116779 124.m88ksim
1.567%    1.12991e+06    1.14762e+06 126.gcc
4.058%           6308           6564 129.compress
2.776%          48808          50163 130.li
2.736%         126141         129592 132.ijpeg
0.328%         237804         238583 134.perl
4.334%         574533         599432 147.vortex
Average = 1.94841%

========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
101.tomcatv    3700   51.8   71.4   3700   51.1   72.3
102.swim       8600   83.6    103   8600   83.6    103
103.su2cor     1400   41.0   34.2   1400   41.1   34.1
104.hydro2d    2400   78.1   30.7   2400   78.7   30.5
107.mgrid      2500   46.5   53.8   2500   46.2   54.1
110.applu      2200   68.5   32.1   2200   69.1   31.8
125.turb3d     4100   78.3   52.3   4100   78.1   52.5
141.apsi       2100   36.3   57.8   2100   35.8   58.7
145.fpppp      9600   83.1    115   9600   77.6    124
146.wave5      3000   43.0   69.7   3000   42.8   70.1
SPECfp_base95 (Geom. Mean)    56.4
SPECfp95 (Geom. Mean)         56.9


Base flags: -O2 -g -march=i386 -mtune=i386
Peak flags: -O2 -g -march=i386 -mtune=pentium-m
========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
099.go         4600   75.3   61.1   4600   78.0   59.0
124.m88ksim    1900   37.4   50.8   1900   36.5   52.0
126.gcc        1700    166   10.3   1700    191   8.91
129.compress   1800   31.1   57.9   1800   31.2   57.8
130.li         1900   32.0   59.3   1900   30.8   61.7
132.ijpeg      2400   54.0   44.5   2400   46.2   52.0
134.perl       1900   28.2   67.3   1900   26.1   72.9
147.vortex     2700   43.6   61.9   2700   46.1   58.6
SPECint_base95 (Geom. Mean)   46.1
SPECint95 (Geom. Mean)        46.4


========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
101.tomcatv    3700   51.6   71.7   3700   51.3   72.1
102.swim       8600   83.9    103   8600   83.1    103
103.su2cor     1400   42.0   33.4   1400   41.1   34.1
104.hydro2d    2400   80.2   29.9   2400   79.5   30.2
107.mgrid      2500   47.3   52.9   2500   46.8   53.4
110.applu      2200   66.5   33.1   2200   69.0   31.9
125.turb3d     4100   80.3   51.0   4100   76.9   53.3
141.apsi       2100   36.7   57.3   2100   36.6   57.4
145.fpppp      9600   83.7    115   9600   77.2    124
146.wave5      3000   43.3   69.3   3000   43.1   69.5
SPECfp_base95 (Geom. Mean)    55.9
SPECfp95 (Geom. Mean)         56.7


Base flags: -O2 -g -march=pentium4 -mtune=pentium4
Peak flags: -O2 -g -march=pentium4 -mtune=pentium-m
========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
099.go         4600   74.6   61.7   4600   71.8   64.1
124.m88ksim    1900   37.1   51.2   1900   35.0   54.4
126.gcc        1700    147   11.6   1700    150   11.3
129.compress   1800   29.7   60.7   1800   30.8   58.4
130.li         1900   32.4   58.7   1900   29.9   63.6
132.ijpeg      2400   44.3   54.2   2400   45.0   53.3
134.perl       1900   24.5   77.4   1900   25.8   73.6
147.vortex     2700   43.8   61.6   2700   43.7   61.8
SPECint_base95 (Geom. Mean)   49.1
SPECint95 (Geom. Mean)        49.4


========================================================================
Benchmark       Ref   Base   Base    Ref   Peak   Peak
               Time   Time  Ratio   Time   Time  Ratio
101.tomcatv    3700   51.9   71.4   3700   51.9   71.3
102.swim       8600   83.6    103   8600   84.2    102
103.su2cor     1400   41.3   33.9   1400   41.3   33.9
104.hydro2d    2400   78.8   30.5   2400   78.2   30.7
107.mgrid      2500   46.4   53.9   2500   44.4   56.3
110.applu      2200   67.6   32.5   2200   70.1   31.4
125.turb3d     4100   75.2   54.5   4100   80.8   50.8
141.apsi       2100   37.3   56.4   2100   35.8   58.6
145.fpppp      9600   83.7    115   9600   77.4    124
146.wave5      3000   37.2   80.7   3000   37.2   80.8
SPECfp_base95 (Geom. Mean)    57.2
SPECfp95 (Geom. Mean)         57.6



Vlad


Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.383
diff -c -d -p -r1.383 i386.h
*** config/i386/i386.h	2 Apr 2004 15:05:51 -0000	1.383
--- config/i386/i386.h	2 Apr 2004 16:08:00 -0000
*************** extern int target_flags;
*** 219,224 ****
--- 219,225 ----
  #define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
  #define TARGET_K8 (ix86_tune == PROCESSOR_K8)
  #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
+ #define TARGET_PENTIUM_M (ix86_tune == PROCESSOR_PENTIUM_M)
  #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
  
  #define TUNEMASK (1 << ix86_tune)
*************** extern int x86_prefetch_sse;
*** 607,612 ****
--- 608,615 ----
  	builtin_define ("__tune_k8__");				\
        else if (TARGET_PENTIUM4)					\
  	builtin_define ("__tune_pentium4__");			\
+       else if (TARGET_PENTIUM_M)				\
+ 	builtin_define ("__tune_pentium_m__");			\
        else if (TARGET_NOCONA)					\
  	builtin_define ("__tune_nocona__");			\
  								\
*************** extern int x86_prefetch_sse;
*** 677,682 ****
--- 680,690 ----
  	  builtin_define ("__pentium4");			\
  	  builtin_define ("__pentium4__");			\
  	}							\
+       else if (ix86_arch == PROCESSOR_PENTIUM_M)		\
+ 	{							\
+ 	  builtin_define ("__pentium_m");			\
+ 	  builtin_define ("__pentium_m__");			\
+ 	}							\
        else if (ix86_arch == PROCESSOR_NOCONA)			\
  	{							\
  	  builtin_define ("__nocona");				\
*************** enum processor_type
*** 2952,2957 ****
--- 2960,2966 ----
    PROCESSOR_ATHLON,
    PROCESSOR_PENTIUM4,
    PROCESSOR_K8,
+   PROCESSOR_PENTIUM_M,
    PROCESSOR_NOCONA,
    PROCESSOR_max
  };
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.523
diff -c -d -p -r1.523 i386.md
*** config/i386/i386.md	2 Apr 2004 15:05:55 -0000	1.523
--- config/i386/i386.md	2 Apr 2004 16:08:03 -0000
***************
*** 149,155 ****
  
  ;; Processor type.  This attribute must exactly match the processor_type
  ;; enumeration in i386.h.
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona"
    (const (symbol_ref "ix86_tune")))
  
  ;; A basic instruction type.  Refinements due to arguments to be
--- 149,155 ----
  
  ;; Processor type.  This attribute must exactly match the processor_type
  ;; enumeration in i386.h.
! (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,pentium_m,nocona"
    (const (symbol_ref "ix86_tune")))
  
  ;; A basic instruction type.  Refinements due to arguments to be
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.661
diff -c -d -p -r1.661 i386.c
*** config/i386/i386.c	2 Apr 2004 15:05:53 -0000	1.661
--- config/i386/i386.c	2 Apr 2004 16:08:06 -0000
*************** struct processor_costs pentium4_cost = {
*** 458,463 ****
--- 458,507 ----
  };
  
  static const
+ struct processor_costs pentium_m_cost = {
+   1,					/* cost of an add instruction */
+   1,					/* cost of a lea instruction */
+   2,					/* variable shift costs */
+   2,					/* constant shift costs */
+   {4, 4, 4, 4, 4},			/* cost of starting a multiply */
+   0,					/* cost of multiply per each bit set */
+   {56, 56, 56, 56, 56},			/* cost of a divide/mod */
+   1,					/* cost of movsx */
+   1,					/* cost of movzx */
+   16,					/* "large" insn */
+   6,					/* MOVE_RATIO */
+   2,					/* cost for loading QImode using movzbl */
+   {3, 3, 3},				/* cost of loading integer registers
+ 					   in QImode, HImode and SImode.
+ 					   Relative to reg-reg move (2).  */
+   {3, 3, 3},				/* cost of storing integer registers */
+   2,					/* cost of reg,reg fld/fst */
+   {3, 3, 3},				/* cost of loading fp registers
+ 					   in SFmode, DFmode and XFmode */
+   {3, 3, 3},				/* cost of loading integer registers */
+   2,					/* cost of moving MMX register */
+   {3, 3},				/* cost of loading MMX registers
+ 					   in SImode and DImode */
+   {3, 3},				/* cost of storing MMX registers
+ 					   in SImode and DImode */
+   1,					/* cost of moving SSE register */
+   {3, 3, 3},				/* cost of loading SSE registers
+ 					   in SImode, DImode and TImode */
+   {3, 3, 3},				/* cost of storing SSE registers
+ 					   in SImode, DImode and TImode */
+   2,					/* MMX or SSE register to integer */
+   64,					/* size of prefetch block */
+   6,					/* number of parallel prefetches */
+   2,					/* Branch cost */
+   5,					/* cost of FADD and FSUB insns.  */
+   7,					/* cost of FMUL instruction.  */
+   43,					/* cost of FDIV instruction.  */
+   2,					/* cost of FABS instruction.  */
+   2,					/* cost of FCHS instruction.  */
+   43,					/* cost of FSQRT instruction.  */
+ };
+ 
+ static const
  struct processor_costs nocona_cost = {
    1,					/* cost of an add instruction */
    1,					/* cost of a lea instruction */
*************** const struct processor_costs *ix86_cost 
*** 513,532 ****
  #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
  #define m_K8  (1<<PROCESSOR_K8)
  #define m_ATHLON_K8  (m_K8 | m_ATHLON)
  #define m_NOCONA  (1<<PROCESSOR_NOCONA)
  
  const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
! const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
  const int x86_zero_extend_with_and = m_486 | m_PENT;
! const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
  const int x86_double_with_add = ~m_386;
  const int x86_use_bit_test = m_386;
  const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
! const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
  const int x86_3dnow_a = m_ATHLON_K8;
! const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
  const int x86_branch_hints = m_PENT4 | m_NOCONA;
! const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
  const int x86_partial_reg_stall = m_PPRO;
  const int x86_use_loop = m_K6;
  const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
--- 557,577 ----
  #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
  #define m_K8  (1<<PROCESSOR_K8)
  #define m_ATHLON_K8  (m_K8 | m_ATHLON)
+ #define m_PENT_M  (1<<PROCESSOR_PENTIUM_M)
  #define m_NOCONA  (1<<PROCESSOR_NOCONA)
  
  const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
! const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_zero_extend_with_and = m_486 | m_PENT;
! const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_PENT_M | m_NOCONA /* m_386 | m_K6 */;
  const int x86_double_with_add = ~m_386;
  const int x86_use_bit_test = m_386;
  const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
! const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_3dnow_a = m_ATHLON_K8;
! const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_branch_hints = m_PENT4 | m_NOCONA;
! const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_partial_reg_stall = m_PPRO;
  const int x86_use_loop = m_K6;
  const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
*************** const int x86_read_modify = ~(m_PENT | m
*** 537,561 ****
  const int x86_split_long_moves = m_PPRO;
  const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
  const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
! const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
  const int x86_qimode_math = ~(0);
  const int x86_promote_qi_regs = 0;
  const int x86_himode_math = ~(m_PPRO);
  const int x86_promote_hi_regs = m_PPRO;
! const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
! const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
! const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
! const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
! const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
! const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
! const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
! const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
! const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
! const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
  const int x86_decompose_lea = m_PENT4 | m_NOCONA;
  const int x86_shift1 = ~m_486;
! const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
! const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
  /* Set for machines where the type and dependencies are resolved on SSE register
     parts instead of whole registers, so we may maintain just lower part of
     scalar values in proper format leaving the upper part undefined.  */
--- 582,606 ----
  const int x86_split_long_moves = m_PPRO;
  const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
  const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
! const int x86_single_stringop = m_386 | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_qimode_math = ~(0);
  const int x86_promote_qi_regs = 0;
  const int x86_himode_math = ~(m_PPRO);
  const int x86_promote_hi_regs = m_PPRO;
! const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA | m_PPRO);
! const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA | m_PPRO;
! const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_PENT_M;
! const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_PENT_M;
  const int x86_decompose_lea = m_PENT4 | m_NOCONA;
  const int x86_shift1 = ~m_486;
! const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_PENT_M | m_NOCONA;
! const int x86_sse_partial_reg_dependency = m_PENT4 | m_PENT_M | m_NOCONA | m_PPRO;
  /* Set for machines where the type and dependencies are resolved on SSE register
     parts instead of whole registers, so we may maintain just lower part of
     scalar values in proper format leaving the upper part undefined.  */
*************** const int x86_sse_partial_regs = m_ATHLO
*** 564,574 ****
     need for extra instructions beforehand  */
  const int x86_sse_partial_regs_for_cvtsd2ss = 0;
  const int x86_sse_typeless_stores = m_ATHLON_K8;
! const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
  const int x86_use_ffreep = m_ATHLON_K8;
  const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
  const int x86_inter_unit_moves = ~(m_ATHLON_K8);
! const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
  /* Some CPU cores are not able to predict more than 4 branch instructions in
     the 16 byte window.  */
  const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
--- 609,619 ----
     need for extra instructions beforehand  */
  const int x86_sse_partial_regs_for_cvtsd2ss = 0;
  const int x86_sse_typeless_stores = m_ATHLON_K8;
! const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_PENT_M | m_NOCONA;
  const int x86_use_ffreep = m_ATHLON_K8;
  const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
  const int x86_inter_unit_moves = ~(m_ATHLON_K8);
! const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PENT_M | m_NOCONA | m_PPRO;
  /* Some CPU cores are not able to predict more than 4 branch instructions in
     the 16 byte window.  */
  const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
*************** override_options (void)
*** 1115,1120 ****
--- 1160,1166 ----
        {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
        {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
        {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
+       {&pentium_m_cost, 0, 0, 16, 15, 16, 15, 16},
        {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
      };
  
*************** override_options (void)
*** 1151,1157 ****
        {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
        {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
        {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
!       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
        {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
  				       | PTA_MMX | PTA_PREFETCH_SSE},
        {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
--- 1197,1203 ----
        {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
        {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
        {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
!       {"pentium-m", PROCESSOR_PENTIUM_M, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
        {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
  				       | PTA_MMX | PTA_PREFETCH_SSE},
        {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
*************** incdec_operand (rtx op, enum machine_mod
*** 3818,3824 ****
  {
    /* On Pentium4, the inc and dec operations causes extra dependency on flag
       registers, since carry flag is not set.  */
!   if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
      return 0;
    return op == const1_rtx || op == constm1_rtx;
  }
--- 3864,3870 ----
  {
    /* On Pentium4, the inc and dec operations causes extra dependency on flag
       registers, since carry flag is not set.  */
!   if ((TARGET_PENTIUM4 || TARGET_PENTIUM_M || TARGET_NOCONA) && !optimize_size)
      return 0;
    return op == const1_rtx || op == constm1_rtx;
  }
*************** ix86_issue_rate (void)
*** 12119,12124 ****
--- 12165,12171 ----
  
      case PROCESSOR_PENTIUMPRO:
      case PROCESSOR_PENTIUM4:
+     case PROCESSOR_PENTIUM_M:
      case PROCESSOR_ATHLON:
      case PROCESSOR_K8:
      case PROCESSOR_NOCONA:
*************** ia32_use_dfa_pipeline_interface (void)
*** 12368,12373 ****
--- 12415,12421 ----
  {
    if (TARGET_PENTIUM
        || TARGET_PENTIUMPRO
+       || TARGET_PENTIUM_M
        || TARGET_ATHLON_K8)
      return 1;
    return 0;
*************** ia32_multipass_dfa_lookahead (void)
*** 12383,12389 ****
    if (ix86_tune == PROCESSOR_PENTIUM)
      return 2;
  
!   if (ix86_tune == PROCESSOR_PENTIUMPRO)
      return 1;
  
    else
--- 12431,12437 ----
    if (ix86_tune == PROCESSOR_PENTIUM)
      return 2;
  
!   if (ix86_tune == PROCESSOR_PENTIUMPRO || ix86_tune == PROCESSOR_PENTIUM_M)
      return 1;
  
    else

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]