[4.0 and mainline] Fix multiplication by constant expansion

Jan Hubicka jh@suse.cz
Mon Jan 2 15:56:00 GMT 2006


Hi,
this is the first part of the patch, as discussed.
Bootstrapped/regtested on i686-pc-linux-gnu.

2006-01-02  Jan Hubicka  <jh@suse.cz>

	* config/i386/i386.c (*_cost): Add COSTS_N_INSNS.
	(ix86_rtx_costs): Do not use COSTS_N_INSNS.  Do not cost a shift
	by 2 or 3 as lea when TARGET_DECOMPOSE_LEA.
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 108713)
--- config/i386/i386.c	(working copy)
*************** Boston, MA 02110-1301, USA.  */
*** 65,79 ****
  /* Processor costs (relative to an add) */
  static const
  struct processor_costs size_cost = {	/* costs for tunning for size */
!   2,					/* cost of an add instruction */
!   3,					/* cost of a lea instruction */
!   2,					/* variable shift costs */
!   3,					/* constant shift costs */
!   {3, 3, 3, 3, 5},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {3, 3, 3, 3, 5},			/* cost of a divide/mod */
!   3,					/* cost of movsx */
!   3,					/* cost of movzx */
    0,					/* "large" insn */
    2,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
--- 65,87 ----
  /* Processor costs (relative to an add) */
  static const
  struct processor_costs size_cost = {	/* costs for tunning for size */
!   COSTS_N_INSNS (2),			/* cost of an add instruction */
!   COSTS_N_INSNS (3),			/* cost of a lea instruction */
!   COSTS_N_INSNS (2),			/* variable shift costs */
!   COSTS_N_INSNS (3),			/* constant shift costs */
!   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (3),			/*                               HI */
!    COSTS_N_INSNS (3),			/*                               SI */
!    COSTS_N_INSNS (3),			/*                               DI */
!    COSTS_N_INSNS (5)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (3),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (3),			/*                          HI */
!    COSTS_N_INSNS (3),			/*                          SI */
!    COSTS_N_INSNS (3),			/*                          DI */
!    COSTS_N_INSNS (5)},			/*                          other */
!   COSTS_N_INSNS (3),			/* cost of movsx */
!   COSTS_N_INSNS (3),			/* cost of movzx */
    0,					/* "large" insn */
    2,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
*************** struct processor_costs size_cost = {	/* 
*** 99,124 ****
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   2,					/* cost of FADD and FSUB insns.  */
!   2,					/* cost of FMUL instruction.  */
!   2,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   2,					/* cost of FSQRT instruction.  */
  };
  
  /* Processor costs (relative to an add) */
  static const
  struct processor_costs i386_cost = {	/* 386 specific costs */
!   1,					/* cost of an add instruction */
!   1,					/* cost of a lea instruction */
!   3,					/* variable shift costs */
!   2,					/* constant shift costs */
!   {6, 6, 6, 6, 6},			/* cost of starting a multiply */
!   1,					/* cost of multiply per each bit set */
!   {23, 23, 23, 23, 23},			/* cost of a divide/mod */
!   3,					/* cost of movsx */
!   2,					/* cost of movzx */
    15,					/* "large" insn */
    3,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
--- 107,140 ----
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FSQRT instruction.  */
  };
  
  /* Processor costs (relative to an add) */
  static const
  struct processor_costs i386_cost = {	/* 386 specific costs */
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (1),			/* cost of a lea instruction */
!   COSTS_N_INSNS (3),			/* variable shift costs */
!   COSTS_N_INSNS (2),			/* constant shift costs */
!   {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (6),			/*                               HI */
!    COSTS_N_INSNS (6),			/*                               SI */
!    COSTS_N_INSNS (6),			/*                               DI */
!    COSTS_N_INSNS (6)},			/*                               other */
!   COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (23),			/*                          HI */
!    COSTS_N_INSNS (23),			/*                          SI */
!    COSTS_N_INSNS (23),			/*                          DI */
!    COSTS_N_INSNS (23)},			/*                          other */
!   COSTS_N_INSNS (3),			/* cost of movsx */
!   COSTS_N_INSNS (2),			/* cost of movzx */
    15,					/* "large" insn */
    3,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
*************** struct processor_costs i386_cost = {	/* 
*** 144,168 ****
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   23,					/* cost of FADD and FSUB insns.  */
!   27,					/* cost of FMUL instruction.  */
!   88,					/* cost of FDIV instruction.  */
!   22,					/* cost of FABS instruction.  */
!   24,					/* cost of FCHS instruction.  */
!   122,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs i486_cost = {	/* 486 specific costs */
!   1,					/* cost of an add instruction */
!   1,					/* cost of a lea instruction */
!   3,					/* variable shift costs */
!   2,					/* constant shift costs */
!   {12, 12, 12, 12, 12},			/* cost of starting a multiply */
!   1,					/* cost of multiply per each bit set */
!   {40, 40, 40, 40, 40},			/* cost of a divide/mod */
!   3,					/* cost of movsx */
!   2,					/* cost of movzx */
    15,					/* "large" insn */
    3,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
--- 160,192 ----
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs i486_cost = {	/* 486 specific costs */
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (1),			/* cost of a lea instruction */
!   COSTS_N_INSNS (3),			/* variable shift costs */
!   COSTS_N_INSNS (2),			/* constant shift costs */
!   {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (12),			/*                               HI */
!    COSTS_N_INSNS (12),			/*                               SI */
!    COSTS_N_INSNS (12),			/*                               DI */
!    COSTS_N_INSNS (12)},			/*                               other */
!   COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (40),			/*                          HI */
!    COSTS_N_INSNS (40),			/*                          SI */
!    COSTS_N_INSNS (40),			/*                          DI */
!    COSTS_N_INSNS (40)},			/*                          other */
!   COSTS_N_INSNS (3),			/* cost of movsx */
!   COSTS_N_INSNS (2),			/* cost of movzx */
    15,					/* "large" insn */
    3,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
*************** struct processor_costs i486_cost = {	/* 
*** 188,212 ****
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   8,					/* cost of FADD and FSUB insns.  */
!   16,					/* cost of FMUL instruction.  */
!   73,					/* cost of FDIV instruction.  */
!   3,					/* cost of FABS instruction.  */
!   3,					/* cost of FCHS instruction.  */
!   83,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentium_cost = {
!   1,					/* cost of an add instruction */
!   1,					/* cost of a lea instruction */
!   4,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {11, 11, 11, 11, 11},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {25, 25, 25, 25, 25},			/* cost of a divide/mod */
!   3,					/* cost of movsx */
!   2,					/* cost of movzx */
    8,					/* "large" insn */
    6,					/* MOVE_RATIO */
    6,					/* cost for loading QImode using movzbl */
--- 212,244 ----
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentium_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (1),			/* cost of a lea instruction */
!   COSTS_N_INSNS (4),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (11),			/*                               HI */
!    COSTS_N_INSNS (11),			/*                               SI */
!    COSTS_N_INSNS (11),			/*                               DI */
!    COSTS_N_INSNS (11)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (25),			/*                          HI */
!    COSTS_N_INSNS (25),			/*                          SI */
!    COSTS_N_INSNS (25),			/*                          DI */
!    COSTS_N_INSNS (25)},			/*                          other */
!   COSTS_N_INSNS (3),			/* cost of movsx */
!   COSTS_N_INSNS (2),			/* cost of movzx */
    8,					/* "large" insn */
    6,					/* MOVE_RATIO */
    6,					/* cost for loading QImode using movzbl */
*************** struct processor_costs pentium_cost = {
*** 232,256 ****
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   3,					/* cost of FADD and FSUB insns.  */
!   3,					/* cost of FMUL instruction.  */
!   39,					/* cost of FDIV instruction.  */
!   1,					/* cost of FABS instruction.  */
!   1,					/* cost of FCHS instruction.  */
!   70,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentiumpro_cost = {
!   1,					/* cost of an add instruction */
!   1,					/* cost of a lea instruction */
!   1,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {4, 4, 4, 4, 4},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {17, 17, 17, 17, 17},			/* cost of a divide/mod */
!   1,					/* cost of movsx */
!   1,					/* cost of movzx */
    8,					/* "large" insn */
    6,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
--- 264,296 ----
    0,					/* size of prefetch block */
    0,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentiumpro_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (1),			/* cost of a lea instruction */
!   COSTS_N_INSNS (1),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (4),			/*                               HI */
!    COSTS_N_INSNS (4),			/*                               SI */
!    COSTS_N_INSNS (4),			/*                               DI */
!    COSTS_N_INSNS (4)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (17),			/*                          HI */
!    COSTS_N_INSNS (17),			/*                          SI */
!    COSTS_N_INSNS (17),			/*                          DI */
!    COSTS_N_INSNS (17)},			/*                          other */
!   COSTS_N_INSNS (1),			/* cost of movsx */
!   COSTS_N_INSNS (1),			/* cost of movzx */
    8,					/* "large" insn */
    6,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
*************** struct processor_costs pentiumpro_cost =
*** 276,300 ****
    32,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   3,					/* cost of FADD and FSUB insns.  */
!   5,					/* cost of FMUL instruction.  */
!   56,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   56,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs k6_cost = {
!   1,					/* cost of an add instruction */
!   2,					/* cost of a lea instruction */
!   1,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {3, 3, 3, 3, 3},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {18, 18, 18, 18, 18},			/* cost of a divide/mod */
!   2,					/* cost of movsx */
!   2,					/* cost of movzx */
    8,					/* "large" insn */
    4,					/* MOVE_RATIO */
    3,					/* cost for loading QImode using movzbl */
--- 316,348 ----
    32,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs k6_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (2),			/* cost of a lea instruction */
!   COSTS_N_INSNS (1),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (3),			/*                               HI */
!    COSTS_N_INSNS (3),			/*                               SI */
!    COSTS_N_INSNS (3),			/*                               DI */
!    COSTS_N_INSNS (3)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (18),			/*                          HI */
!    COSTS_N_INSNS (18),			/*                          SI */
!    COSTS_N_INSNS (18),			/*                          DI */
!    COSTS_N_INSNS (18)},			/*                          other */
!   COSTS_N_INSNS (2),			/* cost of movsx */
!   COSTS_N_INSNS (2),			/* cost of movzx */
    8,					/* "large" insn */
    4,					/* MOVE_RATIO */
    3,					/* cost for loading QImode using movzbl */
*************** struct processor_costs k6_cost = {
*** 320,344 ****
    32,					/* size of prefetch block */
    1,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   2,					/* cost of FADD and FSUB insns.  */
!   2,					/* cost of FMUL instruction.  */
!   56,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   56,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs athlon_cost = {
!   1,					/* cost of an add instruction */
!   2,					/* cost of a lea instruction */
!   1,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {5, 5, 5, 5, 5},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
!   1,					/* cost of movsx */
!   1,					/* cost of movzx */
    8,					/* "large" insn */
    9,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
--- 368,400 ----
    32,					/* size of prefetch block */
    1,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs athlon_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (2),			/* cost of a lea instruction */
!   COSTS_N_INSNS (1),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (5),			/*                               HI */
!    COSTS_N_INSNS (5),			/*                               SI */
!    COSTS_N_INSNS (5),			/*                               DI */
!    COSTS_N_INSNS (5)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (26),			/*                          HI */
!    COSTS_N_INSNS (42),			/*                          SI */
!    COSTS_N_INSNS (74),			/*                          DI */
!    COSTS_N_INSNS (74)},			/*                          other */
!   COSTS_N_INSNS (1),			/* cost of movsx */
!   COSTS_N_INSNS (1),			/* cost of movzx */
    8,					/* "large" insn */
    9,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
*************** struct processor_costs athlon_cost = {
*** 364,388 ****
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    5,					/* Branch cost */
!   4,					/* cost of FADD and FSUB insns.  */
!   4,					/* cost of FMUL instruction.  */
!   24,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   35,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs k8_cost = {
!   1,					/* cost of an add instruction */
!   2,					/* cost of a lea instruction */
!   1,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {3, 4, 3, 4, 5},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {18, 26, 42, 74, 74},			/* cost of a divide/mod */
!   1,					/* cost of movsx */
!   1,					/* cost of movzx */
    8,					/* "large" insn */
    9,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
--- 420,452 ----
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    5,					/* Branch cost */
!   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs k8_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (2),			/* cost of a lea instruction */
!   COSTS_N_INSNS (1),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (4),			/*                               HI */
!    COSTS_N_INSNS (3),			/*                               SI */
!    COSTS_N_INSNS (4),			/*                               DI */
!    COSTS_N_INSNS (5)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (26),			/*                          HI */
!    COSTS_N_INSNS (42),			/*                          SI */
!    COSTS_N_INSNS (74),			/*                          DI */
!    COSTS_N_INSNS (74)},			/*                          other */
!   COSTS_N_INSNS (1),			/* cost of movsx */
!   COSTS_N_INSNS (1),			/* cost of movzx */
    8,					/* "large" insn */
    9,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
*************** struct processor_costs k8_cost = {
*** 408,432 ****
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    5,					/* Branch cost */
!   4,					/* cost of FADD and FSUB insns.  */
!   4,					/* cost of FMUL instruction.  */
!   19,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   35,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentium4_cost = {
!   1,					/* cost of an add instruction */
!   3,					/* cost of a lea instruction */
!   4,					/* variable shift costs */
!   4,					/* constant shift costs */
!   {15, 15, 15, 15, 15},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {56, 56, 56, 56, 56},			/* cost of a divide/mod */
!   1,					/* cost of movsx */
!   1,					/* cost of movzx */
    16,					/* "large" insn */
    6,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
--- 472,504 ----
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    5,					/* Branch cost */
!   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs pentium4_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (3),			/* cost of a lea instruction */
!   COSTS_N_INSNS (4),			/* variable shift costs */
!   COSTS_N_INSNS (4),			/* constant shift costs */
!   {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (15),			/*                               HI */
!    COSTS_N_INSNS (15),			/*                               SI */
!    COSTS_N_INSNS (15),			/*                               DI */
!    COSTS_N_INSNS (15)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (56),			/*                          HI */
!    COSTS_N_INSNS (56),			/*                          SI */
!    COSTS_N_INSNS (56),			/*                          DI */
!    COSTS_N_INSNS (56)},			/*                          other */
!   COSTS_N_INSNS (1),			/* cost of movsx */
!   COSTS_N_INSNS (1),			/* cost of movzx */
    16,					/* "large" insn */
    6,					/* MOVE_RATIO */
    2,					/* cost for loading QImode using movzbl */
*************** struct processor_costs pentium4_cost = {
*** 452,476 ****
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   5,					/* cost of FADD and FSUB insns.  */
!   7,					/* cost of FMUL instruction.  */
!   43,					/* cost of FDIV instruction.  */
!   2,					/* cost of FABS instruction.  */
!   2,					/* cost of FCHS instruction.  */
!   43,					/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs nocona_cost = {
!   1,					/* cost of an add instruction */
!   1,					/* cost of a lea instruction */
!   1,					/* variable shift costs */
!   1,					/* constant shift costs */
!   {10, 10, 10, 10, 10},			/* cost of starting a multiply */
    0,					/* cost of multiply per each bit set */
!   {66, 66, 66, 66, 66},			/* cost of a divide/mod */
!   1,					/* cost of movsx */
!   1,					/* cost of movzx */
    16,					/* "large" insn */
    17,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
--- 524,556 ----
    64,					/* size of prefetch block */
    6,					/* number of parallel prefetches */
    2,					/* Branch cost */
!   COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  };
  
  static const
  struct processor_costs nocona_cost = {
!   COSTS_N_INSNS (1),			/* cost of an add instruction */
!   COSTS_N_INSNS (1),			/* cost of a lea instruction */
!   COSTS_N_INSNS (1),			/* variable shift costs */
!   COSTS_N_INSNS (1),			/* constant shift costs */
!   {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
!    COSTS_N_INSNS (10),			/*                               HI */
!    COSTS_N_INSNS (10),			/*                               SI */
!    COSTS_N_INSNS (10),			/*                               DI */
!    COSTS_N_INSNS (10)},			/*                               other */
    0,					/* cost of multiply per each bit set */
!   {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
!    COSTS_N_INSNS (66),			/*                          HI */
!    COSTS_N_INSNS (66),			/*                          SI */
!    COSTS_N_INSNS (66),			/*                          DI */
!    COSTS_N_INSNS (66)},			/*                          other */
!   COSTS_N_INSNS (1),			/* cost of movsx */
!   COSTS_N_INSNS (1),			/* cost of movzx */
    16,					/* "large" insn */
    17,					/* MOVE_RATIO */
    4,					/* cost for loading QImode using movzbl */
*************** struct processor_costs nocona_cost = {
*** 496,507 ****
    128,					/* size of prefetch block */
    8,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   6,					/* cost of FADD and FSUB insns.  */
!   8,					/* cost of FMUL instruction.  */
!   40,					/* cost of FDIV instruction.  */
!   3,					/* cost of FABS instruction.  */
!   3,					/* cost of FCHS instruction.  */
!   44,					/* cost of FSQRT instruction.  */
  };
  
  const struct processor_costs *ix86_cost = &pentium_cost;
--- 576,587 ----
    128,					/* size of prefetch block */
    8,					/* number of parallel prefetches */
    1,					/* Branch cost */
!   COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
!   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
!   COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
!   COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
!   COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
!   COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  };
  
  const struct processor_costs *ix86_cost = &pentium_cost;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16315,16327 ****
  	  && GET_MODE (XEXP (x, 0)) == SImode)
  	*total = 1;
        else if (TARGET_ZERO_EXTEND_WITH_AND)
! 	*total = COSTS_N_INSNS (ix86_cost->add);
        else
! 	*total = COSTS_N_INSNS (ix86_cost->movzx);
        return false;
  
      case SIGN_EXTEND:
!       *total = COSTS_N_INSNS (ix86_cost->movsx);
        return false;
  
      case ASHIFT:
--- 16397,16409 ----
  	  && GET_MODE (XEXP (x, 0)) == SImode)
  	*total = 1;
        else if (TARGET_ZERO_EXTEND_WITH_AND)
! 	*total = ix86_cost->add;
        else
! 	*total = ix86_cost->movzx;
        return false;
  
      case SIGN_EXTEND:
!       *total = ix86_cost->movsx;
        return false;
  
      case ASHIFT:
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16331,16343 ****
  	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
  	  if (value == 1)
  	    {
! 	      *total = COSTS_N_INSNS (ix86_cost->add);
  	      return false;
  	    }
  	  if ((value == 2 || value == 3)
  	      && ix86_cost->lea <= ix86_cost->shift_const)
  	    {
! 	      *total = COSTS_N_INSNS (ix86_cost->lea);
  	      return false;
  	    }
  	}
--- 16413,16426 ----
  	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
  	  if (value == 1)
  	    {
! 	      *total = ix86_cost->add;
  	      return false;
  	    }
  	  if ((value == 2 || value == 3)
+ 	      && !TARGET_DECOMPOSE_LEA
  	      && ix86_cost->lea <= ix86_cost->shift_const)
  	    {
! 	      *total = ix86_cost->lea;
  	      return false;
  	    }
  	}
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16352,16382 ****
  	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
  	    {
  	      if (INTVAL (XEXP (x, 1)) > 32)
! 		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
  	      else
! 		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
  	    }
  	  else
  	    {
  	      if (GET_CODE (XEXP (x, 1)) == AND)
! 		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
  	      else
! 		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
  	    }
  	}
        else
  	{
  	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
! 	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
  	  else
! 	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
  	}
        return false;
  
      case MULT:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = COSTS_N_INSNS (ix86_cost->fmul);
  	  return false;
  	}
        else
--- 16435,16465 ----
  	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
  	    {
  	      if (INTVAL (XEXP (x, 1)) > 32)
! 		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
  	      else
! 		*total = ix86_cost->shift_const * 2;
  	    }
  	  else
  	    {
  	      if (GET_CODE (XEXP (x, 1)) == AND)
! 		*total = ix86_cost->shift_var * 2;
  	      else
! 		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
  	    }
  	}
        else
  	{
  	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
! 	    *total = ix86_cost->shift_const;
  	  else
! 	    *total = ix86_cost->shift_var;
  	}
        return false;
  
      case MULT:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = ix86_cost->fmul;
  	  return false;
  	}
        else
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16417,16425 ****
  	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
  	    }
  
!   	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
! 			          + nbits * ix86_cost->mult_bit)
! 	           + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
  
            return true;
  	}
--- 16500,16508 ----
  	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
  	    }
  
!   	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
! 		    + nbits * ix86_cost->mult_bit
! 	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
  
            return true;
  	}
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16429,16442 ****
      case MOD:
      case UMOD:
        if (FLOAT_MODE_P (mode))
! 	*total = COSTS_N_INSNS (ix86_cost->fdiv);
        else
! 	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
        return false;
  
      case PLUS:
        if (FLOAT_MODE_P (mode))
! 	*total = COSTS_N_INSNS (ix86_cost->fadd);
        else if (GET_MODE_CLASS (mode) == MODE_INT
  	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
  	{
--- 16512,16525 ----
      case MOD:
      case UMOD:
        if (FLOAT_MODE_P (mode))
! 	*total = ix86_cost->fdiv;
        else
! 	*total = ix86_cost->divide[MODE_INDEX (mode)];
        return false;
  
      case PLUS:
        if (FLOAT_MODE_P (mode))
! 	*total = ix86_cost->fadd;
        else if (GET_MODE_CLASS (mode) == MODE_INT
  	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
  	{
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16448,16454 ****
  	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
  	      if (val == 2 || val == 4 || val == 8)
  		{
! 		  *total = COSTS_N_INSNS (ix86_cost->lea);
  		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
  		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
  				      outer_code);
--- 16531,16537 ----
  	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
  	      if (val == 2 || val == 4 || val == 8)
  		{
! 		  *total = ix86_cost->lea;
  		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
  		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
  				      outer_code);
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16462,16468 ****
  	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
  	      if (val == 2 || val == 4 || val == 8)
  		{
! 		  *total = COSTS_N_INSNS (ix86_cost->lea);
  		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
  		  *total += rtx_cost (XEXP (x, 1), outer_code);
  		  return true;
--- 16545,16551 ----
  	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
  	      if (val == 2 || val == 4 || val == 8)
  		{
! 		  *total = ix86_cost->lea;
  		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
  		  *total += rtx_cost (XEXP (x, 1), outer_code);
  		  return true;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16470,16476 ****
  	    }
  	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
  	    {
! 	      *total = COSTS_N_INSNS (ix86_cost->lea);
  	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
  	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
  	      *total += rtx_cost (XEXP (x, 1), outer_code);
--- 16553,16559 ----
  	    }
  	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
  	    {
! 	      *total = ix86_cost->lea;
  	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
  	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
  	      *total += rtx_cost (XEXP (x, 1), outer_code);
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16482,16488 ****
      case MINUS:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = COSTS_N_INSNS (ix86_cost->fadd);
  	  return false;
  	}
        /* FALLTHRU */
--- 16565,16571 ----
      case MINUS:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = ix86_cost->fadd;
  	  return false;
  	}
        /* FALLTHRU */
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16492,16498 ****
      case XOR:
        if (!TARGET_64BIT && mode == DImode)
  	{
! 	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
  		    + (rtx_cost (XEXP (x, 0), outer_code)
  		       << (GET_MODE (XEXP (x, 0)) != DImode))
  		    + (rtx_cost (XEXP (x, 1), outer_code)
--- 16575,16581 ----
      case XOR:
        if (!TARGET_64BIT && mode == DImode)
  	{
! 	  *total = (ix86_cost->add * 2
  		    + (rtx_cost (XEXP (x, 0), outer_code)
  		       << (GET_MODE (XEXP (x, 0)) != DImode))
  		    + (rtx_cost (XEXP (x, 1), outer_code)
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16504,16519 ****
      case NEG:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = COSTS_N_INSNS (ix86_cost->fchs);
  	  return false;
  	}
        /* FALLTHRU */
  
      case NOT:
        if (!TARGET_64BIT && mode == DImode)
! 	*total = COSTS_N_INSNS (ix86_cost->add * 2);
        else
! 	*total = COSTS_N_INSNS (ix86_cost->add);
        return false;
  
      case COMPARE:
--- 16587,16602 ----
      case NEG:
        if (FLOAT_MODE_P (mode))
  	{
! 	  *total = ix86_cost->fchs;
  	  return false;
  	}
        /* FALLTHRU */
  
      case NOT:
        if (!TARGET_64BIT && mode == DImode)
! 	*total = ix86_cost->add * 2;
        else
! 	*total = ix86_cost->add;
        return false;
  
      case COMPARE:
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16524,16530 ****
  	{
  	  /* This kind of construct is implemented using test[bwl].
  	     Treat it as if we had an AND.  */
! 	  *total = (COSTS_N_INSNS (ix86_cost->add)
  		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
  		    + rtx_cost (const1_rtx, outer_code));
  	  return true;
--- 16607,16613 ----
  	{
  	  /* This kind of construct is implemented using test[bwl].
  	     Treat it as if we had an AND.  */
! 	  *total = (ix86_cost->add
  		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
  		    + rtx_cost (const1_rtx, outer_code));
  	  return true;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16540,16551 ****
  
      case ABS:
        if (FLOAT_MODE_P (mode))
! 	*total = COSTS_N_INSNS (ix86_cost->fabs);
        return false;
  
      case SQRT:
        if (FLOAT_MODE_P (mode))
! 	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
        return false;
  
      case UNSPEC:
--- 16623,16634 ----
  
      case ABS:
        if (FLOAT_MODE_P (mode))
! 	*total = ix86_cost->fabs;
        return false;
  
      case SQRT:
        if (FLOAT_MODE_P (mode))
! 	*total = ix86_cost->fsqrt;
        return false;
  
      case UNSPEC:



More information about the Gcc-patches mailing list