This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [4.0 and mainline] Fix multiplication by constant expansion
Hi,
this is first part of patch as discussed.
Bootstrapped/regtested i686-pc-gnu-linux.
2006-01-02 Jan Hubicka <jh@suse.cz>
* i386.c (*_cost): Add COSTS_N_INSNS.
(ix86_rtx_costs): Do not use COSTS_N_INSNS.
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c (revision 108713)
--- config/i386/i386.c (working copy)
*************** Boston, MA 02110-1301, USA. */
*** 65,79 ****
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tunning for size */
! 2, /* cost of an add instruction */
! 3, /* cost of a lea instruction */
! 2, /* variable shift costs */
! 3, /* constant shift costs */
! {3, 3, 3, 3, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {3, 3, 3, 3, 5}, /* cost of a divide/mod */
! 3, /* cost of movsx */
! 3, /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
--- 65,87 ----
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tunning for size */
! COSTS_N_INSNS (2), /* cost of an add instruction */
! COSTS_N_INSNS (3), /* cost of a lea instruction */
! COSTS_N_INSNS (2), /* variable shift costs */
! COSTS_N_INSNS (3), /* constant shift costs */
! {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
! COSTS_N_INSNS (3), /* HI */
! COSTS_N_INSNS (3), /* SI */
! COSTS_N_INSNS (3), /* DI */
! COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (3), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (3), /* HI */
! COSTS_N_INSNS (3), /* SI */
! COSTS_N_INSNS (3), /* DI */
! COSTS_N_INSNS (5)}, /* other */
! COSTS_N_INSNS (3), /* cost of movsx */
! COSTS_N_INSNS (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
*************** struct processor_costs size_cost = { /*
*** 99,124 ****
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! 2, /* cost of FADD and FSUB insns. */
! 2, /* cost of FMUL instruction. */
! 2, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 2, /* cost of FSQRT instruction. */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
! 1, /* cost of an add instruction */
! 1, /* cost of a lea instruction */
! 3, /* variable shift costs */
! 2, /* constant shift costs */
! {6, 6, 6, 6, 6}, /* cost of starting a multiply */
! 1, /* cost of multiply per each bit set */
! {23, 23, 23, 23, 23}, /* cost of a divide/mod */
! 3, /* cost of movsx */
! 2, /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
--- 107,140 ----
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (2), /* cost of FMUL instruction. */
! COSTS_N_INSNS (2), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (2), /* cost of FSQRT instruction. */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (1), /* cost of a lea instruction */
! COSTS_N_INSNS (3), /* variable shift costs */
! COSTS_N_INSNS (2), /* constant shift costs */
! {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
! COSTS_N_INSNS (6), /* HI */
! COSTS_N_INSNS (6), /* SI */
! COSTS_N_INSNS (6), /* DI */
! COSTS_N_INSNS (6)}, /* other */
! COSTS_N_INSNS (1), /* cost of multiply per each bit set */
! {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (23), /* HI */
! COSTS_N_INSNS (23), /* SI */
! COSTS_N_INSNS (23), /* DI */
! COSTS_N_INSNS (23)}, /* other */
! COSTS_N_INSNS (3), /* cost of movsx */
! COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
*************** struct processor_costs i386_cost = { /*
*** 144,168 ****
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! 23, /* cost of FADD and FSUB insns. */
! 27, /* cost of FMUL instruction. */
! 88, /* cost of FDIV instruction. */
! 22, /* cost of FABS instruction. */
! 24, /* cost of FCHS instruction. */
! 122, /* cost of FSQRT instruction. */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
! 1, /* cost of an add instruction */
! 1, /* cost of a lea instruction */
! 3, /* variable shift costs */
! 2, /* constant shift costs */
! {12, 12, 12, 12, 12}, /* cost of starting a multiply */
1, /* cost of multiply per each bit set */
! {40, 40, 40, 40, 40}, /* cost of a divide/mod */
! 3, /* cost of movsx */
! 2, /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
--- 160,192 ----
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (27), /* cost of FMUL instruction. */
! COSTS_N_INSNS (88), /* cost of FDIV instruction. */
! COSTS_N_INSNS (22), /* cost of FABS instruction. */
! COSTS_N_INSNS (24), /* cost of FCHS instruction. */
! COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (1), /* cost of a lea instruction */
! COSTS_N_INSNS (3), /* variable shift costs */
! COSTS_N_INSNS (2), /* constant shift costs */
! {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
! COSTS_N_INSNS (12), /* HI */
! COSTS_N_INSNS (12), /* SI */
! COSTS_N_INSNS (12), /* DI */
! COSTS_N_INSNS (12)}, /* other */
1, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (40), /* HI */
! COSTS_N_INSNS (40), /* SI */
! COSTS_N_INSNS (40), /* DI */
! COSTS_N_INSNS (40)}, /* other */
! COSTS_N_INSNS (3), /* cost of movsx */
! COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
*************** struct processor_costs i486_cost = { /*
*** 188,212 ****
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! 8, /* cost of FADD and FSUB insns. */
! 16, /* cost of FMUL instruction. */
! 73, /* cost of FDIV instruction. */
! 3, /* cost of FABS instruction. */
! 3, /* cost of FCHS instruction. */
! 83, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium_cost = {
! 1, /* cost of an add instruction */
! 1, /* cost of a lea instruction */
! 4, /* variable shift costs */
! 1, /* constant shift costs */
! {11, 11, 11, 11, 11}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {25, 25, 25, 25, 25}, /* cost of a divide/mod */
! 3, /* cost of movsx */
! 2, /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
--- 212,244 ----
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
! COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (16), /* cost of FMUL instruction. */
! COSTS_N_INSNS (73), /* cost of FDIV instruction. */
! COSTS_N_INSNS (3), /* cost of FABS instruction. */
! COSTS_N_INSNS (3), /* cost of FCHS instruction. */
! COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (1), /* cost of a lea instruction */
! COSTS_N_INSNS (4), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
! COSTS_N_INSNS (11), /* HI */
! COSTS_N_INSNS (11), /* SI */
! COSTS_N_INSNS (11), /* DI */
! COSTS_N_INSNS (11)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (25), /* HI */
! COSTS_N_INSNS (25), /* SI */
! COSTS_N_INSNS (25), /* DI */
! COSTS_N_INSNS (25)}, /* other */
! COSTS_N_INSNS (3), /* cost of movsx */
! COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
*************** struct processor_costs pentium_cost = {
*** 232,256 ****
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
! 3, /* cost of FADD and FSUB insns. */
! 3, /* cost of FMUL instruction. */
! 39, /* cost of FDIV instruction. */
! 1, /* cost of FABS instruction. */
! 1, /* cost of FCHS instruction. */
! 70, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentiumpro_cost = {
! 1, /* cost of an add instruction */
! 1, /* cost of a lea instruction */
! 1, /* variable shift costs */
! 1, /* constant shift costs */
! {4, 4, 4, 4, 4}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {17, 17, 17, 17, 17}, /* cost of a divide/mod */
! 1, /* cost of movsx */
! 1, /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
--- 264,296 ----
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
! COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (3), /* cost of FMUL instruction. */
! COSTS_N_INSNS (39), /* cost of FDIV instruction. */
! COSTS_N_INSNS (1), /* cost of FABS instruction. */
! COSTS_N_INSNS (1), /* cost of FCHS instruction. */
! COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentiumpro_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (1), /* cost of a lea instruction */
! COSTS_N_INSNS (1), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
! COSTS_N_INSNS (4), /* HI */
! COSTS_N_INSNS (4), /* SI */
! COSTS_N_INSNS (4), /* DI */
! COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (17), /* HI */
! COSTS_N_INSNS (17), /* SI */
! COSTS_N_INSNS (17), /* DI */
! COSTS_N_INSNS (17)}, /* other */
! COSTS_N_INSNS (1), /* cost of movsx */
! COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
*************** struct processor_costs pentiumpro_cost =
*** 276,300 ****
32, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
! 3, /* cost of FADD and FSUB insns. */
! 5, /* cost of FMUL instruction. */
! 56, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 56, /* cost of FSQRT instruction. */
};
static const
struct processor_costs k6_cost = {
! 1, /* cost of an add instruction */
! 2, /* cost of a lea instruction */
! 1, /* variable shift costs */
! 1, /* constant shift costs */
! {3, 3, 3, 3, 3}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {18, 18, 18, 18, 18}, /* cost of a divide/mod */
! 2, /* cost of movsx */
! 2, /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
3, /* cost for loading QImode using movzbl */
--- 316,348 ----
32, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
! COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (5), /* cost of FMUL instruction. */
! COSTS_N_INSNS (56), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
static const
struct processor_costs k6_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (2), /* cost of a lea instruction */
! COSTS_N_INSNS (1), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
! COSTS_N_INSNS (3), /* HI */
! COSTS_N_INSNS (3), /* SI */
! COSTS_N_INSNS (3), /* DI */
! COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (18), /* HI */
! COSTS_N_INSNS (18), /* SI */
! COSTS_N_INSNS (18), /* DI */
! COSTS_N_INSNS (18)}, /* other */
! COSTS_N_INSNS (2), /* cost of movsx */
! COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
3, /* cost for loading QImode using movzbl */
*************** struct processor_costs k6_cost = {
*** 320,344 ****
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
! 2, /* cost of FADD and FSUB insns. */
! 2, /* cost of FMUL instruction. */
! 56, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 56, /* cost of FSQRT instruction. */
};
static const
struct processor_costs athlon_cost = {
! 1, /* cost of an add instruction */
! 2, /* cost of a lea instruction */
! 1, /* variable shift costs */
! 1, /* constant shift costs */
! {5, 5, 5, 5, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {18, 26, 42, 74, 74}, /* cost of a divide/mod */
! 1, /* cost of movsx */
! 1, /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
--- 368,400 ----
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
! COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (2), /* cost of FMUL instruction. */
! COSTS_N_INSNS (56), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
static const
struct processor_costs athlon_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (2), /* cost of a lea instruction */
! COSTS_N_INSNS (1), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
! COSTS_N_INSNS (5), /* HI */
! COSTS_N_INSNS (5), /* SI */
! COSTS_N_INSNS (5), /* DI */
! COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (26), /* HI */
! COSTS_N_INSNS (42), /* SI */
! COSTS_N_INSNS (74), /* DI */
! COSTS_N_INSNS (74)}, /* other */
! COSTS_N_INSNS (1), /* cost of movsx */
! COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
*************** struct processor_costs athlon_cost = {
*** 364,388 ****
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
! 4, /* cost of FADD and FSUB insns. */
! 4, /* cost of FMUL instruction. */
! 24, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 35, /* cost of FSQRT instruction. */
};
static const
struct processor_costs k8_cost = {
! 1, /* cost of an add instruction */
! 2, /* cost of a lea instruction */
! 1, /* variable shift costs */
! 1, /* constant shift costs */
! {3, 4, 3, 4, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {18, 26, 42, 74, 74}, /* cost of a divide/mod */
! 1, /* cost of movsx */
! 1, /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
--- 420,452 ----
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
! COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (4), /* cost of FMUL instruction. */
! COSTS_N_INSNS (24), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
static const
struct processor_costs k8_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (2), /* cost of a lea instruction */
! COSTS_N_INSNS (1), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
! COSTS_N_INSNS (4), /* HI */
! COSTS_N_INSNS (3), /* SI */
! COSTS_N_INSNS (4), /* DI */
! COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (26), /* HI */
! COSTS_N_INSNS (42), /* SI */
! COSTS_N_INSNS (74), /* DI */
! COSTS_N_INSNS (74)}, /* other */
! COSTS_N_INSNS (1), /* cost of movsx */
! COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
*************** struct processor_costs k8_cost = {
*** 408,432 ****
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
! 4, /* cost of FADD and FSUB insns. */
! 4, /* cost of FMUL instruction. */
! 19, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 35, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium4_cost = {
! 1, /* cost of an add instruction */
! 3, /* cost of a lea instruction */
! 4, /* variable shift costs */
! 4, /* constant shift costs */
! {15, 15, 15, 15, 15}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {56, 56, 56, 56, 56}, /* cost of a divide/mod */
! 1, /* cost of movsx */
! 1, /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
--- 472,504 ----
64, /* size of prefetch block */
6, /* number of parallel prefetches */
5, /* Branch cost */
! COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (4), /* cost of FMUL instruction. */
! COSTS_N_INSNS (19), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium4_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (3), /* cost of a lea instruction */
! COSTS_N_INSNS (4), /* variable shift costs */
! COSTS_N_INSNS (4), /* constant shift costs */
! {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
! COSTS_N_INSNS (15), /* HI */
! COSTS_N_INSNS (15), /* SI */
! COSTS_N_INSNS (15), /* DI */
! COSTS_N_INSNS (15)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (56), /* HI */
! COSTS_N_INSNS (56), /* SI */
! COSTS_N_INSNS (56), /* DI */
! COSTS_N_INSNS (56)}, /* other */
! COSTS_N_INSNS (1), /* cost of movsx */
! COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
*************** struct processor_costs pentium4_cost = {
*** 452,476 ****
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
! 5, /* cost of FADD and FSUB insns. */
! 7, /* cost of FMUL instruction. */
! 43, /* cost of FDIV instruction. */
! 2, /* cost of FABS instruction. */
! 2, /* cost of FCHS instruction. */
! 43, /* cost of FSQRT instruction. */
};
static const
struct processor_costs nocona_cost = {
! 1, /* cost of an add instruction */
! 1, /* cost of a lea instruction */
! 1, /* variable shift costs */
! 1, /* constant shift costs */
! {10, 10, 10, 10, 10}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
! {66, 66, 66, 66, 66}, /* cost of a divide/mod */
! 1, /* cost of movsx */
! 1, /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
--- 524,556 ----
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
! COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (7), /* cost of FMUL instruction. */
! COSTS_N_INSNS (43), /* cost of FDIV instruction. */
! COSTS_N_INSNS (2), /* cost of FABS instruction. */
! COSTS_N_INSNS (2), /* cost of FCHS instruction. */
! COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
};
static const
struct processor_costs nocona_cost = {
! COSTS_N_INSNS (1), /* cost of an add instruction */
! COSTS_N_INSNS (1), /* cost of a lea instruction */
! COSTS_N_INSNS (1), /* variable shift costs */
! COSTS_N_INSNS (1), /* constant shift costs */
! {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
! COSTS_N_INSNS (10), /* HI */
! COSTS_N_INSNS (10), /* SI */
! COSTS_N_INSNS (10), /* DI */
! COSTS_N_INSNS (10)}, /* other */
0, /* cost of multiply per each bit set */
! {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
! COSTS_N_INSNS (66), /* HI */
! COSTS_N_INSNS (66), /* SI */
! COSTS_N_INSNS (66), /* DI */
! COSTS_N_INSNS (66)}, /* other */
! COSTS_N_INSNS (1), /* cost of movsx */
! COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
*************** struct processor_costs nocona_cost = {
*** 496,507 ****
128, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
! 6, /* cost of FADD and FSUB insns. */
! 8, /* cost of FMUL instruction. */
! 40, /* cost of FDIV instruction. */
! 3, /* cost of FABS instruction. */
! 3, /* cost of FCHS instruction. */
! 44, /* cost of FSQRT instruction. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
--- 576,587 ----
128, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
! COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
! COSTS_N_INSNS (8), /* cost of FMUL instruction. */
! COSTS_N_INSNS (40), /* cost of FDIV instruction. */
! COSTS_N_INSNS (3), /* cost of FABS instruction. */
! COSTS_N_INSNS (3), /* cost of FCHS instruction. */
! COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16315,16327 ****
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
! *total = COSTS_N_INSNS (ix86_cost->add);
else
! *total = COSTS_N_INSNS (ix86_cost->movzx);
return false;
case SIGN_EXTEND:
! *total = COSTS_N_INSNS (ix86_cost->movsx);
return false;
case ASHIFT:
--- 16397,16409 ----
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
! *total = ix86_cost->add;
else
! *total = ix86_cost->movzx;
return false;
case SIGN_EXTEND:
! *total = ix86_cost->movsx;
return false;
case ASHIFT:
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16331,16343 ****
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
! *total = COSTS_N_INSNS (ix86_cost->add);
return false;
}
if ((value == 2 || value == 3)
&& ix86_cost->lea <= ix86_cost->shift_const)
{
! *total = COSTS_N_INSNS (ix86_cost->lea);
return false;
}
}
--- 16413,16426 ----
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
! *total = ix86_cost->add;
return false;
}
if ((value == 2 || value == 3)
+ && !TARGET_DECOMPOSE_LEA
&& ix86_cost->lea <= ix86_cost->shift_const)
{
! *total = ix86_cost->lea;
return false;
}
}
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16352,16382 ****
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
if (INTVAL (XEXP (x, 1)) > 32)
! *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
else
! *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
! *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
else
! *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
}
}
else
{
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
! *total = COSTS_N_INSNS (ix86_cost->shift_const);
else
! *total = COSTS_N_INSNS (ix86_cost->shift_var);
}
return false;
case MULT:
if (FLOAT_MODE_P (mode))
{
! *total = COSTS_N_INSNS (ix86_cost->fmul);
return false;
}
else
--- 16435,16465 ----
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
if (INTVAL (XEXP (x, 1)) > 32)
! *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
else
! *total = ix86_cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
! *total = ix86_cost->shift_var * 2;
else
! *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
! *total = ix86_cost->shift_const;
else
! *total = ix86_cost->shift_var;
}
return false;
case MULT:
if (FLOAT_MODE_P (mode))
{
! *total = ix86_cost->fmul;
return false;
}
else
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16417,16425 ****
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
! *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
! + nbits * ix86_cost->mult_bit)
! + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
return true;
}
--- 16500,16508 ----
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
! *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
! + nbits * ix86_cost->mult_bit
! + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
return true;
}
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16429,16442 ****
case MOD:
case UMOD:
if (FLOAT_MODE_P (mode))
! *total = COSTS_N_INSNS (ix86_cost->fdiv);
else
! *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
return false;
case PLUS:
if (FLOAT_MODE_P (mode))
! *total = COSTS_N_INSNS (ix86_cost->fadd);
else if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
{
--- 16512,16525 ----
case MOD:
case UMOD:
if (FLOAT_MODE_P (mode))
! *total = ix86_cost->fdiv;
else
! *total = ix86_cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
if (FLOAT_MODE_P (mode))
! *total = ix86_cost->fadd;
else if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
{
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16448,16454 ****
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
! *total = COSTS_N_INSNS (ix86_cost->lea);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
outer_code);
--- 16531,16537 ----
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
! *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
outer_code);
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16462,16468 ****
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
! *total = COSTS_N_INSNS (ix86_cost->lea);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
return true;
--- 16545,16551 ----
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
! *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
return true;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16470,16476 ****
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
! *total = COSTS_N_INSNS (ix86_cost->lea);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
--- 16553,16559 ----
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
! *total = ix86_cost->lea;
*total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
*total += rtx_cost (XEXP (x, 1), outer_code);
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16482,16488 ****
case MINUS:
if (FLOAT_MODE_P (mode))
{
! *total = COSTS_N_INSNS (ix86_cost->fadd);
return false;
}
/* FALLTHRU */
--- 16565,16571 ----
case MINUS:
if (FLOAT_MODE_P (mode))
{
! *total = ix86_cost->fadd;
return false;
}
/* FALLTHRU */
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16492,16498 ****
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
! *total = (COSTS_N_INSNS (ix86_cost->add) * 2
+ (rtx_cost (XEXP (x, 0), outer_code)
<< (GET_MODE (XEXP (x, 0)) != DImode))
+ (rtx_cost (XEXP (x, 1), outer_code)
--- 16575,16581 ----
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
! *total = (ix86_cost->add * 2
+ (rtx_cost (XEXP (x, 0), outer_code)
<< (GET_MODE (XEXP (x, 0)) != DImode))
+ (rtx_cost (XEXP (x, 1), outer_code)
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16504,16519 ****
case NEG:
if (FLOAT_MODE_P (mode))
{
! *total = COSTS_N_INSNS (ix86_cost->fchs);
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
! *total = COSTS_N_INSNS (ix86_cost->add * 2);
else
! *total = COSTS_N_INSNS (ix86_cost->add);
return false;
case COMPARE:
--- 16587,16602 ----
case NEG:
if (FLOAT_MODE_P (mode))
{
! *total = ix86_cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
! *total = ix86_cost->add * 2;
else
! *total = ix86_cost->add;
return false;
case COMPARE:
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16524,16530 ****
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
! *total = (COSTS_N_INSNS (ix86_cost->add)
+ rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ rtx_cost (const1_rtx, outer_code));
return true;
--- 16607,16613 ----
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
! *total = (ix86_cost->add
+ rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ rtx_cost (const1_rtx, outer_code));
return true;
*************** ix86_rtx_costs (rtx x, int code, int out
*** 16540,16551 ****
case ABS:
if (FLOAT_MODE_P (mode))
! *total = COSTS_N_INSNS (ix86_cost->fabs);
return false;
case SQRT:
if (FLOAT_MODE_P (mode))
! *total = COSTS_N_INSNS (ix86_cost->fsqrt);
return false;
case UNSPEC:
--- 16623,16634 ----
case ABS:
if (FLOAT_MODE_P (mode))
! *total = ix86_cost->fabs;
return false;
case SQRT:
if (FLOAT_MODE_P (mode))
! *total = ix86_cost->fsqrt;
return false;
case UNSPEC: