/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,           /* cost of an add instruction */
  3,           /* cost of a lea instruction */
  2,           /* variable shift costs */
  3,           /* constant shift costs */
  3,           /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  3,           /* cost of a divide/mod */
  3,           /* cost of movsx */
  3,           /* cost of movzx */
  0,           /* "large" insn */
  2,           /* MOVE_RATIO */
  2,           /* cost for loading QImode using movzbl */
  {2, 2, 2},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 2, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {2, 2, 2},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {2, 2, 2},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  3,           /* cost of moving MMX register */
  {3, 3},      /* cost of loading MMX registers
                  in SImode and DImode */
  {3, 3},      /* cost of storing MMX registers
                  in SImode and DImode */
  3,           /* cost of moving SSE register */
  {3, 3, 3},   /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {3, 3, 3},   /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,           /* MMX or SSE register to integer */
  0,           /* size of prefetch block */
  0,           /* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,           /* cost of an add instruction */
  1,           /* cost of a lea instruction */
  3,           /* variable shift costs */
  2,           /* constant shift costs */
  6,           /* cost of starting a multiply */
  1,           /* cost of multiply per each bit set */
  23,          /* cost of a divide/mod */
  3,           /* cost of movsx */
  2,           /* cost of movzx */
  15,          /* "large" insn */
  3,           /* MOVE_RATIO */
  4,           /* cost for loading QImode using movzbl */
  {2, 4, 2},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 4, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {8, 8, 8},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {8, 8, 8},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {4, 8},      /* cost of loading MMX registers
                  in SImode and DImode */
  {4, 8},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,           /* MMX or SSE register to integer */
  0,           /* size of prefetch block */
  0,           /* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,           /* cost of an add instruction */
  1,           /* cost of a lea instruction */
  3,           /* variable shift costs */
  2,           /* constant shift costs */
  12,          /* cost of starting a multiply */
  1,           /* cost of multiply per each bit set */
  40,          /* cost of a divide/mod */
  3,           /* cost of movsx */
  2,           /* cost of movzx */
  15,          /* "large" insn */
  3,           /* MOVE_RATIO */
  4,           /* cost for loading QImode using movzbl */
  {2, 4, 2},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 4, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {8, 8, 8},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {8, 8, 8},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {4, 8},      /* cost of loading MMX registers
                  in SImode and DImode */
  {4, 8},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,           /* MMX or SSE register to integer */
  0,           /* size of prefetch block */
  0,           /* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,           /* cost of an add instruction */
  1,           /* cost of a lea instruction */
  4,           /* variable shift costs */
  1,           /* constant shift costs */
  11,          /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  25,          /* cost of a divide/mod */
  3,           /* cost of movsx */
  2,           /* cost of movzx */
  8,           /* "large" insn */
  6,           /* MOVE_RATIO */
  6,           /* cost for loading QImode using movzbl */
  {2, 4, 2},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 4, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {2, 2, 6},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 6},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  8,           /* cost of moving MMX register */
  {8, 8},      /* cost of loading MMX registers
                  in SImode and DImode */
  {8, 8},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,           /* MMX or SSE register to integer */
  0,           /* size of prefetch block */
  0,           /* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,           /* cost of an add instruction */
  1,           /* cost of a lea instruction */
  1,           /* variable shift costs */
  1,           /* constant shift costs */
  4,           /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  17,          /* cost of a divide/mod */
  1,           /* cost of movsx */
  1,           /* cost of movzx */
  8,           /* "large" insn */
  6,           /* MOVE_RATIO */
  2,           /* cost for loading QImode using movzbl */
  {4, 4, 4},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 2, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {2, 2, 6},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 6},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {2, 2},      /* cost of loading MMX registers
                  in SImode and DImode */
  {2, 2},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {2, 2, 8},   /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {2, 2, 8},   /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,           /* MMX or SSE register to integer */
  32,          /* size of prefetch block */
  6,           /* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,           /* cost of an add instruction */
  2,           /* cost of a lea instruction */
  1,           /* variable shift costs */
  1,           /* constant shift costs */
  3,           /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  18,          /* cost of a divide/mod */
  2,           /* cost of movsx */
  2,           /* cost of movzx */
  8,           /* "large" insn */
  4,           /* MOVE_RATIO */
  3,           /* cost for loading QImode using movzbl */
  {4, 5, 4},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 3, 2},   /* cost of storing integer registers */
  4,           /* cost of reg,reg fld/fst */
  {6, 6, 6},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 4},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {2, 2},      /* cost of loading MMX registers
                  in SImode and DImode */
  {2, 2},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {2, 2, 8},   /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {2, 2, 8},   /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  6,           /* MMX or SSE register to integer */
  32,          /* size of prefetch block */
  1,           /* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,           /* cost of an add instruction */
  2,           /* cost of a lea instruction */
  1,           /* variable shift costs */
  1,           /* constant shift costs */
  5,           /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  42,          /* cost of a divide/mod */
  1,           /* cost of movsx */
  1,           /* cost of movzx */
  8,           /* "large" insn */
  9,           /* MOVE_RATIO */
  4,           /* cost for loading QImode using movzbl */
  {4, 5, 4},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 3, 2},   /* cost of storing integer registers */
  4,           /* cost of reg,reg fld/fst */
  {6, 6, 20},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 16},  /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {2, 2},      /* cost of loading MMX registers
                  in SImode and DImode */
  {2, 2},      /* cost of storing MMX registers
                  in SImode and DImode */
  2,           /* cost of moving SSE register */
  {2, 2, 8},   /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {2, 2, 8},   /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  6,           /* MMX or SSE register to integer */
  64,          /* size of prefetch block */
  6,           /* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,           /* cost of an add instruction */
  1,           /* cost of a lea instruction */
  8,           /* variable shift costs */
  8,           /* constant shift costs */
  30,          /* cost of starting a multiply */
  0,           /* cost of multiply per each bit set */
  112,         /* cost of a divide/mod */
  1,           /* cost of movsx */
  1,           /* cost of movzx */
  16,          /* "large" insn */
  6,           /* MOVE_RATIO */
  2,           /* cost for loading QImode using movzbl */
  {4, 5, 4},   /* cost of loading integer registers
                  in QImode, HImode and SImode.
                  Relative to reg-reg move (2).  */
  {2, 3, 2},   /* cost of storing integer registers */
  2,           /* cost of reg,reg fld/fst */
  {2, 2, 6},   /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 6},   /* cost of storing fp registers
                  in SFmode, DFmode and XFmode */
  2,           /* cost of moving MMX register */
  {2, 2},      /* cost of loading MMX registers
                  in SImode and DImode */
  {2, 2},      /* cost of storing MMX registers
                  in SImode and DImode */
  12,          /* cost of moving SSE register */
  {12, 12, 12},/* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {2, 2, 8},   /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  10,          /* MMX or SSE register to integer */
  64,          /* size of prefetch block */
  6,           /* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

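/* Illustrative sketch, not part of the compiler proper: ix86_cost is
   consulted by the rtx-cost and register-move-cost hooks, so retuning
   only means repointing it at another table.  The field names below are
   assumptions inferred from the initializer comments above (the real
   declaration lives in i386.h):

     int multiply_cost = ix86_cost->mult_init
			 + ix86_cost->mult_bit * bits_set;

   which is how a cost model of this shape would price an N-bit-set
   multiply.  */
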
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;

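/* Illustrative sketch, not part of the compiler proper: each tuning
   variable above is a bitmask indexed by processor, tested against the
   mask of the processor we are tuning for.  The exact macro spellings
   below are assumptions based on the x86_accumulate_outgoing_args test
   visible in override_options later in this file:

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so when tuning for the K6 (ix86_cpu == PROCESSOR_K6), TARGET_USE_LEAVE
   is nonzero because m_K6 is set in x86_use_leave above.  */
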
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						      1 /*RDX*/, 2 /*RCX*/,
						      FIRST_REX_INT_REG /*R8 */,
						      FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

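/* Illustrative example, not part of the compiler proper: debug output
   translates gcc register numbers through these maps, so a variable
   living in %ecx (gcc regno 2) is described to a DWARF consumer as
   register svr4_dbx_register_map[2] == 1, matching the SVR4 numbering
   documented in the comment above.  */
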
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

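/* Illustrative note, not part of the compiler proper: the stack_locals
   array caches up to MAX_386_STACK_LOCALS scratch stack slots per
   machine mode.  A hypothetical consumer (the real helper,
   assign_386_stack_local, is defined later in this file) would be used
   like

     rtx slot = assign_386_stack_local (SImode, 0);

   returning a MEM for SImode slot 0, creating it on first use and
   handing back the cached rtx on later requests.  */
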
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which unit we are generating floating point math for */
enum fpmath_unit ix86_fpmath;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

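/* Illustrative example, not part of the compiler proper: under the
   x86-64 PS ABI classification that classify_argument below implements,
   a structure such as

     struct s { double d; int i; };

   occupies two eightbytes; the first (the double) classifies as
   X86_64_SSEDF_CLASS and the second (the int, whose upper half is
   padding) as X86_64_INTEGERSI_CLASS, so the struct travels in one SSE
   and one integer register.  */
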
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							 HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

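  /* Illustrative note, not part of the compiler proper: the table above
     drives the -march=/-mcpu= handling in the loops below.  For example,
     "-march=athlon-xp" selects PROCESSOR_ATHLON and, unless the
     corresponding -mno-* switch was given explicitly, turns on MMX,
     3DNow!, the Athlon 3DNow! extensions and SSE, and marks the SSE
     prefetch instructions as usable.  */
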
  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

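  /* Worked example, not part of the compiler proper: the option value is
     the log2 of the boundary in bytes, so "-mpreferred-stack-boundary=4"
     yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte
     aligned stack, matching the optimize-for-speed default above.  */
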
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}
\f
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

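/* Illustrative example, not part of the compiler proper: source code
   exercising the handlers above would look like

     int __attribute__ ((regparm (2))) f (int a, int b);
     int __attribute__ ((stdcall)) g (int a);

   where the first declaration passes A and B in registers and the
   second makes G pop its own argument on return (see
   ix86_return_pops_args below); a regparm argument larger than
   REGPARM_MAX triggers the warning in the handler.  */
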
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched calling conventions (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
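
/* Illustrative example (added annotation, not from the original source):
   under -mrtd the default convention is stdcall, so rtdstr above is
   "cdecl" and only an explicit `cdecl' marker distinguishes the two.
   Given

	void f (int);				   default (stdcall)
	void __attribute__ ((cdecl)) g (int);	   explicit cdecl

   the two function types compare as incompatible (0), because only the
   type of `g' carries the "cdecl" attribute.  */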

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
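
/* Illustrative example (added annotation, not from the original source):
   for

	int __attribute__ ((stdcall)) f (int a, int b);

   the argument list is fixed, so ix86_return_pops_args returns 8 and f
   exits with `ret $8'; a cdecl or variadic function yields 0, leaving
   the caller to pop its own arguments.  */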

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
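
/* Illustrative example (added annotation, not from the original source):
   for the i386 declaration

	int __attribute__ ((regparm (3))) f (int a, int b, int c);

   init_cumulative_args sets cum->nregs to 3, so up to three integer
   arguments travel in registers (conventionally %eax, %edx, %ecx)
   instead of on the stack; a variadic signature forces nregs back to
   0 above.  */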

/* x86-64 register passing implementation.  See the x86-64 PS ABI for
   details.  The goal of this code is to classify each 8-byte chunk of the
   incoming argument by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
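
/* Illustrative example (added annotation, not from the original source):
   for

	union u { int i; float f; };

   both members start at byte 0, so the single 8-byte chunk merges
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS; rule #4 above yields
   X86_64_INTEGERSI_CLASS and the union is passed in a general purpose
   register.  */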

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + bit_offset) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should always be preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries,
     with the exception of XFmode, which is aligned to 64 bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case V4SFmode:
    case V4SImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      abort ();
    }
}
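
/* Illustrative walk-through (added annotation, not from the original
   source): for

	struct s { int a; double b; };	   16 bytes, two 8-byte chunks

   classify_argument returns 2 with classes[0] = X86_64_INTEGERSI_CLASS
   (the int plus padding) and classes[1] = X86_64_SSEDF_CLASS (the
   double), so examine_argument below reports one general purpose and
   one SSE register.  A struct larger than 16 bytes returns 0 and is
   passed in memory.  */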

/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	abort ();
      }
  return 1;
}

/* Construct a container for the argument used by the GCC interface.  See
   FUNCTION_ARG for a detailed description.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int *intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes for which there is no mode.
	     Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode, *intreg),
					    GEN_INT (i * 8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (SFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (DFmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode, i++;
	  else
	    tmpmode = DImode;
	  exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					    gen_rtx_REG (tmpmode,
							 SSE_REGNO (sse_regno)),
					    GEN_INT (i * 8));
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
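
/* Illustrative example (added annotation, not from the original source):
   for struct s { int a; double b; } classified earlier,
   construct_container builds a PARALLEL along the lines of

	(parallel [(expr_list (reg:SI di) (const_int 0))
		   (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the low 8-byte chunk travels in the next free integer register
   and the high chunk in the next free SSE register.  */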

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      if (TARGET_SSE && mode == TImode)
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else
	{
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing the number of SSE registers
     used by varargs x86-64 functions.  For the i386 ABI just return
     constm1_rtx to avoid any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers[cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
      /* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  ret = gen_rtx_REG (mode, cum->regno);
	break;
      case TImode:
	if (cum->sse_nregs)
	  ret = gen_rtx_REG (mode, cum->sse_regno);
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[REGNO (ret)]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
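
/* Illustrative note (added annotation, not from the original source):
   for a variadic x86-64 call such as printf ("%f", d), the VOIDmode
   case above makes the caller load %al with the number of SSE
   registers actually used (here 1, for d in %xmm0), so the callee's
   prologue knows how many of them to dump into its register save
   area.  */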

/* Gives the alignment boundary, in bits, of an argument with the specified
   mode and type.  */

int
ix86_function_arg_boundary (mode, type)
     enum machine_mode mode;
     tree type;
{
  int align;
  if (!TARGET_64BIT)
    return PARM_BOUNDARY;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if REGNO is a possible register number for a function
   value.  */
bool
ix86_function_value_regno_p (regno)
     int regno;
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (valtype)
     tree valtype;
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
				     REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
}

/* Return nonzero iff TYPE is returned in memory.  */
int
ix86_return_in_memory (type)
     tree type;
{
  int needed_intregs, needed_sseregs;
  if (TARGET_64BIT)
    {
      return !examine_argument (TYPE_MODE (type), type, 1,
				&needed_intregs, &needed_sseregs);
    }
  else
    {
      if (TYPE_MODE (type) == BLKmode
	  || (VECTOR_MODE_P (TYPE_MODE (type))
	      && int_size_in_bytes (type) == 8)
	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
	      && TYPE_MODE (type) != TFmode
	      && !VECTOR_MODE_P (TYPE_MODE (type))))
	return 1;
      return 0;
    }
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case TFmode:
	case TCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, VALUE_REGNO (mode));
}

/* Create the va_list data type.  */

tree
ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use a plain pointer to the argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = make_lang_type (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
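
/* Illustrative note (added annotation, not from the original source):
   at the C level the record built above corresponds to the familiar
   x86-64 va_list,

	typedef struct {
	  unsigned int gp_offset;	   offset into reg save area (GP)
	  unsigned int fp_offset;	   offset into reg save area (SSE)
	  void *overflow_arg_area;	   arguments passed on the stack
	  void *reg_save_area;		   saved parameter registers
	} __va_list_tag;
	typedef __va_list_tag va_list[1];

   where the array-of-one wrapper gives va_list its pass-by-reference
   behavior.  */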

/* Perform any needed actions for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains
	 the number of SSE parameter registers used to call this function.
	 We use the sse_prologue_save insn template that produces a computed
	 jump across the SSE saves.  We need some preparation work to get
	 this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute the address to jump to:
	 label - 4*eax + nnamed_sse_arguments*4.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute the address of the memory block we save into.  We always
	 use a pointer pointing 127 bytes after the first byte to store -
	 this keeps each store instruction within the 4-byte (disp8)
	 encoding limit.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }
}
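
/* Illustrative note (added annotation, not from the original source):
   the resulting register save area holds the integer parameter
   registers in 8 * REGPARM_MAX = 48 bytes, followed by 16 bytes per
   SSE register; %al (set by the caller, see function_arg above) drives
   the computed jump that decides how many SSE stores actually
   execute.  */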

/* Implement va_start.  */

void
ix86_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only the 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count the number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8 * REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     The prologue of the function saves it right above the stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
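
/* Illustrative example (added annotation, not from the original source):
   for

	void f (int a, double b, ...);

   one GP and one SSE register are consumed by the named arguments, so
   va_start stores gp_offset = 1 * 8 = 8 and fp_offset =
   8 * REGPARM_MAX + 1 * 16 = 64; each later va_arg bumps the matching
   offset past the registers it consumes.  */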

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only the 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);

      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* When passing a structure, verify that it forms a consecutive block
	 in the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;

	  /* Never use the memory itself, as it has the alias set.  */
	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  return addr_rtx;
}
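
/* Illustrative walk-through (added annotation, not from the original
   source): for va_arg (ap, int), needed_intregs is 1, so the code
   above branches to the overflow path when gp_offset >=
   (REGPARM_MAX - 1 + 1) * 8 = 48; otherwise the value is fetched at
   reg_save_area + gp_offset and gp_offset advances by 8.  */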

/* Return nonzero if OP is a general operand representable on x86_64.  */

int
x86_64_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a general operand representable on x86_64
   as either a sign-extended or zero-extended constant.  */

int
x86_64_szext_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */

int
x86_64_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand acceptable by the movabs
   patterns.  */

int
x86_64_movabs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64
   as either a sign-extended or zero-extended constant.  */

int
x86_64_szext_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64.  */

int
x86_64_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero-extended constant.  */

int
x86_64_zext_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == 6
		  || XINT (op, 1) == 7
		  || XINT (op, 1) == 15)))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
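
/* Illustrative example (added annotation, not from the original source):
   with -fpic, an address such as &local_array[1] can appear as

	(const (plus (unspec [(symbol_ref "local_array")] 7)
		     (const_int 4)))

   where unspec number 7 marks the @GOTOFF relocation; as the comment
   above notes, offsets are accepted only in that @GOTOFF form.  */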

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
	return 1;
    }
  else
    {
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* These we've been told are local by varasm and encode_section_info
     respectively.  */
  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke ENCODE_SECTION_INFO.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
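
/* Illustrative note (added annotation, not from the original source):
   2, 4 and 8 are exactly the scale factors the x86 addressing syntax
   supports, as in

	leal (%eax,%ebx,4), %ecx	   ecx = eax + ebx*4

   which is why multiplications by these constants can be rewritten
   as lea.  */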

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as an operand of the DImode shift
   expander.  */

int
shiftdi_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}

/* Return 1 if OP is a valid comparison operator in a valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to a wider
   mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode multiply,
         but the 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
          && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
        return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
        return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
          || (INTVAL (parts.disp) & 3) != 0)
        return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
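/* Worked example (editorial commentary, not part of the original source):
   for (mem:SI (plus:SI (reg:SI %ebp) (const_int 8))) the decomposed base
   is %ebp and the displacement is 8.  If REGNO_POINTER_ALIGN reports %ebp
   as at least 32-bit aligned, and 8 & 3 == 0, the access is considered
   aligned.  A displacement of 6 instead, or an index scaled by less than
   4 whose register is not known to be aligned, makes the predicate
   fail.  */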
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that on the 80387 there are other constants, such as pi, that
     we should support too.  On some machines these are much slower to
     load as a standard constant than to load from a double in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
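/* Editorial note (not part of the original source): the return values
   tell the caller which special load to emit -- roughly, 1 means the
   constant can be produced with fldz (load +0.0) and 2 with fld1
   (load +1.0), -1 flags "not an FP CONST_DOUBLE at all", and 0 means
   the constant must come from memory.  */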

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
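/* Editorial note (not part of the original source): only 0.0 qualifies
   here, since an SSE register can be cleared without touching memory,
   e.g. with xorps %xmm0, %xmm0.  There is no SSE analogue of fld1.  */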

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          register int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k bytes of popped args, since that's all
     we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
       to be at least 32 and all acceptable constants are
       represented as CONST_INTs.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return 1;
      else
        {
          HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
          return trunc_int_for_mode (val, SImode) == val;
        }
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

    /* We may also accept offsetted memory references in certain special
       cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC
          && XVECLEN (XEXP (value, 0), 0) == 1
          && XINT (XEXP (value, 0), 1) == 15)
        return 1;
      else if (GET_CODE (XEXP (value, 0)) == PLUS)
        {
          rtx op1 = XEXP (XEXP (value, 0), 0);
          rtx op2 = XEXP (XEXP (value, 0), 1);
          HOST_WIDE_INT offset;

          if (ix86_cmodel == CM_LARGE)
            return 0;
          if (GET_CODE (op2) != CONST_INT)
            return 0;
          offset = trunc_int_for_mode (INTVAL (op2), DImode);
          switch (GET_CODE (op1))
            {
            case SYMBOL_REF:
              /* For CM_SMALL assume that the latest object is 1MB before
                 the end of the 31-bit boundary.  We may also accept pretty
                 large negative constants, knowing that all objects are
                 in the positive half of the address space.  */
              if (ix86_cmodel == CM_SMALL
                  && offset < 1024*1024*1024
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              /* For CM_KERNEL we know that all objects reside in the
                 negative half of the 32-bit address space.  We may not
                 accept negative offsets, since they may be just off,
                 but we may accept pretty large positive ones.  */
              if (ix86_cmodel == CM_KERNEL
                  && offset > 0
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              break;
            case LABEL_REF:
              /* These conditions are similar to the SYMBOL_REF ones, just
                 the constraints for code models differ.  */
              if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                  && offset < 1024*1024*1024
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              if (ix86_cmodel == CM_KERNEL
                  && offset > 0
                  && trunc_int_for_mode (offset, SImode) == offset)
                return 1;
              break;
            default:
              return 0;
            }
        }
      return 0;
    default:
      return 0;
    }
}
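/* Worked example (editorial commentary, not part of the original source):
   with a 64-bit HOST_WIDE_INT, (const_int 0x7fffffff) is accepted because
   sign-extending its low 32 bits reproduces the value, while
   (const_int 0x80000000) is rejected -- sign extension would yield
   0xffffffff80000000.  Negative values such as (const_int -1) fit, since
   they survive the SImode truncate/extend round trip.  */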

/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return (GET_MODE (value) == VOIDmode
                && !CONST_DOUBLE_HIGH (value));
      else
        return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
        return INTVAL (value) >= 0;
      else
        return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

    /* We may also accept offsetted memory references in certain special
       cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
        {
          rtx op1 = XEXP (XEXP (value, 0), 0);
          rtx op2 = XEXP (XEXP (value, 0), 1);

          if (ix86_cmodel == CM_LARGE)
            return 0;
          switch (GET_CODE (op1))
            {
            case SYMBOL_REF:
              return 0;
              /* For the small code model we may accept pretty large
                 positive offsets, since one bit is available for free.
                 Negative offsets are limited by the size of the NULL
                 pointer area specified by the ABI.  */
              if (ix86_cmodel == CM_SMALL
                  && GET_CODE (op2) == CONST_INT
                  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                  && (trunc_int_for_mode (INTVAL (op2), SImode)
                      == INTVAL (op2)))
                return 1;
              /* ??? For the kernel, we may accept adjustment of
                 -0x10000000, since we know that it will just convert
                 negative address space to positive, but perhaps this
                 is not worthwhile.  */
              break;
            case LABEL_REF:
              /* These conditions are similar to the SYMBOL_REF ones, just
                 the constraints for code models differ.  */
              if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
                  && GET_CODE (op2) == CONST_INT
                  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
                  && (trunc_int_for_mode (INTVAL (op2), SImode)
                      == INTVAL (op2)))
                return 1;
              break;
            default:
              return 0;
            }
        }
      return 0;
    default:
      return 0;
    }
}
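/* Worked example (editorial commentary, not part of the original source):
   on a 64-bit host, (const_int 0xffffffff) is accepted -- all bits above
   bit 31 are clear -- whereas (const_int -1) is rejected, because as a
   64-bit value its upper half is all ones.  This mirrors what a
   movl $imm32, %r32 instruction can materialize, since writing a 32-bit
   register zero-extends into the full 64-bit register.  */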

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  if (targetm.have_named_sections)
    {
      tree decl = build_decl (FUNCTION_DECL,
                              get_identifier ("i686.get_pc_thunk"),
                              error_mark_node);
      DECL_ONE_ONLY (decl) = 1;
      UNIQUE_SECTION (decl, 0);
      named_section (decl, NULL);
    }
  else
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?), so
     this was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (TARGET_64BIT)
    abort ();

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (! pic_label_name[0])
        ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
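/* Editorial note (not part of the original source): the generated RTL is

        (set (mem:SI (pre_dec:SI (reg:SI %esp))) arg)

   on 32-bit targets (DImode and %rsp in 64-bit mode), i.e. exactly what
   a pushl instruction does: predecrement the stack pointer, then
   store.  */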

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     int regno;
     int maybe_eh_return;
{
  if (flag_pic
      && ! TARGET_64BIT
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (current_function_uses_pic_offset_table
          || current_function_uses_const_pool
          || current_function_calls_eh_return))
    return 1;

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == (unsigned) regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return the number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill in the structure ix86_frame for the frame of the currently
   compiled function.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return value and save base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
                     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached the end of the stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size the prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
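/* Editorial sketch (not part of the original source): the offsets computed
   above describe a frame laid out, from higher to lower addresses, as

        return address                  (hard_frame_pointer_offset counts
        saved %ebp (if any)              these words)
        register save area              (frame->nregs words)
        va-arg save area                (frame->va_arg_size)
        padding1                        (aligns the locals)
        local variables                 (get_frame_size () bytes)
        outgoing arguments              (if ACCUMULATE_OUTGOING_ARGS)
        padding2                        (aligns the stack boundary)

   frame->to_allocate is what the prologue must subtract from the stack
   pointer beyond the pushed registers; in 64-bit leaf functions part of
   it may instead live in the red zone below %rsp.  */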

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
                                   || current_function_uses_const_pool)
                      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
        = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
        use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with a single register and an empty
     frame, push is equivalent to the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
                        (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
        abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
                         gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
        = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
                             CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if (current_function_profile && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
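/* Editorial sketch (not part of the original source): for a typical
   32-bit function needing a frame pointer, one saved register and 24
   bytes of locals, the insns expanded above correspond to assembly
   roughly like

        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $24, %esp

   or, with TARGET_PROLOGUE_USING_MOVE, a single larger subl followed by
   movl stores of the saved registers.  */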

/* Emit code to restore saved registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (gen_rtx_MEM (Pmode, pointer),
                                        Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate the start of the saved registers relative to ebp.  Special
     care must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid, then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well -- especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              emit_insn (gen_pro_epilogue_adjust_stack
                         (stack_pointer_rtx, sa, const0_rtx));
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate
                             + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    const0_rtx));
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* The first step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
                                                    hard_frame_pointer_rtx,
                                                    GEN_INT (offset)));
        }
      else if (frame.to_allocate)
        emit_insn (gen_pro_epilogue_adjust_stack
                   (stack_pointer_rtx, stack_pointer_rtx,
                    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          if (TARGET_64BIT)
            abort ();

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains an ASHIFT, so it is
   not strictly valid, but is still used for computing the length of an
   lea instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
        {
          if (code1 == REG || code1 == SUBREG)
            index = op0, base = op1;    /* index + base */
          else
            base = op0, disp = op1;     /* base + displacement */
        }
      else if (code0 == MULT)
        {
          index = XEXP (op0, 0);
          scale_rtx = XEXP (op0, 1);
          if (code1 == REG || code1 == SUBREG)
            base = op1;                 /* index*scale + base */
          else
            disp = op1;                 /* index*scale + disp */
        }
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
        {
          index = XEXP (XEXP (op0, 0), 0);      /* index*scale + base + disp */
          scale_rtx = XEXP (XEXP (op0, 0), 1);
          base = XEXP (op0, 1);
          disp = op1;
        }
      else if (code0 == PLUS)
        {
          index = XEXP (op0, 0);        /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }
      else
        return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow the arg pointer and stack pointer as an index if there is no
     scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
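/* Worked example (editorial commentary, not part of the original source):
   a canonical address such as

        (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
                          (reg:SI %ebx))
                 (const_int 16))

   matches the "code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT" arm
   above and decomposes to index = %esi, scale = 4, base = %ebx and
   disp = 16 -- the x86 addressing mode 16(%ebx,%esi,4).  An ASHIFT form
   such as (ashift (reg) (const_int 2)) yields the same scale of 4 but
   makes the function return -1 rather than 1.  */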
\f
/* Return the cost of the memory address X.
   For the i386, it is better to use a complex address than let gcc copy
   the address into a register and make a new pseudo.  But not if the
   address requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize the number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
     00_xxx_100b, since its predecode logic can't detect the length of
     instructions and decoding degenerates to the vector decoder.
     Increase the cost of such addresses here.  The penalty is minimally
     2 cycles.  It may be worthwhile to split such addresses or even
     refuse them altogether.

     The following addressing modes are affected:
        [base+scale*index]
        [scale*index+disp]
        [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XVECLEN (term, 0) != 1
          || XINT (term, 1) != 15)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
\f
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64-bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
        x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
          && (GET_CODE (XEXP (x, 1)) == CONST_INT
              && ix86_cmodel == CM_SMALL_PIC
              && INTVAL (XEXP (x, 1)) < 1024*1024*1024
              && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
        x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
        return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; this limits the
         allowed distance of GOT table references.  We should not need
         these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XVECLEN (disp, 0) != 1
          || XINT (disp, 1) != 15)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6: /* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7: /* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; it can lead to spill failures when the base
     is one word out of a two word structure, which is represented
     internally as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs here; it can lead to spill failures when the index
     is one word out of a two word structure, which is represented
     internally as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }

      if (TARGET_64BIT)
        {
          if (!x86_64_sign_extended_value (disp))
            {
              reason = "displacement is out of range";
              goto report_error;
            }
        }
      else
        {
          if (GET_CODE (disp) == CONST_DOUBLE)
            {
              reason = "displacement is a const_double";
              goto report_error;
            }
        }

      if (flag_pic && SYMBOLIC_CONST (disp))
        {
          if (TARGET_64BIT && (index || base))
            {
              reason = "non-constant pic memory reference";
              goto report_error;
            }
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea when the
             output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to be
             the easier and correct fix for the crash.  */
        }
      else if (HALF_PIC_P ())
        {
          if (! HALF_PIC_ADDRESS_P (disp)
              || (base != NULL_RTX || index != NULL_RTX))
            {
              reason = "displacement is an invalid half-pic reference";
              goto report_error;
            }
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
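/* Editorial examples (not part of the original source): with strict
   checking, (plus:SI (reg:SI %ebx) (const_int 40)) passes all the tests
   above, while (plus:SI (mult:SI (reg:SI %eax) (const_int 3))
   (reg:SI %ebx)) is rejected with "scale is not a valid multiplier",
   and an address whose base is a pseudo that reload has not assigned a
   hard register fails with "base is not valid".  */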
\f
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64-bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
        new = addr;
      else
        {
          /* This symbol may be referenced via a displacement from the PIC
             base address (@GOTOFF).  */

          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

          if (reg != 0)
            {
              emit_move_insn (reg, new);
              new = reg;
            }
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          current_function_uses_pic_offset_table = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          if (GET_CODE (addr) == UNSPEC)
            {
              /* Check that the unspec is one of the ones we generate?  */
            }
          else if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  current_function_uses_pic_offset_table = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  /* ??? We need to limit offsets here.  */
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new = legitimize_pic_address (XEXP (addr, 1),
                                            base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
5046\f
3b3c6a3f
MM
5047/* Try machine-dependent ways of modifying an illegitimate address
5048 to be legitimate. If we find one, return the new, valid address.
5049 This macro is used in only one place: `memory_address' in explow.c.
5050
5051 OLDX is the address as it was before break_out_memory_refs was called.
5052 In some cases it is useful to look at this to decide what needs to be done.
5053
5054 MODE and WIN are passed so that this macro can use
5055 GO_IF_LEGITIMATE_ADDRESS.
5056
5057 It is always safe for this macro to do nothing. It exists to recognize
5058 opportunities to optimize the output.
5059
5060 For the 80386, we handle X+REG by loading X into a register R and
5061 using R+REG. R will go in a general reg and indexing will be used.
5062 However, if REG is a broken-out memory address or multiplication,
5063 nothing needs to be done because REG can certainly go in a general reg.
5064
5065 When -fpic is used, special handling is needed for symbolic references.
5066 See comments by legitimize_pic_address in i386.c for details. */
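/* For example (a sketch): under -fpic, an address such as
   (plus (reg R) (symbol_ref "x")) is handed as a whole to
   legitimize_pic_address by the SYMBOLIC_CONST checks below.  Without
   PIC, (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is reassociated into
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   which matches the machine's base + index*scale + disp form.  */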
5067
5068rtx
5069legitimize_address (x, oldx, mode)
5070 register rtx x;
bb5177ac 5071 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5072 enum machine_mode mode;
5073{
5074 int changed = 0;
5075 unsigned log;
5076
5077 if (TARGET_DEBUG_ADDR)
5078 {
e9a25f70
JL
5079 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5080 GET_MODE_NAME (mode));
3b3c6a3f
MM
5081 debug_rtx (x);
5082 }
5083
5084 if (flag_pic && SYMBOLIC_CONST (x))
5085 return legitimize_pic_address (x, 0);
5086
5087 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5088 if (GET_CODE (x) == ASHIFT
5089 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5090 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5091 {
5092 changed = 1;
a269a03c
JC
5093 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5094 GEN_INT (1 << log));
3b3c6a3f
MM
5095 }
5096
5097 if (GET_CODE (x) == PLUS)
5098 {
0f290768 5099 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5100
3b3c6a3f
MM
5101 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5102 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5103 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5104 {
5105 changed = 1;
c5c76735
JL
5106 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5107 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5108 GEN_INT (1 << log));
3b3c6a3f
MM
5109 }
5110
5111 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5112 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5113 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5114 {
5115 changed = 1;
c5c76735
JL
5116 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5117 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5118 GEN_INT (1 << log));
3b3c6a3f
MM
5119 }
5120
0f290768 5121 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5122 if (GET_CODE (XEXP (x, 1)) == MULT)
5123 {
5124 rtx tmp = XEXP (x, 0);
5125 XEXP (x, 0) = XEXP (x, 1);
5126 XEXP (x, 1) = tmp;
5127 changed = 1;
5128 }
5129
5130 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5131 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5132 created by virtual register instantiation, register elimination, and
5133 similar optimizations. */
5134 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5135 {
5136 changed = 1;
c5c76735
JL
5137 x = gen_rtx_PLUS (Pmode,
5138 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5139 XEXP (XEXP (x, 1), 0)),
5140 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5141 }
5142
e9a25f70
JL
5143 /* Canonicalize
5144 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5145 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5146 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5148 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5149 && CONSTANT_P (XEXP (x, 1)))
5150 {
00c79232
ML
5151 rtx constant;
5152 rtx other = NULL_RTX;
3b3c6a3f
MM
5153
5154 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5155 {
5156 constant = XEXP (x, 1);
5157 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5158 }
5159 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5160 {
5161 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5162 other = XEXP (x, 1);
5163 }
5164 else
5165 constant = 0;
5166
5167 if (constant)
5168 {
5169 changed = 1;
c5c76735
JL
5170 x = gen_rtx_PLUS (Pmode,
5171 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5172 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5173 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5174 }
5175 }
5176
5177 if (changed && legitimate_address_p (mode, x, FALSE))
5178 return x;
5179
5180 if (GET_CODE (XEXP (x, 0)) == MULT)
5181 {
5182 changed = 1;
5183 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5184 }
5185
5186 if (GET_CODE (XEXP (x, 1)) == MULT)
5187 {
5188 changed = 1;
5189 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5190 }
5191
5192 if (changed
5193 && GET_CODE (XEXP (x, 1)) == REG
5194 && GET_CODE (XEXP (x, 0)) == REG)
5195 return x;
5196
5197 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5198 {
5199 changed = 1;
5200 x = legitimize_pic_address (x, 0);
5201 }
5202
5203 if (changed && legitimate_address_p (mode, x, FALSE))
5204 return x;
5205
5206 if (GET_CODE (XEXP (x, 0)) == REG)
5207 {
5208 register rtx temp = gen_reg_rtx (Pmode);
5209 register rtx val = force_operand (XEXP (x, 1), temp);
5210 if (val != temp)
5211 emit_move_insn (temp, val);
5212
5213 XEXP (x, 1) = temp;
5214 return x;
5215 }
5216
5217 else if (GET_CODE (XEXP (x, 1)) == REG)
5218 {
5219 register rtx temp = gen_reg_rtx (Pmode);
5220 register rtx val = force_operand (XEXP (x, 0), temp);
5221 if (val != temp)
5222 emit_move_insn (temp, val);
5223
5224 XEXP (x, 0) = temp;
5225 return x;
5226 }
5227 }
5228
5229 return x;
5230}
2a2ab3f9
JVA
5231\f
5232/* Print an integer constant expression in assembler syntax. Addition
5233 and subtraction are the only arithmetic that may appear in these
5234 expressions. FILE is the stdio stream to write to, X is the rtx, and
5235 CODE is the operand print code from the output string. */
5236
5237static void
5238output_pic_addr_const (file, x, code)
5239 FILE *file;
5240 rtx x;
5241 int code;
5242{
5243 char buf[256];
5244
5245 switch (GET_CODE (x))
5246 {
5247 case PC:
5248 if (flag_pic)
5249 putc ('.', file);
5250 else
5251 abort ();
5252 break;
5253
5254 case SYMBOL_REF:
91bb873f
RH
5255 assemble_name (file, XSTR (x, 0));
5256 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5257 fputs ("@PLT", file);
2a2ab3f9
JVA
5258 break;
5259
91bb873f
RH
5260 case LABEL_REF:
5261 x = XEXP (x, 0);
5262 /* FALLTHRU */
2a2ab3f9
JVA
5263 case CODE_LABEL:
5264 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5265 assemble_name (asm_out_file, buf);
5266 break;
5267
5268 case CONST_INT:
f64cecad 5269 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5270 break;
5271
5272 case CONST:
5273 /* This used to output parentheses around the expression,
5274 but that does not work on the 386 (either ATT or BSD assembler). */
5275 output_pic_addr_const (file, XEXP (x, 0), code);
5276 break;
5277
5278 case CONST_DOUBLE:
5279 if (GET_MODE (x) == VOIDmode)
5280 {
5281 /* We can use %d if the number is <32 bits and positive. */
5282 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5283 fprintf (file, "0x%lx%08lx",
5284 (unsigned long) CONST_DOUBLE_HIGH (x),
5285 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5286 else
f64cecad 5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5288 }
5289 else
5290 /* We can't handle floating point constants;
5291 PRINT_OPERAND must handle them. */
5292 output_operand_lossage ("floating constant misused");
5293 break;
5294
5295 case PLUS:
e9a25f70 5296 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5297 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5298 {
2a2ab3f9 5299 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5300 putc ('+', file);
e9a25f70 5301 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5302 }
91bb873f 5303 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5304 {
2a2ab3f9 5305 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5306 putc ('+', file);
e9a25f70 5307 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5308 }
91bb873f
RH
5309 else
5310 abort ();
2a2ab3f9
JVA
5311 break;
5312
5313 case MINUS:
80f33d06 5314 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5315 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5316 putc ('-', file);
2a2ab3f9 5317 output_pic_addr_const (file, XEXP (x, 1), code);
80f33d06 5318 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5319 break;
5320
91bb873f
RH
5321 case UNSPEC:
5322 if (XVECLEN (x, 0) != 1)
77ebd435 5323 abort ();
91bb873f
RH
5324 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5325 switch (XINT (x, 1))
77ebd435
AJ
5326 {
5327 case 6:
5328 fputs ("@GOT", file);
5329 break;
5330 case 7:
5331 fputs ("@GOTOFF", file);
5332 break;
5333 case 8:
5334 fputs ("@PLT", file);
5335 break;
6eb791fc
JH
5336 case 15:
5337 fputs ("@GOTPCREL(%RIP)", file);
5338 break;
77ebd435
AJ
5339 default:
5340 output_operand_lossage ("invalid UNSPEC as operand");
5341 break;
5342 }
91bb873f
RH
5343 break;
5344
2a2ab3f9
JVA
5345 default:
5346 output_operand_lossage ("invalid expression as operand");
5347 }
5348}
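/* A few examples of the output (sketch): the unspec constants print as

	(const (unspec [(symbol_ref "foo")] 7))   ->  foo@GOTOFF
	(const (unspec [(symbol_ref "foo")] 6))   ->  foo@GOT
	(const (unspec [(symbol_ref "foo")] 15))  ->  foo@GOTPCREL(%RIP)

   and with the 'P' code a symbol_ref without SYMBOL_REF_FLAG set
   (i.e. a non-local symbol) gets an @PLT suffix appended.  */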
1865dbb5 5349
0f290768 5350/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5351 We need to handle our special PIC relocations. */
5352
0f290768 5353void
1865dbb5
JM
5354i386_dwarf_output_addr_const (file, x)
5355 FILE *file;
5356 rtx x;
5357{
14f73b5a 5358#ifdef ASM_QUAD
18b5b8d6 5359 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5360#else
5361 if (TARGET_64BIT)
5362 abort ();
18b5b8d6 5363 fprintf (file, "%s", ASM_LONG);
14f73b5a 5364#endif
1865dbb5
JM
5365 if (flag_pic)
5366 output_pic_addr_const (file, x, '\0');
5367 else
5368 output_addr_const (file, x);
5369 fputc ('\n', file);
5370}
5371
5372/* In the name of slightly smaller debug output, and to cater to
5373 general assembler lossage, recognize PIC+GOTOFF and turn it back
5374 into a direct symbol reference. */
5375
5376rtx
5377i386_simplify_dwarf_addr (orig_x)
5378 rtx orig_x;
5379{
ec65b2e3 5380 rtx x = orig_x, y;
1865dbb5 5381
6eb791fc
JH
5382 if (TARGET_64BIT)
5383 {
5384 if (GET_CODE (x) != CONST
5385 || GET_CODE (XEXP (x, 0)) != UNSPEC
5386 || XINT (XEXP (x, 0), 1) != 15)
5387 return orig_x;
5388 return XVECEXP (XEXP (x, 0), 0, 0);
5389 }
5390
1865dbb5 5391 if (GET_CODE (x) != PLUS
1865dbb5
JM
5392 || GET_CODE (XEXP (x, 1)) != CONST)
5393 return orig_x;
5394
ec65b2e3
JJ
5395 if (GET_CODE (XEXP (x, 0)) == REG
5396 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5397 /* %ebx + GOT/GOTOFF */
5398 y = NULL;
5399 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5400 {
5401 /* %ebx + %reg * scale + GOT/GOTOFF */
5402 y = XEXP (x, 0);
5403 if (GET_CODE (XEXP (y, 0)) == REG
5404 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5405 y = XEXP (y, 1);
5406 else if (GET_CODE (XEXP (y, 1)) == REG
5407 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5408 y = XEXP (y, 0);
5409 else
5410 return orig_x;
5411 if (GET_CODE (y) != REG
5412 && GET_CODE (y) != MULT
5413 && GET_CODE (y) != ASHIFT)
5414 return orig_x;
5415 }
5416 else
5417 return orig_x;
5418
1865dbb5
JM
5419 x = XEXP (XEXP (x, 1), 0);
5420 if (GET_CODE (x) == UNSPEC
3adbce3d
RH
5421 && (XINT (x, 1) == 6
5422 || XINT (x, 1) == 7))
ec65b2e3
JJ
5423 {
5424 if (y)
5425 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5426 return XVECEXP (x, 0, 0);
5427 }
1865dbb5
JM
5428
5429 if (GET_CODE (x) == PLUS
5430 && GET_CODE (XEXP (x, 0)) == UNSPEC
5431 && GET_CODE (XEXP (x, 1)) == CONST_INT
3adbce3d
RH
5432 && (XINT (XEXP (x, 0), 1) == 6
5433 || XINT (XEXP (x, 0), 1) == 7))
ec65b2e3
JJ
5434 {
5435 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5436 if (y)
5437 return gen_rtx_PLUS (Pmode, y, x);
5438 return x;
5439 }
1865dbb5
JM
5440
5441 return orig_x;
5442}
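/* Sketch of the simplification: on ia32,

	(plus (reg %ebx) (const (unspec [(symbol_ref "sym")] 7)))

   collapses back to plain (symbol_ref "sym") for the debug output,
   and in 64-bit mode (const (unspec [(symbol_ref "sym")] 15)) -- a
   @GOTPCREL reference -- likewise yields the bare symbol.  */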
2a2ab3f9 5443\f
a269a03c 5444static void
e075ae69 5445put_condition_code (code, mode, reverse, fp, file)
a269a03c 5446 enum rtx_code code;
e075ae69
RH
5447 enum machine_mode mode;
5448 int reverse, fp;
a269a03c
JC
5449 FILE *file;
5450{
a269a03c
JC
5451 const char *suffix;
5452
9a915772
JH
5453 if (mode == CCFPmode || mode == CCFPUmode)
5454 {
5455 enum rtx_code second_code, bypass_code;
5456 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5457 if (bypass_code != NIL || second_code != NIL)
b531087a 5458 abort ();
9a915772
JH
5459 code = ix86_fp_compare_code_to_integer (code);
5460 mode = CCmode;
5461 }
a269a03c
JC
5462 if (reverse)
5463 code = reverse_condition (code);
e075ae69 5464
a269a03c
JC
5465 switch (code)
5466 {
5467 case EQ:
5468 suffix = "e";
5469 break;
a269a03c
JC
5470 case NE:
5471 suffix = "ne";
5472 break;
a269a03c 5473 case GT:
7e08e190 5474 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
5475 abort ();
5476 suffix = "g";
a269a03c 5477 break;
a269a03c 5478 case GTU:
e075ae69
RH
5479 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5480 Those same assemblers have the same but opposite lossage on cmov. */
7e08e190 5481 if (mode != CCmode)
0f290768 5482 abort ();
e075ae69 5483 suffix = fp ? "nbe" : "a";
a269a03c 5484 break;
a269a03c 5485 case LT:
9076b9c1 5486 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5487 suffix = "s";
7e08e190 5488 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5489 suffix = "l";
9076b9c1 5490 else
0f290768 5491 abort ();
a269a03c 5492 break;
a269a03c 5493 case LTU:
9076b9c1 5494 if (mode != CCmode)
0f290768 5495 abort ();
a269a03c
JC
5496 suffix = "b";
5497 break;
a269a03c 5498 case GE:
9076b9c1 5499 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5500 suffix = "ns";
7e08e190 5501 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5502 suffix = "ge";
9076b9c1 5503 else
0f290768 5504 abort ();
a269a03c 5505 break;
a269a03c 5506 case GEU:
e075ae69 5507 /* ??? As above. */
7e08e190 5508 if (mode != CCmode)
0f290768 5509 abort ();
7e08e190 5510 suffix = fp ? "nb" : "ae";
a269a03c 5511 break;
a269a03c 5512 case LE:
7e08e190 5513 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
5514 abort ();
5515 suffix = "le";
a269a03c 5516 break;
a269a03c 5517 case LEU:
9076b9c1
JH
5518 if (mode != CCmode)
5519 abort ();
7e08e190 5520 suffix = "be";
a269a03c 5521 break;
3a3677ff 5522 case UNORDERED:
9e7adcb3 5523 suffix = fp ? "u" : "p";
3a3677ff
RH
5524 break;
5525 case ORDERED:
9e7adcb3 5526 suffix = fp ? "nu" : "np";
3a3677ff 5527 break;
a269a03c
JC
5528 default:
5529 abort ();
5530 }
5531 fputs (suffix, file);
5532}
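/* For instance (sketch): GE prints as "ns" in CCNOmode or CCGOCmode,
   where only the sign flag is meaningful after an arithmetic insn,
   but as "ge" in CCmode or CCGCmode, where a real compare set all the
   flags; a setcc routed through here thus becomes `setns %al' or
   `setge %al' accordingly.  */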
5533
e075ae69
RH
5534void
5535print_reg (x, code, file)
5536 rtx x;
5537 int code;
5538 FILE *file;
e5cb57e8 5539{
e075ae69 5540 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 5541 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
5542 || REGNO (x) == FLAGS_REG
5543 || REGNO (x) == FPSR_REG)
5544 abort ();
e9a25f70 5545
80f33d06 5546 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
5547 putc ('%', file);
5548
ef6257cd 5549 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
5550 code = 2;
5551 else if (code == 'b')
5552 code = 1;
5553 else if (code == 'k')
5554 code = 4;
3f3f2124
JH
5555 else if (code == 'q')
5556 code = 8;
e075ae69
RH
5557 else if (code == 'y')
5558 code = 3;
5559 else if (code == 'h')
5560 code = 0;
5561 else
5562 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 5563
3f3f2124
JH
5564 /* Irritatingly, the AMD extended registers use a different naming
5565 convention from the normal registers. */
5566 if (REX_INT_REG_P (x))
5567 {
885a70fd
JH
5568 if (!TARGET_64BIT)
5569 abort ();
3f3f2124
JH
5570 switch (code)
5571 {
ef6257cd 5572 case 0:
c725bd79 5573 error ("extended registers have no high halves");
3f3f2124
JH
5574 break;
5575 case 1:
5576 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5577 break;
5578 case 2:
5579 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5580 break;
5581 case 4:
5582 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5583 break;
5584 case 8:
5585 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5586 break;
5587 default:
c725bd79 5588 error ("unsupported operand size for extended register");
3f3f2124
JH
5589 break;
5590 }
5591 return;
5592 }
e075ae69
RH
5593 switch (code)
5594 {
5595 case 3:
5596 if (STACK_TOP_P (x))
5597 {
5598 fputs ("st(0)", file);
5599 break;
5600 }
5601 /* FALLTHRU */
e075ae69 5602 case 8:
3f3f2124 5603 case 4:
e075ae69 5604 case 12:
446988df 5605 if (! ANY_FP_REG_P (x))
885a70fd 5606 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 5607 /* FALLTHRU */
a7180f70 5608 case 16:
e075ae69
RH
5609 case 2:
5610 fputs (hi_reg_name[REGNO (x)], file);
5611 break;
5612 case 1:
5613 fputs (qi_reg_name[REGNO (x)], file);
5614 break;
5615 case 0:
5616 fputs (qi_high_reg_name[REGNO (x)], file);
5617 break;
5618 default:
5619 abort ();
fe25fea3 5620 }
e5cb57e8
SC
5621}
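/* Examples (sketch): for (reg:SI 0), i.e. the ax register, code 'b'
   prints "al", 'h' prints "ah", 'w' prints "ax", 'k' prints "eax"
   and 'q' prints "rax" (64-bit only); for the extended register r10,
   code 'k' prints "r10d".  The '%' prefix is emitted separately at
   the top of the function for ATT syntax.  */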
5622
2a2ab3f9 5623/* Meaning of CODE:
fe25fea3 5624 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 5625 C -- print opcode suffix for set/cmov insn.
fe25fea3 5626 c -- like C, but print reversed condition
ef6257cd 5627 F,f -- likewise, but for floating-point.
2a2ab3f9
JVA
5628 R -- print the prefix for register names.
5629 z -- print the opcode suffix for the size of the current operand.
5630 * -- print a star (in certain assembler syntax)
fb204271 5631 A -- print an absolute memory reference.
2a2ab3f9 5632 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
5633 s -- print a shift double count, followed by the assembler's argument
5634 delimiter.
fe25fea3
SC
5635 b -- print the QImode name of the register for the indicated operand.
5636 %b0 would print %al if operands[0] is reg 0.
5637 w -- likewise, print the HImode name of the register.
5638 k -- likewise, print the SImode name of the register.
3f3f2124 5639 q -- likewise, print the DImode name of the register.
ef6257cd
JH
5640 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5641 y -- print "st(0)" instead of "st" as a register.
a46d1d38 5642 D -- print condition for SSE cmp instruction.
ef6257cd
JH
5643 P -- if PIC, print an @PLT suffix.
5644 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 5645 */
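/* A usage sketch: with operands[0] = (reg:SI 0), the template
   fragment "%k0" prints "%eax" and "%b0" prints "%al", while "%z1"
   after an opcode appends the size suffix derived from the mode of
   operand 1.  The 'A' code is used by indirect jumps and calls,
   e.g. "call\t%A0" prints `call *%eax' in ATT syntax.  */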
2a2ab3f9
JVA
5646
5647void
5648print_operand (file, x, code)
5649 FILE *file;
5650 rtx x;
5651 int code;
5652{
5653 if (code)
5654 {
5655 switch (code)
5656 {
5657 case '*':
80f33d06 5658 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
5659 putc ('*', file);
5660 return;
5661
fb204271 5662 case 'A':
80f33d06 5663 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 5664 putc ('*', file);
80f33d06 5665 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
5666 {
5667 /* Intel syntax. For absolute addresses, registers should not
5668 be surrounded by brackets. */
5669 if (GET_CODE (x) != REG)
5670 {
5671 putc ('[', file);
5672 PRINT_OPERAND (file, x, 0);
5673 putc (']', file);
5674 return;
5675 }
5676 }
80f33d06
GS
5677 else
5678 abort ();
fb204271
DN
5679
5680 PRINT_OPERAND (file, x, 0);
5681 return;
5682
5683
2a2ab3f9 5684 case 'L':
80f33d06 5685 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5686 putc ('l', file);
2a2ab3f9
JVA
5687 return;
5688
5689 case 'W':
80f33d06 5690 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5691 putc ('w', file);
2a2ab3f9
JVA
5692 return;
5693
5694 case 'B':
80f33d06 5695 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5696 putc ('b', file);
2a2ab3f9
JVA
5697 return;
5698
5699 case 'Q':
80f33d06 5700 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5701 putc ('l', file);
2a2ab3f9
JVA
5702 return;
5703
5704 case 'S':
80f33d06 5705 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5706 putc ('s', file);
2a2ab3f9
JVA
5707 return;
5708
5f1ec3e6 5709 case 'T':
80f33d06 5710 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5711 putc ('t', file);
5f1ec3e6
JVA
5712 return;
5713
2a2ab3f9
JVA
5714 case 'z':
5715 /* 387 opcodes don't get size suffixes if the operands are
0f290768 5716 registers. */
2a2ab3f9
JVA
5717
5718 if (STACK_REG_P (x))
5719 return;
5720
5721 /* Derive the opcode suffix from the size of the operand. */
5722 switch (GET_MODE_SIZE (GET_MODE (x)))
5723 {
2a2ab3f9 5724 case 2:
155d8a47
JW
5725#ifdef HAVE_GAS_FILDS_FISTS
5726 putc ('s', file);
5727#endif
2a2ab3f9
JVA
5728 return;
5729
5730 case 4:
5731 if (GET_MODE (x) == SFmode)
5732 {
e075ae69 5733 putc ('s', file);
2a2ab3f9
JVA
5734 return;
5735 }
5736 else
e075ae69 5737 putc ('l', file);
2a2ab3f9
JVA
5738 return;
5739
5f1ec3e6 5740 case 12:
2b589241 5741 case 16:
e075ae69
RH
5742 putc ('t', file);
5743 return;
5f1ec3e6 5744
2a2ab3f9
JVA
5745 case 8:
5746 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
5747 {
5748#ifdef GAS_MNEMONICS
e075ae69 5749 putc ('q', file);
56c0e8fa 5750#else
e075ae69
RH
5751 putc ('l', file);
5752 putc ('l', file);
56c0e8fa
JVA
5753#endif
5754 }
e075ae69
RH
5755 else
5756 putc ('l', file);
2a2ab3f9 5757 return;
155d8a47
JW
5758
5759 default:
5760 abort ();
2a2ab3f9 5761 }
4af3895e
JVA
5762
5763 case 'b':
5764 case 'w':
5765 case 'k':
3f3f2124 5766 case 'q':
4af3895e
JVA
5767 case 'h':
5768 case 'y':
5cb6195d 5769 case 'X':
e075ae69 5770 case 'P':
4af3895e
JVA
5771 break;
5772
2d49677f
SC
5773 case 's':
5774 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5775 {
5776 PRINT_OPERAND (file, x, 0);
e075ae69 5777 putc (',', file);
2d49677f 5778 }
a269a03c
JC
5779 return;
5780
a46d1d38
JH
5781 case 'D':
5782 /* Little bit of braindamage here. The SSE compare instructions
5783 use completely different names for the comparisons than the
5784 fp conditional moves do. */
5785 switch (GET_CODE (x))
5786 {
5787 case EQ:
5788 case UNEQ:
5789 fputs ("eq", file);
5790 break;
5791 case LT:
5792 case UNLT:
5793 fputs ("lt", file);
5794 break;
5795 case LE:
5796 case UNLE:
5797 fputs ("le", file);
5798 break;
5799 case UNORDERED:
5800 fputs ("unord", file);
5801 break;
5802 case NE:
5803 case LTGT:
5804 fputs ("neq", file);
5805 break;
5806 case UNGE:
5807 case GE:
5808 fputs ("nlt", file);
5809 break;
5810 case UNGT:
5811 case GT:
5812 fputs ("nle", file);
5813 break;
5814 case ORDERED:
5815 fputs ("ord", file);
5816 break;
5817 default:
5818 abort ();
5819 break;
5820 }
5821 return;
1853aadd 5822 case 'C':
e075ae69 5823 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 5824 return;
fe25fea3 5825 case 'F':
e075ae69 5826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
5827 return;
5828
e9a25f70 5829 /* Like above, but reverse condition */
e075ae69 5830 case 'c':
c1d5afc4
CR
5831 /* Check to see if argument to %c is really a constant
5832 and not a condition code which needs to be reversed. */
5833 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5834 {
5835 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5836 return;
5837 }
e075ae69
RH
5838 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5839 return;
fe25fea3 5840 case 'f':
e075ae69 5841 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 5842 return;
ef6257cd
JH
5843 case '+':
5844 {
5845 rtx x;
e5cb57e8 5846
ef6257cd
JH
5847 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5848 return;
a4f31c00 5849
ef6257cd
JH
5850 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5851 if (x)
5852 {
5853 int pred_val = INTVAL (XEXP (x, 0));
5854
5855 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5856 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5857 {
5858 int taken = pred_val > REG_BR_PROB_BASE / 2;
5859 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5860
5861 /* Emit hints only in the case the default branch prediction
5862 heuristics would fail. */
5863 if (taken != cputaken)
5864 {
5865 /* We use 3e (DS) prefix for taken branches and
5866 2e (CS) prefix for not taken branches. */
5867 if (taken)
5868 fputs ("ds ; ", file);
5869 else
5870 fputs ("cs ; ", file);
5871 }
5872 }
5873 }
5874 return;
5875 }
4af3895e 5876 default:
a52453cc 5877 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
5878 }
5879 }
e9a25f70 5880
2a2ab3f9
JVA
5881 if (GET_CODE (x) == REG)
5882 {
5883 PRINT_REG (x, code, file);
5884 }
e9a25f70 5885
2a2ab3f9
JVA
5886 else if (GET_CODE (x) == MEM)
5887 {
e075ae69 5888 /* No `byte ptr' prefix for call instructions. */
80f33d06 5889 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 5890 {
69ddee61 5891 const char * size;
e075ae69
RH
5892 switch (GET_MODE_SIZE (GET_MODE (x)))
5893 {
5894 case 1: size = "BYTE"; break;
5895 case 2: size = "WORD"; break;
5896 case 4: size = "DWORD"; break;
5897 case 8: size = "QWORD"; break;
5898 case 12: size = "XWORD"; break;
a7180f70 5899 case 16: size = "XMMWORD"; break;
e075ae69 5900 default:
564d80f4 5901 abort ();
e075ae69 5902 }
fb204271
DN
5903
5904 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5905 if (code == 'b')
5906 size = "BYTE";
5907 else if (code == 'w')
5908 size = "WORD";
5909 else if (code == 'k')
5910 size = "DWORD";
5911
e075ae69
RH
5912 fputs (size, file);
5913 fputs (" PTR ", file);
2a2ab3f9 5914 }
e075ae69
RH
5915
5916 x = XEXP (x, 0);
5917 if (flag_pic && CONSTANT_ADDRESS_P (x))
5918 output_pic_addr_const (file, x, code);
0d7d98ee
JH
5919 /* Avoid (%rip) for call operands. */
5920 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5921 && GET_CODE (x) != CONST_INT)
5922 output_addr_const (file, x);
2a2ab3f9 5923 else
e075ae69 5924 output_address (x);
2a2ab3f9 5925 }
e9a25f70 5926
2a2ab3f9
JVA
5927 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5928 {
e9a25f70
JL
5929 REAL_VALUE_TYPE r;
5930 long l;
5931
5f1ec3e6
JVA
5932 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5933 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 5934
80f33d06 5935 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5936 putc ('$', file);
52267fcb 5937 fprintf (file, "0x%lx", l);
5f1ec3e6 5938 }
e9a25f70 5939
0f290768 5940 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
5941 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5942 {
e9a25f70
JL
5943 REAL_VALUE_TYPE r;
5944 char dstr[30];
5945
5f1ec3e6
JVA
5946 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5947 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5948 fprintf (file, "%s", dstr);
2a2ab3f9 5949 }
e9a25f70 5950
2b589241
JH
5951 else if (GET_CODE (x) == CONST_DOUBLE
5952 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 5953 {
e9a25f70
JL
5954 REAL_VALUE_TYPE r;
5955 char dstr[30];
5956
5f1ec3e6
JVA
5957 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5958 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5959 fprintf (file, "%s", dstr);
2a2ab3f9 5960 }
79325812 5961 else
2a2ab3f9 5962 {
4af3895e 5963 if (code != 'P')
2a2ab3f9 5964 {
695dac07 5965 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 5966 {
80f33d06 5967 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
5968 putc ('$', file);
5969 }
2a2ab3f9
JVA
5970 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5971 || GET_CODE (x) == LABEL_REF)
e075ae69 5972 {
80f33d06 5973 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
5974 putc ('$', file);
5975 else
5976 fputs ("OFFSET FLAT:", file);
5977 }
2a2ab3f9 5978 }
e075ae69
RH
5979 if (GET_CODE (x) == CONST_INT)
5980 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5981 else if (flag_pic)
2a2ab3f9
JVA
5982 output_pic_addr_const (file, x, code);
5983 else
5984 output_addr_const (file, x);
5985 }
5986}
5987\f
5988/* Print a memory operand whose address is ADDR. */
5989
5990void
5991print_operand_address (file, addr)
5992 FILE *file;
5993 register rtx addr;
5994{
e075ae69
RH
5995 struct ix86_address parts;
5996 rtx base, index, disp;
5997 int scale;
e9a25f70 5998
e075ae69
RH
5999 if (! ix86_decompose_address (addr, &parts))
6000 abort ();
e9a25f70 6001
e075ae69
RH
6002 base = parts.base;
6003 index = parts.index;
6004 disp = parts.disp;
6005 scale = parts.scale;
e9a25f70 6006
e075ae69
RH
6007 if (!base && !index)
6008 {
6009 /* Displacement only requires special attention. */
e9a25f70 6010
e075ae69 6011 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6012 {
80f33d06 6013 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6014 {
6015 if (USER_LABEL_PREFIX[0] == 0)
6016 putc ('%', file);
6017 fputs ("ds:", file);
6018 }
e075ae69 6019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6020 }
e075ae69
RH
6021 else if (flag_pic)
6022 output_pic_addr_const (file, addr, 0);
6023 else
6024 output_addr_const (file, addr);
0d7d98ee
JH
6025
6026 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6027 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6028 fputs ("(%rip)", file);
e075ae69
RH
6029 }
6030 else
6031 {
80f33d06 6032 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6033 {
e075ae69 6034 if (disp)
2a2ab3f9 6035 {
c399861d 6036 if (flag_pic)
e075ae69
RH
6037 output_pic_addr_const (file, disp, 0);
6038 else if (GET_CODE (disp) == LABEL_REF)
6039 output_asm_label (disp);
2a2ab3f9 6040 else
e075ae69 6041 output_addr_const (file, disp);
2a2ab3f9
JVA
6042 }
6043
e075ae69
RH
6044 putc ('(', file);
6045 if (base)
6046 PRINT_REG (base, 0, file);
6047 if (index)
2a2ab3f9 6048 {
e075ae69
RH
6049 putc (',', file);
6050 PRINT_REG (index, 0, file);
6051 if (scale != 1)
6052 fprintf (file, ",%d", scale);
2a2ab3f9 6053 }
e075ae69 6054 putc (')', file);
2a2ab3f9 6055 }
2a2ab3f9
JVA
6056 else
6057 {
e075ae69 6058 rtx offset = NULL_RTX;
e9a25f70 6059
e075ae69
RH
6060 if (disp)
6061 {
6062 /* Pull out the offset of a symbol; print any symbol itself. */
6063 if (GET_CODE (disp) == CONST
6064 && GET_CODE (XEXP (disp, 0)) == PLUS
6065 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6066 {
6067 offset = XEXP (XEXP (disp, 0), 1);
6068 disp = gen_rtx_CONST (VOIDmode,
6069 XEXP (XEXP (disp, 0), 0));
6070 }
ce193852 6071
e075ae69
RH
6072 if (flag_pic)
6073 output_pic_addr_const (file, disp, 0);
6074 else if (GET_CODE (disp) == LABEL_REF)
6075 output_asm_label (disp);
6076 else if (GET_CODE (disp) == CONST_INT)
6077 offset = disp;
6078 else
6079 output_addr_const (file, disp);
6080 }
e9a25f70 6081
e075ae69
RH
6082 putc ('[', file);
6083 if (base)
a8620236 6084 {
e075ae69
RH
6085 PRINT_REG (base, 0, file);
6086 if (offset)
6087 {
6088 if (INTVAL (offset) >= 0)
6089 putc ('+', file);
6090 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6091 }
a8620236 6092 }
e075ae69
RH
6093 else if (offset)
6094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6095 else
e075ae69 6096 putc ('0', file);
e9a25f70 6097
e075ae69
RH
6098 if (index)
6099 {
6100 putc ('+', file);
6101 PRINT_REG (index, 0, file);
6102 if (scale != 1)
6103 fprintf (file, "*%d", scale);
6104 }
6105 putc (']', file);
6106 }
2a2ab3f9
JVA
6107 }
6108}
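/* A sketch of the two dialects for base %eax, index %ebx, scale 4 and
   displacement 8: the ATT branch prints "8(%eax,%ebx,4)", while the
   Intel branch prints "[eax+8+ebx*4]".  A bare constant displacement
   gets a "ds:" prefix in Intel syntax, and in 64-bit mode a symbolic
   displacement with no base or index is printed %rip-relative.  */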
6109\f
6110/* Split one or more DImode RTL references into pairs of SImode
6111 references. The RTL can be REG, offsettable MEM, integer constant, or
6112 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6113 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6114 that parallel "operands". */
2a2ab3f9
JVA
6115
6116void
6117split_di (operands, num, lo_half, hi_half)
6118 rtx operands[];
6119 int num;
6120 rtx lo_half[], hi_half[];
6121{
6122 while (num--)
6123 {
57dbca5e 6124 rtx op = operands[num];
b932f770
JH
6125
6126 /* simplify_subreg refuses to split volatile memory addresses,
6127 but we still have to handle them. */
6128 if (GET_CODE (op) == MEM)
2a2ab3f9 6129 {
f4ef873c 6130 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6131 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6132 }
6133 else
b932f770 6134 {
38ca929b
JH
6135 lo_half[num] = simplify_gen_subreg (SImode, op,
6136 GET_MODE (op) == VOIDmode
6137 ? DImode : GET_MODE (op), 0);
6138 hi_half[num] = simplify_gen_subreg (SImode, op,
6139 GET_MODE (op) == VOIDmode
6140 ? DImode : GET_MODE (op), 4);
b932f770 6141 }
2a2ab3f9
JVA
6142 }
6143}
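/* Usage sketch (hypothetical operand numbering): a DImode move
   splitter in the machine description might call

	split_di (operands, 2, operands + 2, operands + 4);

   leaving the low SImode halves of operands[0] and operands[1] in
   operands[2]/operands[3] and the high halves in operands[4]/
   operands[5].  */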
44cf5b6a
JH
6144/* Split one or more TImode RTL references into pairs of SImode
6145 references. The RTL can be REG, offsettable MEM, integer constant, or
6146 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6147 split and "num" is its length. lo_half and hi_half are output arrays
6148 that parallel "operands". */
6149
6150void
6151split_ti (operands, num, lo_half, hi_half)
6152 rtx operands[];
6153 int num;
6154 rtx lo_half[], hi_half[];
6155{
6156 while (num--)
6157 {
6158 rtx op = operands[num];
b932f770
JH
6159
6160 /* simplify_subreg refuses to split volatile memory addresses, but we
6161 still have to handle them. */
6162 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6163 {
6164 lo_half[num] = adjust_address (op, DImode, 0);
6165 hi_half[num] = adjust_address (op, DImode, 8);
6166 }
6167 else
b932f770
JH
6168 {
6169 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6170 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6171 }
44cf5b6a
JH
6172 }
6173}
2a2ab3f9 6174\f
2a2ab3f9
JVA
6175/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6176 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6177 is the expression of the binary operation. The output may either be
6178 emitted here, or returned to the caller, like all output_* functions.
6179
6180 There is no guarantee that the operands are the same mode, as they
0f290768 6181 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6182
e3c2afab
AM
6183#ifndef SYSV386_COMPAT
6184/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6185 wants to fix the assemblers because that causes incompatibility
6186 with gcc. No-one wants to fix gcc because that causes
6187 incompatibility with assemblers... You can use the option of
6188 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6189#define SYSV386_COMPAT 1
6190#endif
6191
69ddee61 6192const char *
2a2ab3f9
JVA
6193output_387_binary_op (insn, operands)
6194 rtx insn;
6195 rtx *operands;
6196{
e3c2afab 6197 static char buf[30];
69ddee61 6198 const char *p;
1deaa899
JH
6199 const char *ssep;
6200 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6201
e3c2afab
AM
6202#ifdef ENABLE_CHECKING
6203 /* Even if we do not want to check the inputs, this documents the input
6204 constraints, which helps in understanding the following code. */
6205 if (STACK_REG_P (operands[0])
6206 && ((REG_P (operands[1])
6207 && REGNO (operands[0]) == REGNO (operands[1])
6208 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6209 || (REG_P (operands[2])
6210 && REGNO (operands[0]) == REGNO (operands[2])
6211 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6212 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6213 ; /* ok */
1deaa899 6214 else if (!is_sse)
e3c2afab
AM
6215 abort ();
6216#endif
6217
2a2ab3f9
JVA
6218 switch (GET_CODE (operands[3]))
6219 {
6220 case PLUS:
e075ae69
RH
6221 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6222 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6223 p = "fiadd";
6224 else
6225 p = "fadd";
1deaa899 6226 ssep = "add";
2a2ab3f9
JVA
6227 break;
6228
6229 case MINUS:
e075ae69
RH
6230 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6231 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6232 p = "fisub";
6233 else
6234 p = "fsub";
1deaa899 6235 ssep = "sub";
2a2ab3f9
JVA
6236 break;
6237
6238 case MULT:
e075ae69
RH
6239 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6240 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6241 p = "fimul";
6242 else
6243 p = "fmul";
1deaa899 6244 ssep = "mul";
2a2ab3f9
JVA
6245 break;
6246
6247 case DIV:
e075ae69
RH
6248 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6249 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6250 p = "fidiv";
6251 else
6252 p = "fdiv";
1deaa899 6253 ssep = "div";
2a2ab3f9
JVA
6254 break;
6255
6256 default:
6257 abort ();
6258 }
6259
1deaa899
JH
6260 if (is_sse)
6261 {
6262 strcpy (buf, ssep);
6263 if (GET_MODE (operands[0]) == SFmode)
6264 strcat (buf, "ss\t{%2, %0|%0, %2}");
6265 else
6266 strcat (buf, "sd\t{%2, %0|%0, %2}");
6267 return buf;
6268 }
e075ae69 6269 strcpy (buf, p);
2a2ab3f9
JVA
6270
6271 switch (GET_CODE (operands[3]))
6272 {
6273 case MULT:
6274 case PLUS:
6275 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6276 {
e3c2afab 6277 rtx temp = operands[2];
2a2ab3f9
JVA
6278 operands[2] = operands[1];
6279 operands[1] = temp;
6280 }
6281
e3c2afab
AM
6282 /* We now know operands[0] == operands[1]. */
6283
2a2ab3f9 6284 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6285 {
6286 p = "%z2\t%2";
6287 break;
6288 }
2a2ab3f9
JVA
6289
6290 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
6291 {
6292 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6293 /* How is it that we are storing to a dead operand[2]?
6294 Well, presumably operands[1] is dead too. We can't
6295 store the result to st(0) as st(0) gets popped on this
6296 instruction. Instead store to operands[2] (which I
6297 think has to be st(1)). st(1) will be popped later.
6298 gcc <= 2.8.1 didn't have this check and generated
6299 assembly code that the Unixware assembler rejected. */
6300 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6301 else
e3c2afab 6302 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 6303 break;
6b28fd63 6304 }
2a2ab3f9
JVA
6305
6306 if (STACK_TOP_P (operands[0]))
e3c2afab 6307 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6308 else
e3c2afab 6309 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 6310 break;
2a2ab3f9
JVA
6311
6312 case MINUS:
6313 case DIV:
6314 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
6315 {
6316 p = "r%z1\t%1";
6317 break;
6318 }
2a2ab3f9
JVA
6319
6320 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6321 {
6322 p = "%z2\t%2";
6323 break;
6324 }
2a2ab3f9 6325
2a2ab3f9 6326 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 6327 {
e3c2afab
AM
6328#if SYSV386_COMPAT
6329 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6330 derived assemblers, confusingly reverse the direction of
6331 the operation for fsub{r} and fdiv{r} when the
6332 destination register is not st(0). The Intel assembler
6333 doesn't have this brain damage. Read !SYSV386_COMPAT to
6334 figure out what the hardware really does. */
6335 if (STACK_TOP_P (operands[0]))
6336 p = "{p\t%0, %2|rp\t%2, %0}";
6337 else
6338 p = "{rp\t%2, %0|p\t%0, %2}";
6339#else
6b28fd63 6340 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6341 /* As above for fmul/fadd, we can't store to st(0). */
6342 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6343 else
e3c2afab
AM
6344 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6345#endif
e075ae69 6346 break;
6b28fd63 6347 }
2a2ab3f9
JVA
6348
6349 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 6350 {
e3c2afab 6351#if SYSV386_COMPAT
6b28fd63 6352 if (STACK_TOP_P (operands[0]))
e3c2afab 6353 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 6354 else
e3c2afab
AM
6355 p = "{p\t%1, %0|rp\t%0, %1}";
6356#else
6357 if (STACK_TOP_P (operands[0]))
6358 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6359 else
6360 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6361#endif
e075ae69 6362 break;
6b28fd63 6363 }
2a2ab3f9
JVA
6364
6365 if (STACK_TOP_P (operands[0]))
6366 {
6367 if (STACK_TOP_P (operands[1]))
e3c2afab 6368 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6369 else
e3c2afab 6370 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 6371 break;
2a2ab3f9
JVA
6372 }
6373 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
6374 {
6375#if SYSV386_COMPAT
6376 p = "{\t%1, %0|r\t%0, %1}";
6377#else
6378 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6379#endif
6380 }
2a2ab3f9 6381 else
e3c2afab
AM
6382 {
6383#if SYSV386_COMPAT
6384 p = "{r\t%2, %0|\t%0, %2}";
6385#else
6386 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6387#endif
6388 }
e075ae69 6389 break;
2a2ab3f9
JVA
6390
6391 default:
6392 abort ();
6393 }
e075ae69
RH
6394
6395 strcat (buf, p);
6396 return buf;
2a2ab3f9 6397}
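/* A worked example (sketch): for st(0) = st(0) + mem, i.e.
   operands[0] == operands[1] == st(0) with operands[2] a DFmode MEM,
   the PLUS case selects p = "%z2\t%2", so the function returns
   "fadd%z2\t%2", which assembles as `faddl mem'.  With SSE registers
   it instead returns e.g. "addsd\t{%2, %0|%0, %2}".  */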
e075ae69 6398
a4f31c00 6399/* Output code to initialize control word copies used by
7a2e09f4
JH
6400 trunc?f?i patterns. NORMAL is set to the current control word, while
6401 ROUND_DOWN is set to a copy that forces rounding toward zero (truncation). */
6402void
6403emit_i387_cw_initialization (normal, round_down)
6404 rtx normal, round_down;
6405{
6406 rtx reg = gen_reg_rtx (HImode);
6407
6408 emit_insn (gen_x86_fnstcw_1 (normal));
6409 emit_move_insn (reg, normal);
6410 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6411 && !TARGET_64BIT)
6412 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6413 else
6414 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6415 emit_move_insn (round_down, reg);
6416}
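/* The magic constants above refer to the rounding control field of
   the x87 control word (bits 10-11): both the iorhi3 with 0xc00 and
   the insv of 0xc into the high byte force RC = 11b, i.e.
   round-toward-zero, which is the behavior the trunc?f?i patterns
   need.  */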
6417
2a2ab3f9 6418/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 6419 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 6420 operand may be [SDX]Fmode. */
2a2ab3f9 6421
69ddee61 6422const char *
2a2ab3f9
JVA
6423output_fix_trunc (insn, operands)
6424 rtx insn;
6425 rtx *operands;
6426{
6427 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 6428 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 6429
e075ae69
RH
6430 /* Jump through a hoop or two for DImode, since the hardware has no
6431 non-popping instruction. We used to do this a different way, but
6432 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
6433 if (dimode_p && !stack_top_dies)
6434 output_asm_insn ("fld\t%y1", operands);
e075ae69 6435
7a2e09f4 6436 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
6437 abort ();
6438
e075ae69 6439 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 6440 abort ();
e9a25f70 6441
7a2e09f4 6442 output_asm_insn ("fldcw\t%3", operands);
e075ae69 6443 if (stack_top_dies || dimode_p)
7a2e09f4 6444 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 6445 else
7a2e09f4 6446 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 6447 output_asm_insn ("fldcw\t%2", operands);
10195bd8 6448
e075ae69 6449 return "";
2a2ab3f9 6450}
cda749b1 6451
e075ae69
RH
6452/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6453 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6454 when fucom should be used. */
6455
69ddee61 6456const char *
e075ae69 6457output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
6458 rtx insn;
6459 rtx *operands;
e075ae69 6460 int eflags_p, unordered_p;
cda749b1 6461{
e075ae69
RH
6462 int stack_top_dies;
6463 rtx cmp_op0 = operands[0];
6464 rtx cmp_op1 = operands[1];
0644b628 6465 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
6466
6467 if (eflags_p == 2)
6468 {
6469 cmp_op0 = cmp_op1;
6470 cmp_op1 = operands[2];
6471 }
0644b628
JH
6472 if (is_sse)
6473 {
6474 if (GET_MODE (operands[0]) == SFmode)
6475 if (unordered_p)
6476 return "ucomiss\t{%1, %0|%0, %1}";
6477 else
6478 return "comiss\t{%1, %0|%0, %y}";
6479 else
6480 if (unordered_p)
6481 return "ucomisd\t{%1, %0|%0, %1}";
6482 else
6483 return "comisd\t{%1, %0|%0, %y}";
6484 }
cda749b1 6485
e075ae69 6486 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
6487 abort ();
6488
e075ae69 6489 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 6490
e075ae69
RH
6491 if (STACK_REG_P (cmp_op1)
6492 && stack_top_dies
6493 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6494 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 6495 {
e075ae69
RH
6496 /* If the top of the 387 stack dies, and the other operand
6497 is also a stack register that dies, then this must be a
6498 `fcompp' float compare. */
6499
6500 if (eflags_p == 1)
6501 {
6502 /* There is no double popping fcomi variant. Fortunately,
6503 eflags is immune from the fstp's cc clobbering. */
6504 if (unordered_p)
6505 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6506 else
6507 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6508 return "fstp\t%y0";
6509 }
6510 else
cda749b1 6511 {
e075ae69
RH
6512 if (eflags_p == 2)
6513 {
6514 if (unordered_p)
6515 return "fucompp\n\tfnstsw\t%0";
6516 else
6517 return "fcompp\n\tfnstsw\t%0";
6518 }
cda749b1
JW
6519 else
6520 {
e075ae69
RH
6521 if (unordered_p)
6522 return "fucompp";
6523 else
6524 return "fcompp";
cda749b1
JW
6525 }
6526 }
cda749b1
JW
6527 }
6528 else
6529 {
e075ae69 6530 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 6531
0f290768 6532 static const char * const alt[24] =
e075ae69
RH
6533 {
6534 "fcom%z1\t%y1",
6535 "fcomp%z1\t%y1",
6536 "fucom%z1\t%y1",
6537 "fucomp%z1\t%y1",
0f290768 6538
e075ae69
RH
6539 "ficom%z1\t%y1",
6540 "ficomp%z1\t%y1",
6541 NULL,
6542 NULL,
6543
6544 "fcomi\t{%y1, %0|%0, %y1}",
6545 "fcomip\t{%y1, %0|%0, %y1}",
6546 "fucomi\t{%y1, %0|%0, %y1}",
6547 "fucomip\t{%y1, %0|%0, %y1}",
6548
6549 NULL,
6550 NULL,
6551 NULL,
6552 NULL,
6553
6554 "fcom%z2\t%y2\n\tfnstsw\t%0",
6555 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6556 "fucom%z2\t%y2\n\tfnstsw\t%0",
6557 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 6558
e075ae69
RH
6559 "ficom%z2\t%y2\n\tfnstsw\t%0",
6560 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6561 NULL,
6562 NULL
6563 };
6564
6565 int mask;
69ddee61 6566 const char *ret;
e075ae69
RH
6567
6568 mask = eflags_p << 3;
6569 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6570 mask |= unordered_p << 1;
6571 mask |= stack_top_dies;
6572
6573 if (mask >= 24)
6574 abort ();
6575 ret = alt[mask];
6576 if (ret == NULL)
6577 abort ();
cda749b1 6578
e075ae69 6579 return ret;
cda749b1
JW
6580 }
6581}
2a2ab3f9 6582
f88c65f7
RH
6583void
6584ix86_output_addr_vec_elt (file, value)
6585 FILE *file;
6586 int value;
6587{
6588 const char *directive = ASM_LONG;
6589
6590 if (TARGET_64BIT)
6591 {
6592#ifdef ASM_QUAD
6593 directive = ASM_QUAD;
6594#else
6595 abort ();
6596#endif
6597 }
6598
6599 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6600}
6601
6602void
6603ix86_output_addr_diff_elt (file, value, rel)
6604 FILE *file;
6605 int value, rel;
6606{
6607 if (TARGET_64BIT)
6608 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6609 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6610 else if (HAVE_AS_GOTOFF_IN_DATA)
6611 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6612 else
6613 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6614 ASM_LONG, LPREFIX, value);
6615}
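/* Assuming the usual ELF definitions of ASM_LONG (".long") and
   LPREFIX (".L"), the GOTOFF branch above emits for value 5 a line
   of the form

	.long .L5@GOTOFF

   while the 64-bit branch emits the self-relative expression
   .long .L5-.+4+(.-.L3) against the label numbered by REL.  */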
32b5b1aa 6616\f
a8bac9ab
RH
6617/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6618 for the target. */
6619
6620void
6621ix86_expand_clear (dest)
6622 rtx dest;
6623{
6624 rtx tmp;
6625
6626 /* We play register width games, which are only valid after reload. */
6627 if (!reload_completed)
6628 abort ();
6629
6630 /* Avoid HImode and its attendant prefix byte. */
6631 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6632 dest = gen_rtx_REG (SImode, REGNO (dest));
6633
6634 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6635
6636 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6637 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6638 {
6639 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6640 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6641 }
6642
6643 emit_insn (tmp);
6644}
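/* A size sketch of the choice being made: `xorl %eax, %eax' encodes
   in 2 bytes against 5 for `movl $0, %eax', so the xor form (with
   its flags clobber) is preferred unless TARGET_USE_MOV0 says
   otherwise; widening to SImode above also avoids the extra 66h
   operand-size prefix an HImode clear would carry.  */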
6645
79325812 6646void
e075ae69
RH
6647ix86_expand_move (mode, operands)
6648 enum machine_mode mode;
6649 rtx operands[];
32b5b1aa 6650{
e075ae69 6651 int strict = (reload_in_progress || reload_completed);
e075ae69 6652 rtx insn;
e9a25f70 6653
e075ae69 6654 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 6655 {
e075ae69 6656 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 6657
e075ae69
RH
6658 if (GET_CODE (operands[0]) == MEM)
6659 operands[1] = force_reg (Pmode, operands[1]);
6660 else
32b5b1aa 6661 {
e075ae69
RH
6662 rtx temp = operands[0];
6663 if (GET_CODE (temp) != REG)
6664 temp = gen_reg_rtx (Pmode);
6665 temp = legitimize_pic_address (operands[1], temp);
6666 if (temp == operands[0])
6667 return;
6668 operands[1] = temp;
32b5b1aa 6669 }
e075ae69
RH
6670 }
6671 else
6672 {
d7a29404 6673 if (GET_CODE (operands[0]) == MEM
44cf5b6a 6674 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
d7a29404
JH
6675 || !push_operand (operands[0], mode))
6676 && GET_CODE (operands[1]) == MEM)
e075ae69 6677 operands[1] = force_reg (mode, operands[1]);
e9a25f70 6678
2c5a510c
RH
6679 if (push_operand (operands[0], mode)
6680 && ! general_no_elim_operand (operands[1], mode))
6681 operands[1] = copy_to_mode_reg (mode, operands[1]);
6682
44cf5b6a
JH
6683 /* Force large constants in 64-bit compilation into a register
6684 to get them CSEed. */
6685 if (TARGET_64BIT && mode == DImode
6686 && immediate_operand (operands[1], mode)
6687 && !x86_64_zero_extended_value (operands[1])
6688 && !register_operand (operands[0], mode)
6689 && optimize && !reload_completed && !reload_in_progress)
6690 operands[1] = copy_to_mode_reg (mode, operands[1]);
6691
e075ae69 6692 if (FLOAT_MODE_P (mode))
32b5b1aa 6693 {
d7a29404
JH
6694 /* If we are loading a floating point constant to a register,
6695 force the value to memory now, since we'll get better code
6696 out of the back end. */
e075ae69
RH
6697
6698 if (strict)
6699 ;
e075ae69 6700 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 6701 && register_operand (operands[0], mode))
e075ae69 6702 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 6703 }
32b5b1aa 6704 }
e9a25f70 6705
e075ae69 6706 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 6707
e075ae69
RH
6708 emit_insn (insn);
6709}
e9a25f70 6710
e37af218
RH
6711void
6712ix86_expand_vector_move (mode, operands)
6713 enum machine_mode mode;
6714 rtx operands[];
6715{
6716 /* Force constants other than zero into memory. We do not know how
6717 the instructions used to build constants modify the upper 64 bits
6718 of the register; once we have that information we may be able
6719 to handle some of them more efficiently. */
6720 if ((reload_in_progress | reload_completed) == 0
6721 && register_operand (operands[0], mode)
6722 && CONSTANT_P (operands[1]))
6723 {
6724 rtx addr = gen_reg_rtx (Pmode);
6725 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6726 operands[1] = gen_rtx_MEM (mode, addr);
6727 }
6728
6729 /* Make operand1 a register if it isn't already. */
6730 if ((reload_in_progress | reload_completed) == 0
6731 && !register_operand (operands[0], mode)
6732 && !register_operand (operands[1], mode)
6733 && operands[1] != CONST0_RTX (mode))
6734 {
59bef189 6735 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
6736 emit_move_insn (operands[0], temp);
6737 return;
6738 }
6739
6740 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6741}
6742
e075ae69
RH
6743/* Attempt to expand a binary operator. Make the expansion closer to the
6744 actual machine than just general_operand, which would allow 3 separate
9d81fc27 6745 memory references (one output, two input) in a single insn. */
e9a25f70 6746
e075ae69
RH
6747void
6748ix86_expand_binary_operator (code, mode, operands)
6749 enum rtx_code code;
6750 enum machine_mode mode;
6751 rtx operands[];
6752{
6753 int matching_memory;
6754 rtx src1, src2, dst, op, clob;
6755
6756 dst = operands[0];
6757 src1 = operands[1];
6758 src2 = operands[2];
6759
6760 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6761 if (GET_RTX_CLASS (code) == 'c'
6762 && (rtx_equal_p (dst, src2)
6763 || immediate_operand (src1, mode)))
6764 {
6765 rtx temp = src1;
6766 src1 = src2;
6767 src2 = temp;
32b5b1aa 6768 }
e9a25f70 6769
e075ae69
RH
6770 /* If the destination is memory, and we do not have matching source
6771 operands, do things in registers. */
6772 matching_memory = 0;
6773 if (GET_CODE (dst) == MEM)
32b5b1aa 6774 {
e075ae69
RH
6775 if (rtx_equal_p (dst, src1))
6776 matching_memory = 1;
6777 else if (GET_RTX_CLASS (code) == 'c'
6778 && rtx_equal_p (dst, src2))
6779 matching_memory = 2;
6780 else
6781 dst = gen_reg_rtx (mode);
6782 }
0f290768 6783
e075ae69
RH
6784 /* The source operands cannot both be in memory. */
6785 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6786 {
6787 if (matching_memory != 2)
6788 src2 = force_reg (mode, src2);
6789 else
6790 src1 = force_reg (mode, src1);
32b5b1aa 6791 }
e9a25f70 6792
06a964de
JH
6793 /* If the operation is not commutative, source 1 cannot be a constant
6794 or non-matching memory. */
0f290768 6795 if ((CONSTANT_P (src1)
06a964de
JH
6796 || (!matching_memory && GET_CODE (src1) == MEM))
6797 && GET_RTX_CLASS (code) != 'c')
e075ae69 6798 src1 = force_reg (mode, src1);
0f290768 6799
e075ae69 6800 /* If optimizing, copy to regs to improve CSE */
fe577e58 6801 if (optimize && ! no_new_pseudos)
32b5b1aa 6802 {
e075ae69
RH
6803 if (GET_CODE (dst) == MEM)
6804 dst = gen_reg_rtx (mode);
6805 if (GET_CODE (src1) == MEM)
6806 src1 = force_reg (mode, src1);
6807 if (GET_CODE (src2) == MEM)
6808 src2 = force_reg (mode, src2);
32b5b1aa 6809 }
e9a25f70 6810
e075ae69
RH
6811 /* Emit the instruction. */
6812
6813 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6814 if (reload_in_progress)
6815 {
6816 /* Reload doesn't know about the flags register, and doesn't know that
6817 it doesn't want to clobber it. We can only do this with PLUS. */
6818 if (code != PLUS)
6819 abort ();
6820 emit_insn (op);
6821 }
6822 else
32b5b1aa 6823 {
e075ae69
RH
6824 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6825 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 6826 }
e9a25f70 6827
e075ae69
RH
6828 /* Fix up the destination if needed. */
6829 if (dst != operands[0])
6830 emit_move_insn (operands[0], dst);
6831}
6832
6833/* Return TRUE or FALSE depending on whether the binary operator meets the
6834 appropriate constraints. */
6835
6836int
6837ix86_binary_operator_ok (code, mode, operands)
6838 enum rtx_code code;
6839 enum machine_mode mode ATTRIBUTE_UNUSED;
6840 rtx operands[3];
6841{
6842 /* The source operands cannot both be in memory. */
6843 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6844 return 0;
6845 /* If the operation is not commutative, source 1 cannot be a constant. */
6846 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6847 return 0;
6848 /* If the destination is memory, we must have a matching source operand. */
6849 if (GET_CODE (operands[0]) == MEM
6850 && ! (rtx_equal_p (operands[0], operands[1])
6851 || (GET_RTX_CLASS (code) == 'c'
6852 && rtx_equal_p (operands[0], operands[2]))))
6853 return 0;
06a964de 6854 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 6855 have a matching destination. */
06a964de
JH
6856 if (GET_CODE (operands[1]) == MEM
6857 && GET_RTX_CLASS (code) != 'c'
6858 && ! rtx_equal_p (operands[0], operands[1]))
6859 return 0;
e075ae69
RH
6860 return 1;
6861}
6862
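/* [Editor's illustration -- not part of i386.c.]  A hedged, user-level
   sketch of the operand shapes the predicate above accepts; the function
   name and the assembly in the comments are examples only.  */

static int
binary_operand_shapes_example (int a, int b, int *m)
{
  *m = *m + a;	/* addl %eax, (%edx) -- memory dst matching src1: OK.  */
  a = b + a;	/* addl %ebx, %eax -- commutative, dst matches src2: OK.  */
  /* "*m = a + b" would have a memory destination matching neither
     source, so ix86_expand_binary_operator rewrites it through a
     scratch register instead.  */
  return a;
}
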
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to registers to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know
         that it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

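/* [Editor's note -- illustration only.]  For an EQ test of two SImode
   registers, the function above emits RTL of roughly this shape
   (register 17 is FLAGS_REG on this port):

	(set (reg:CCZ 17) (compare:CCZ (reg:SI a) (reg:SI b)))

   and returns (eq (reg:CCZ 17) (const_int 0)) for the branch, setcc,
   or cmov expander to consume.  */
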
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all
     comparisons non-trapping when compiling for IEEE.  Once gcc is able
     to distinguish all forms of trapping and nontrapping comparisons, we
     can make inequality comparisons trapping again, since it results in
     better code when using FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF or SF=0 */
    case LT:			/* SF<>OF or SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For the other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing against
         zero, but we miss the jump instruction for them, so we need to
         use relational tests against the overflow flag, which thus needs
         to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us for
         the proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

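/* [Editor's worked example -- illustration only.]  Per the switch above:
   "a == b" needs only ZF and yields CCZmode; unsigned codes such as
   LTU need the carry flag and yield CCmode; signed "a < 0" yields
   CCGOCmode because sign-vs-overflow suffices against zero, while signed
   "a < b" against a nonzero operand yields CCGCmode.  */
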
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code)
           == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || op_mode == TFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN if
   no such code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its code is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF  PF  CF
     >      0   0   0
     <      0   0   1
     =      1   0   0
     un     1   1   1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}

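/* [Editor's sketch -- hedged illustration; the labels are hypothetical.]
   With TARGET_IEEE_FP, the split above gives "a < b" first_code UNLT
   plus an UNORDERED bypass, and "a != b" first_code LTGT plus a second
   UNORDERED branch.  After an fcomi, using the flag table above:

	a < b:	jp	.Lbypass	; PF=1: unordered, branch around
		jb	.Ltarget	; CF=1: UNLT
	.Lbypass:

	a != b:	jne	.Ltarget	; ZF=0: LTGT
		jp	.Ltarget	; PF=1: unordered counts as "!="  */
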
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as
   a cost metric.  In the future this should be tweaked to compute bytes
   for optimize_size and to take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

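/* [Editor's worked example -- illustration only.]  For EQ with
   TARGET_IEEE_FP and TARGET_CMOVE: the fnstsw-based sequence costs 5
   per ix86_fp_comparison_arithmetics_cost, while the fcomi form costs
   (bypass_code != NIL) + 2 == 3, since EQ splits into UNEQ with an
   UNORDERED bypass.  ix86_fp_comparison_cost thus returns 3 and the
   fcomi form wins.  */
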
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do the fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

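/* [Editor's note -- illustration only.]  After fnstsw, AH holds C0 in
   bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40), so the masks
   above decode as 0x45 == C0|C2|C3 and 0x05 == C0|C2.  For example, the
   non-IEEE GE test "test $0x05; sete" succeeds exactly when neither C0
   (below) nor C2 (unordered) is set.  */
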
rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code,
                                  &second_code);

        /* Check whether we will use the natural sequence with one jump.
           If so, we can expand the jump early.  Otherwise delay expansion
           by creating a compound insn so as not to confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand a DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing a less-than or greater-or-equal
           comparison, op1 is a constant and the low word is zero, then
           we can just examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}

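/* [Editor's worked example -- hedged illustration; labels hypothetical.]
   For a signed DImode "a <= b" branch on a 32-bit target, the code above
   picks code1/code2/code3 = LT/GT/LEU, producing:

	cmpl	hi(b), hi(a)
	jl	.Ltarget	; hi(a) < hi(b): a <= b holds
	jg	.Lskip		; hi(a) > hi(b): a <= b fails
	cmpl	lo(b), lo(a)
	jbe	.Ltarget	; high words equal: unsigned low compare
   .Lskip:  */
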
/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume that BYPASS and SECOND always test for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that there is no need for the
         probability to be updated.  Later we may run some experiments
         and see whether unordered values are more frequent in
         practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

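/* [Editor's sketch -- hedged illustration; the exact instruction
   selection may differ.]  With fcomi and TARGET_IEEE_FP, "a == b"
   carries an UNORDERED bypass test, so the code above emits roughly:

	fcomi	%st(1), %st
	sete	%al		; UNEQ half (ZF=1)
	setnp	%dl		; reversed bypass: ORDERED (PF=0)
	andb	%dl, %al	; gen_andqi3, as in the bypass arm above  */
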
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we cannot use the sbbl
     case.  When the comparison is done with an immediate, we can convert
     it to LTU or GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
        code = LTU;
      else
        code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

7920 compare_code = GET_CODE (compare_op);
7921
7922 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7923 HImode insns, we'd be swallowed in word prefix ops. */
7924
635559ab
JH
7925 if (mode != HImode
7926 && (mode != DImode || TARGET_64BIT)
0f290768 7927 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
7928 && GET_CODE (operands[3]) == CONST_INT)
7929 {
7930 rtx out = operands[0];
7931 HOST_WIDE_INT ct = INTVAL (operands[2]);
7932 HOST_WIDE_INT cf = INTVAL (operands[3]);
7933 HOST_WIDE_INT diff;
7934
a1b8572c
JH
7935 if ((compare_code == LTU || compare_code == GEU)
7936 && !second_test && !bypass_test)
e075ae69 7937 {
e075ae69
RH
7938
7939 /* Detect overlap between destination and compare sources. */
7940 rtx tmp = out;
7941
0f290768 7942 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
7943 if (compare_code == LTU)
7944 {
7945 int tmp = ct;
7946 ct = cf;
7947 cf = tmp;
7948 compare_code = reverse_condition (compare_code);
7949 code = reverse_condition (code);
7950 }
7951 diff = ct - cf;
7952
e075ae69 7953 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 7954 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 7955 tmp = gen_reg_rtx (mode);
e075ae69
RH
7956
7957 emit_insn (compare_seq);
635559ab 7958 if (mode == DImode)
14f73b5a
JH
7959 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7960 else
7961 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 7962
36583fea
JH
7963 if (diff == 1)
7964 {
7965 /*
7966 * cmpl op0,op1
7967 * sbbl dest,dest
7968 * [addl dest, ct]
7969 *
7970 * Size 5 - 8.
7971 */
7972 if (ct)
635559ab
JH
7973 tmp = expand_simple_binop (mode, PLUS,
7974 tmp, GEN_INT (ct),
7975 tmp, 1, OPTAB_DIRECT);
36583fea
JH
7976 }
7977 else if (cf == -1)
7978 {
7979 /*
7980 * cmpl op0,op1
7981 * sbbl dest,dest
7982 * orl $ct, dest
7983 *
7984 * Size 8.
7985 */
635559ab
JH
7986 tmp = expand_simple_binop (mode, IOR,
7987 tmp, GEN_INT (ct),
7988 tmp, 1, OPTAB_DIRECT);
36583fea
JH
7989 }
7990 else if (diff == -1 && ct)
7991 {
7992 /*
7993 * cmpl op0,op1
7994 * sbbl dest,dest
7995 * xorl $-1, dest
7996 * [addl dest, cf]
7997 *
7998 * Size 8 - 11.
7999 */
635559ab
JH
8000 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8001 if (cf)
8002 tmp = expand_simple_binop (mode, PLUS,
8003 tmp, GEN_INT (cf),
8004 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8005 }
8006 else
8007 {
8008 /*
8009 * cmpl op0,op1
8010 * sbbl dest,dest
8011 * andl cf - ct, dest
8012 * [addl dest, ct]
8013 *
8014 * Size 8 - 11.
8015 */
635559ab
JH
8016 tmp = expand_simple_binop (mode, AND,
8017 tmp,
8018 GEN_INT (trunc_int_for_mode
8019 (cf - ct, mode)),
8020 tmp, 1, OPTAB_DIRECT);
8021 if (ct)
8022 tmp = expand_simple_binop (mode, PLUS,
8023 tmp, GEN_INT (ct),
8024 tmp, 1, OPTAB_DIRECT);
36583fea 8025 }
e075ae69
RH
8026
8027 if (tmp != out)
8028 emit_move_insn (out, tmp);
8029
8030 return 1; /* DONE */
8031 }
8032
8033 diff = ct - cf;
8034 if (diff < 0)
8035 {
8036 HOST_WIDE_INT tmp;
8037 tmp = ct, ct = cf, cf = tmp;
8038 diff = -diff;
734dba19
JH
8039 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8040 {
8041 /* We may be reversing unordered compare to normal compare, that
8042 is not valid in general (we may convert non-trapping condition
8043 to trapping one), however on i386 we currently emit all
8044 comparisons unordered. */
8045 compare_code = reverse_condition_maybe_unordered (compare_code);
8046 code = reverse_condition_maybe_unordered (code);
8047 }
8048 else
8049 {
8050 compare_code = reverse_condition (compare_code);
8051 code = reverse_condition (code);
8052 }
e075ae69 8053 }
635559ab
JH
8054 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8055 || diff == 3 || diff == 5 || diff == 9)
8056 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
8057 {
8058 /*
8059 * xorl dest,dest
8060 * cmpl op1,op2
8061 * setcc dest
8062 * lea cf(dest*(ct-cf)),dest
8063 *
8064 * Size 14.
8065 *
8066 * This also catches the degenerate setcc-only case.
8067 */
8068
8069 rtx tmp;
8070 int nops;
8071
8072 out = emit_store_flag (out, code, ix86_compare_op0,
8073 ix86_compare_op1, VOIDmode, 0, 1);
8074
8075 nops = 0;
885a70fd
JH
8076 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8077 done in proper mode to match. */
e075ae69 8078 if (diff == 1)
14f73b5a 8079 tmp = out;
e075ae69
RH
8080 else
8081 {
885a70fd 8082 rtx out1;
14f73b5a 8083 out1 = out;
635559ab 8084 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
8085 nops++;
8086 if (diff & 1)
8087 {
635559ab 8088 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
8089 nops++;
8090 }
8091 }
8092 if (cf != 0)
8093 {
635559ab 8094 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
8095 nops++;
8096 }
885a70fd
JH
8097 if (tmp != out
8098 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8099 {
14f73b5a 8100 if (nops == 1)
e075ae69
RH
8101 {
8102 rtx clob;
8103
8104 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8105 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8106
8107 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8108 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8109 emit_insn (tmp);
8110 }
8111 else
8112 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8113 }
8114 if (out != operands[0])
8115 emit_move_insn (operands[0], out);
8116
8117 return 1; /* DONE */
8118 }
8119
      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
        {
          if (ct == 0)
            {
              ct = cf;
              cf = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                {
                  /* We may be reversing an unordered compare to a normal
                     compare, which is not valid in general (we may
                     convert a non-trapping condition to a trapping one);
                     however, on i386 we currently emit all comparisons
                     unordered.  */
                  compare_code
                    = reverse_condition_maybe_unordered (compare_code);
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
            }

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          out = expand_simple_binop (mode, PLUS,
                                     out, constm1_rtx,
                                     out, 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, AND,
                                     out,
                                     GEN_INT (trunc_int_for_mode
                                              (cf - ct, mode)),
                                     out, 1, OPTAB_DIRECT);
          out = expand_simple_binop (mode, PLUS,
                                     out, GEN_INT (ct),
                                     out, 1, OPTAB_DIRECT);
          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few more things with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (out != orig_out)
        emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1; /* DONE */
}

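/* [Editor's illustration -- hypothetical function, not part of this
   file.]  The branchless selection built above for an unsigned compare
   with two constants, here ct == 7 and cf == 3 (diff == 4, the general
   "andl cf - ct" shape):

	cmpl	%esi, %edi	; compare a, b
	sbbl	%eax, %eax	; eax = (a < b) ? -1 : 0
	andl	$-4, %eax	; cf - ct == -4
	addl	$7, %eax	; ct  */

static unsigned int
sbb_select_example (unsigned int a, unsigned int b)
{
  return a >= b ? 7 : 3;
}
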
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
          || (GET_CODE (operands[1]) != LTGT
              && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
          || SSE_REG_P (operands[0])
          || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands
         and the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = reverse_condition_maybe_unordered (code);
        }
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
        {
          /* Check for the min operation.  */
          if (code == LT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_minsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_mindf3 (operands[0], op0, op1));
              return 1;
            }
          /* Check for the max operation.  */
          if (code == GT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_maxsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_maxdf3 (operands[0], op0, op1));
              return 1;
            }
        }
      /* Manage the condition to be sse_comparison_operator.  In non-ieee
         mode, try to canonicalize the destination operand to be first in
         the comparison - this helps reload to avoid extra moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
        {
          rtx tmp = ix86_compare_op0;
          ix86_compare_op0 = ix86_compare_op1;
          ix86_compare_op1 = tmp;
          operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      /* Similarly try to manage the result to be the first operand of
         the conditional move.  We also don't support the NE comparison
         on SSE, so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
           && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
          || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
        {
          rtx tmp = operands[2];
          operands[2] = operands[3];
          operands[3] = tmp;
          operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
                                        (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      if (GET_MODE (operands[0]) == SFmode)
        emit_insn (gen_sse_movsfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      else
        emit_insn (gen_sse_movdfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}

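/* [Editor's illustration -- hypothetical function.]  With SSE math
   enabled and an SFmode comparison in the same mode, the min case above
   turns a compare plus conditional move into a single minss (with the
   non-IEEE operand-order caveats discussed above).  */

static float
sse_min_example (float a, float b)
{
  return a < b ? a : b;	/* minss %xmm1, %xmm0 */
}
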
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool references to immediates.  This is used by
     fp moves, which force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                case TFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
              parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warnings on 32bit
                 systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = GEN_INT (trunc_int_for_mode
                             ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                              + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                              DImode));
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
            }
          else
            abort ();
        }
    }

  return size;
}

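/* [Editor's worked example -- illustration only.]  Splitting the DFmode
   constant 1.0 on a 32-bit little-endian target: IEEE double 1.0 is
   0x3ff0000000000000, so REAL_VALUE_TO_TARGET_DOUBLE gives
   l[0] == 0x00000000 and l[1] == 0x3ff00000, and parts[0]/parts[1]
   become those two SImode immediates.  */
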
8577/* Emit insns to perform a move or push of DI, DF, and XF values.
8578 Return false when normal moves are needed; true when all required
8579 insns have been emitted. Operands 2-4 contain the input values
8580 int the correct order; operands 5-7 contain the output values. */
8581
26e5b205
JH
8582void
8583ix86_split_long_move (operands)
8584 rtx operands[];
2450a057
JH
8585{
8586 rtx part[2][3];
26e5b205 8587 int nparts;
2450a057
JH
8588 int push = 0;
8589 int collisions = 0;
26e5b205
JH
8590 enum machine_mode mode = GET_MODE (operands[0]);
8591
8592 /* The DFmode expanders may ask us to move double.
8593 For 64bit target this is single move. By hiding the fact
8594 here we simplify i386.md splitters. */
8595 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8596 {
8cdfa312
RH
8597 /* Optimize constant pool reference to immediates. This is used by
8598 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
8599
8600 if (GET_CODE (operands[1]) == MEM
8601 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8602 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8603 operands[1] = get_pool_constant (XEXP (operands[1], 0));
8604 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
8605 {
8606 operands[0] = copy_rtx (operands[0]);
8607 PUT_MODE (operands[0], Pmode);
8608 }
26e5b205
JH
8609 else
8610 operands[0] = gen_lowpart (DImode, operands[0]);
8611 operands[1] = gen_lowpart (DImode, operands[1]);
8612 emit_move_insn (operands[0], operands[1]);
8613 return;
8614 }
2450a057 8615
2450a057
JH
8616 /* The only non-offsettable memory we handle is push. */
8617 if (push_operand (operands[0], VOIDmode))
8618 push = 1;
8619 else if (GET_CODE (operands[0]) == MEM
8620 && ! offsettable_memref_p (operands[0]))
8621 abort ();
8622
26e5b205
JH
8623 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8624 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
8625
8626 /* When emitting push, take care for source operands on the stack. */
8627 if (push && GET_CODE (operands[1]) == MEM
8628 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8629 {
26e5b205 8630 if (nparts == 3)
886cbb88
JH
8631 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8632 XEXP (part[1][2], 0));
8633 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8634 XEXP (part[1][1], 0));
2450a057
JH
8635 }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          collisions = 1;
          emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
                                  XEXP (part[1][0], 0)));
          part[1][0] = change_address (part[1][0],
                                       TARGET_64BIT ? DImode : SImode,
                                       part[0][nparts - 1]);
          part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
          if (nparts == 3)
            part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* We use only the first 12 bytes of a TFmode value, but for
                 pushing we are required to adjust the stack as if we were
                 pushing a real 16-byte value.  */
              if (mode == TFmode && !TARGET_64BIT)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                       GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have a 32bit push available.  In case
             this is a register, it is OK - we will just use the larger
             counterpart.  We also retype memory - this comes from an
             attempt to avoid a REX prefix on moving the second half of a
             TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

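/* Illustrative reading of the splitter above (comment added for clarity,
   not part of the original source): on a 32-bit target a DImode move such
   as

     (set (reg:DI 0) (mem:DI (reg:SI 3)))

   becomes two SImode moves.  When a destination part overlaps the address
   register of the source, the collision logic either reorders the halves
   or rewrites the address with an lea so only one colliding move remains.  */
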
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

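/* Rough C model of the split above, assuming a constant COUNT and the
   DImode value viewed as the pair {high, low} (illustrative sketch only,
   not part of the original source):

     if (count >= 32)
       { high = low << (count - 32); low = 0; }
     else
       { high = (high << count) | (low >> (32 - count));   -- shld
         low <<= count; }

   For a variable count the same shld/shl pair is emitted, and bit 5 of
   the count is fixed up afterwards with a conditional move or branch.  */
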
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);

          if (! reload_completed)
            emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
          else
            {
              emit_move_insn (high[0], low[0]);
              emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            }

          if (count > 32)
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = gen_reg_rtx (SImode);
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

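/* The two right-shift splitters above mirror the shift-left case: for
   count >= 32 the high word moves down, followed by a 31-bit arithmetic
   shift to replicate the sign (ashr) or a clearing of the high word
   (lshr); the variable-count paths pair shrd with sar/shr plus the same
   cmove/branch fixup.  (Summary comment added for clarity; not in the
   original source.)  */
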
/* Helper function for the string operations below.  Test whether VARIABLE
   is suitably aligned: AND it with VALUE and jump to the returned label
   when the result is zero.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}

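/* For example, ix86_expand_aligntest (destreg, 1) emits RTL behaving as
   "if ((destreg & 1) == 0) goto label;", so a caller can guard a single
   odd-byte copy with the returned label.  (Illustrative comment added for
   clarity; not in the original source.)  */
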
/* Adjust COUNTREG by VALUE.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
        emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
                                        destreg, srcreg, countreg));
      else
        emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
                                  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
                                                destreg, srcreg, countreg));
              else
                emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
                                          destreg, srcreg, countreg));
            }
          else
            emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
                                            destreg, srcreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
        emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
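  /* In C terms the emitted sequence behaves roughly as follows
     (illustrative sketch only, assuming the 32-bit case):

       while (dst & 3)                  -- unrolled alignment tests
         { *dst++ = *src++; count--; }
       rep movsl;                       -- bulk copy of count >> 2 words
       then move a 4-, 2- and 1-byte tail without loops.  */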
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
        {
          end_sequence ();
          return 0;
        }

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is
         often able to predict the branches) and also it is friendlier to
         the hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0
          && align < (TARGET_PENTIUMPRO && (count == 0
                                            || count >= (unsigned int) 260)
                      ? 8 : UNITS_PER_WORD))
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4
          && ((TARGET_PENTIUMPRO && (count == 0
                                     || count >= (unsigned int) 260))
              || TARGET_64BIT))
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
                                          destreg, srcreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
                                    destreg, srcreg, countreg2));
        }

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
        emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
                                         destreg, countreg));
      else
        emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
                                   destreg, countreg));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
                                                 destreg, countreg));
              else
                emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
                                           destreg, countreg));
            }
          else
            emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
                                             destreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0
          && align < (TARGET_PENTIUMPRO && (count == 0
                                            || count >= (unsigned int) 260)
                      ? 8 : UNITS_PER_WORD))
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
                                              || count >= (unsigned int) 260))
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
                                             ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                             : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
                                           destreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
                                     destreg, countreg2));
        }

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      /* The tail tests below check the remaining byte count, as in
         ix86_expand_movstr; the original tested the destination address
         register here, which is not reliable after rep stos.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strsetsi (destreg,
                                   gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}

/* Expand strlen.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
                                         align, scratch4, scratch3));
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
                                     align, scratch4, scratch3));
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}

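/* In effect the scasb path above computes (illustrative C, not part of
   the original source):

     ecx = -1; edi = src;
     repnz scasb;           -- scan until the terminating byte
     out = ~ecx - 1;        -- the one_cmpl + add of constm1 above

   since repnz scasb decrements ecx once per byte scanned, including the
   terminator, ~ecx - 1 is exactly the string length.  */
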
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only makes the program larger and does not help
     to speed it up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         GEN_INT (trunc_int_for_mode
                                  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

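  /* The insns above implement the classic zero-byte test
       tmpreg = (word - 0x01010101) & ~word & 0x80808080.
     Worked example (added for clarity, not in the original source):
     word = 0x41004242 gives 0x3fff4141 & 0xbeffbdbd & 0x80808080
     = 0x00800000, so bit 23 flags the zero byte in bits 16-23.  */
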
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
        emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
\f
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}

/* Mark machine specific bits of P for GC.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  if (! machine)
    return;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}

static void
ix86_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
            len = 1;
          else
            len = 4;
        }

      /* An index requires the two-byte modrm form.  */
      if (index)
        len += 1;
    }

  return len;
}

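/* Illustrative examples of the returned values (comment added for
   clarity, derived from the cases above):

     (%eax)          -> 0    register indirect, one-byte modrm
     (%esp), (%ebp)  -> 1    need the two-byte modrm form
     disp32          -> 4    direct addressing
     8(%eax)         -> 1    disp8, while disp32(%eax) -> 4
     (%eax,%ebx,4)   -> +1   an index adds the SIB byte  */
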
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (len)
          abort ();
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          {
            switch (get_attr_mode (insn))
              {
              case MODE_QI:
                len += 1;
                break;
              case MODE_HI:
                len += 2;
                break;
              case MODE_SI:
                len += 4;
                break;
              /* Immediates for DImode instructions are encoded as 32bit
                 sign extended values.  */
              case MODE_DI:
                len += 4;
                break;
              default:
                fatal_insn ("unknown insn mode", insn);
              }
          }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
\f
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

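      /* E.g. "addl $4, %eax" immediately followed by "movl (%eax), %ebx"
         pays this extra cycle on Pentium (illustrative note, not in the
         original source).  */
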
      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
          && dep_insn_type != TYPE_FMOV
          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
        cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        {
          if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
            cost += 2;
          else
            cost += 3;
        }
      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 0;
          else if (cost >= 3)
            cost -= 3;
          else
            cost = 0;
        }
      break;

    default:
      break;
    }

  return cost;
}

static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 0;
}

static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
               INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

0a726ef1 10171
c237e94a
ZW
10172static void
10173ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
10174 FILE *dump ATTRIBUTE_UNUSED;
10175 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 10176 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
10177{
10178 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10179}
10180
10181/* Shift INSN to SLOT, and shift everything else down. */
10182
10183static void
10184ix86_reorder_insn (insnp, slot)
10185 rtx *insnp, *slot;
10186{
10187 if (insnp != slot)
10188 {
10189 rtx insn = *insnp;
0f290768 10190 do
e075ae69
RH
10191 insnp[0] = insnp[1];
10192 while (++insnp != slot);
10193 *insnp = insn;
0a726ef1 10194 }
e075ae69
RH
10195}
10196
10197/* Find an instruction with given pairability and minimal amount of cycles
10198 lost by the fact that the CPU waits for both pipelines to finish before
10199 reading next instructions. Also take care that both instructions together
10200 can not exceed 7 bytes. */
10201
10202static rtx *
10203ix86_pent_find_pair (e_ready, ready, type, first)
10204 rtx *e_ready;
10205 rtx *ready;
10206 enum attr_pent_pair type;
10207 rtx first;
10208{
10209 int mincycles, cycles;
10210 enum attr_pent_pair tmp;
10211 enum attr_memory memory;
10212 rtx *insnp, *bestinsnp = NULL;
0a726ef1 10213
e075ae69
RH
10214 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10215 return NULL;
0a726ef1 10216
e075ae69
RH
10217 memory = ix86_safe_memory (first);
10218 cycles = result_ready_cost (first);
10219 mincycles = INT_MAX;
10220
10221 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10222 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10223 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 10224 {
e075ae69
RH
10225 enum attr_memory second_memory;
10226 int secondcycles, currentcycles;
10227
10228 second_memory = ix86_safe_memory (*insnp);
10229 secondcycles = result_ready_cost (*insnp);
10230 currentcycles = abs (cycles - secondcycles);
10231
10232 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 10233 {
e075ae69
RH
10234 /* Two read/modify/write instructions together takes two
10235 cycles longer. */
10236 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10237 currentcycles += 2;
0f290768 10238
e075ae69
RH
10239 /* Read modify/write instruction followed by read/modify
10240 takes one cycle longer. */
10241 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10242 && tmp != PENT_PAIR_UV
10243 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10244 currentcycles += 1;
6ec6d558 10245 }
e075ae69
RH
10246 if (currentcycles < mincycles)
10247 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 10248 }
0a726ef1 10249
e075ae69
RH
10250 return bestinsnp;
10251}
10252
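/* Pairing recap (comment added for clarity): PENT_PAIR_PU/PV/UV encode
   which of the Pentium U and V pipes an insn may occupy, and the 7-byte
   length cap tested above reflects the limit under which two insns can
   still be fetched and issued together.  */
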
/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_PU, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_PV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready - 1, ready,
                                   PENT_PAIR_UV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
          && ix86_safe_memory (*e_ready) == MEMORY_BOTH
          && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

e075ae69 10336
0f290768 10337 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 10338 issued_this_cycle = 0;
e075ae69 10339
78a0d70c
ZW
10340 insnp = e_ready;
10341 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 10342
78a0d70c
ZW
10343 /* If the decoders are empty, and we've a complex insn at the
10344 head of the priority queue, let it issue without complaint. */
10345 if (decode[0] == NULL)
10346 {
10347 if (cur_uops == PPRO_UOPS_MANY)
10348 {
10349 decode[0] = *insnp;
10350 goto ppro_done;
10351 }
10352
10353 /* Otherwise, search for a 2-4 uop unsn to issue. */
10354 while (cur_uops != PPRO_UOPS_FEW)
10355 {
10356 if (insnp == ready)
10357 break;
10358 cur_uops = ix86_safe_ppro_uops (*--insnp);
10359 }
10360
10361 /* If so, move it to the head of the line. */
10362 if (cur_uops == PPRO_UOPS_FEW)
10363 ix86_reorder_insn (insnp, e_ready);
0a726ef1 10364
78a0d70c
ZW
10365 /* Issue the head of the queue. */
10366 issued_this_cycle = 1;
10367 decode[0] = *e_ready--;
10368 }
fb693d44 10369
78a0d70c
ZW
10370 /* Look for simple insns to fill in the other two slots. */
10371 for (i = 1; i < 3; ++i)
10372 if (decode[i] == NULL)
10373 {
10374 if (ready >= e_ready)
10375 goto ppro_done;
fb693d44 10376
e075ae69
RH
10377 insnp = e_ready;
10378 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
10379 while (cur_uops != PPRO_UOPS_ONE)
10380 {
10381 if (insnp == ready)
10382 break;
10383 cur_uops = ix86_safe_ppro_uops (*--insnp);
10384 }
fb693d44 10385
78a0d70c
ZW
10386 /* Found one. Move it to the head of the queue and issue it. */
10387 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 10388 {
78a0d70c
ZW
10389 ix86_reorder_insn (insnp, e_ready);
10390 decode[i] = *e_ready--;
10391 issued_this_cycle++;
10392 continue;
10393 }
fb693d44 10394
78a0d70c
ZW
10395 /* ??? Didn't find one. Ideally, here we would do a lazy split
10396 of 2-uop insns, issue one and queue the other. */
10397 }
fb693d44 10398
78a0d70c
ZW
10399 ppro_done:
10400 if (issued_this_cycle == 0)
10401 issued_this_cycle = 1;
10402 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10403}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 10438
e075ae69
RH
10439/* We are about to issue INSN. Return the number of insns left on the
10440 ready queue that can be issued this cycle. */
b222082e 10441
c237e94a 10442static int
e075ae69
RH
10443ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10444 FILE *dump;
10445 int sched_verbose;
10446 rtx insn;
10447 int can_issue_more;
10448{
10449 int i;
10450 switch (ix86_cpu)
fb693d44 10451 {
e075ae69
RH
10452 default:
10453 return can_issue_more - 1;
fb693d44 10454
e075ae69
RH
10455 case PROCESSOR_PENTIUMPRO:
10456 {
10457 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 10458
e075ae69
RH
10459 if (uops == PPRO_UOPS_MANY)
10460 {
10461 if (sched_verbose)
10462 ix86_dump_ppro_packet (dump);
10463 ix86_sched_data.ppro.decode[0] = insn;
10464 ix86_sched_data.ppro.decode[1] = NULL;
10465 ix86_sched_data.ppro.decode[2] = NULL;
10466 if (sched_verbose)
10467 ix86_dump_ppro_packet (dump);
10468 ix86_sched_data.ppro.decode[0] = NULL;
10469 }
10470 else if (uops == PPRO_UOPS_FEW)
10471 {
10472 if (sched_verbose)
10473 ix86_dump_ppro_packet (dump);
10474 ix86_sched_data.ppro.decode[0] = insn;
10475 ix86_sched_data.ppro.decode[1] = NULL;
10476 ix86_sched_data.ppro.decode[2] = NULL;
10477 }
10478 else
10479 {
10480 for (i = 0; i < 3; ++i)
10481 if (ix86_sched_data.ppro.decode[i] == NULL)
10482 {
10483 ix86_sched_data.ppro.decode[i] = insn;
10484 break;
10485 }
10486 if (i == 3)
10487 abort ();
10488 if (i == 2)
10489 {
10490 if (sched_verbose)
10491 ix86_dump_ppro_packet (dump);
10492 ix86_sched_data.ppro.decode[0] = NULL;
10493 ix86_sched_data.ppro.decode[1] = NULL;
10494 ix86_sched_data.ppro.decode[2] = NULL;
10495 }
10496 }
10497 }
10498 return --ix86_sched_data.ppro.issued_this_cycle;
10499 }
fb693d44 10500}
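/* Worked example (editor's illustration, not part of the original
   source): issuing three ONE-uop insns against an empty packet fills
   decode[0..2] in turn, and the third dumps and clears the packet.  A
   MANY-uop insn instead claims a packet by itself and is dumped
   immediately, so the next insn starts a fresh packet.  */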
a7180f70 10501\f
0e4970d7
RK
10502/* Walk through INSNS and look for MEM references whose address is DSTREG or
 10503   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10504 appropriate. */
10505
10506void
10507ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10508 rtx insns;
10509 rtx dstref, srcref, dstreg, srcreg;
10510{
10511 rtx insn;
10512
10513 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10514 if (INSN_P (insn))
10515 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10516 dstreg, srcreg);
10517}
10518
10519/* Subroutine of above to actually do the updating by recursively walking
10520 the rtx. */
10521
10522static void
10523ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10524 rtx x;
10525 rtx dstref, srcref, dstreg, srcreg;
10526{
10527 enum rtx_code code = GET_CODE (x);
10528 const char *format_ptr = GET_RTX_FORMAT (code);
10529 int i, j;
10530
10531 if (code == MEM && XEXP (x, 0) == dstreg)
10532 MEM_COPY_ATTRIBUTES (x, dstref);
10533 else if (code == MEM && XEXP (x, 0) == srcreg)
10534 MEM_COPY_ATTRIBUTES (x, srcref);
10535
10536 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10537 {
10538 if (*format_ptr == 'e')
10539 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10540 dstreg, srcreg);
10541 else if (*format_ptr == 'E')
10542 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 10543 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
10544 dstreg, srcreg);
10545 }
10546}
10547\f
a7180f70
BS
10548/* Compute the alignment given to a constant that is being placed in memory.
10549 EXP is the constant and ALIGN is the alignment that the object would
10550 ordinarily have.
10551 The value of this function is used instead of that alignment to align
10552 the object. */
10553
10554int
10555ix86_constant_alignment (exp, align)
10556 tree exp;
10557 int align;
10558{
10559 if (TREE_CODE (exp) == REAL_CST)
10560 {
10561 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10562 return 64;
10563 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10564 return 128;
10565 }
10566 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10567 && align < 256)
10568 return 256;
10569
10570 return align;
10571}
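/* Editor's illustration (not part of the original source): the effect
   of ix86_constant_alignment on constants emitted to memory, assuming
   their natural alignment would otherwise be smaller.  */
#if 0
double f (void) { return 3.141592653589793; }
                        /* DFmode constant pool entry: raised to 64 bits.  */
const char *p = "a string constant longer than thirty characters";
                        /* TREE_STRING_LENGTH >= 31: raised to 256 bits
                           (32 bytes).                                     */
#endif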
10572
10573/* Compute the alignment for a static variable.
10574 TYPE is the data type, and ALIGN is the alignment that
10575 the object would ordinarily have. The value of this function is used
10576 instead of that alignment to align the object. */
10577
10578int
10579ix86_data_alignment (type, align)
10580 tree type;
10581 int align;
10582{
10583 if (AGGREGATE_TYPE_P (type)
10584 && TYPE_SIZE (type)
10585 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10586 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10587 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10588 return 256;
10589
0d7d98ee
JH
 10590  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
 10591     to a 16-byte boundary. */
10592 if (TARGET_64BIT)
10593 {
10594 if (AGGREGATE_TYPE_P (type)
10595 && TYPE_SIZE (type)
10596 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10597 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10598 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10599 return 128;
10600 }
10601
a7180f70
BS
10602 if (TREE_CODE (type) == ARRAY_TYPE)
10603 {
10604 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10605 return 64;
10606 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10607 return 128;
10608 }
10609 else if (TREE_CODE (type) == COMPLEX_TYPE)
10610 {
0f290768 10611
a7180f70
BS
10612 if (TYPE_MODE (type) == DCmode && align < 64)
10613 return 64;
10614 if (TYPE_MODE (type) == XCmode && align < 128)
10615 return 128;
10616 }
10617 else if ((TREE_CODE (type) == RECORD_TYPE
10618 || TREE_CODE (type) == UNION_TYPE
10619 || TREE_CODE (type) == QUAL_UNION_TYPE)
10620 && TYPE_FIELDS (type))
10621 {
10622 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10623 return 64;
10624 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10625 return 128;
10626 }
10627 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10628 || TREE_CODE (type) == INTEGER_TYPE)
10629 {
10630 if (TYPE_MODE (type) == DFmode && align < 64)
10631 return 64;
10632 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10633 return 128;
10634 }
10635
10636 return align;
10637}
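/* Editor's illustration (not part of the original source): the
   corresponding effect of ix86_data_alignment on static data.  */
#if 0
static double big[64];  /* 4096-bit aggregate: >= 256 bits, so aligned to
                           256 bits (and the x86-64 >= 128-bit rule also
                           fires).                                        */
static double two[2];   /* 128 bits: below the 256-bit aggregate threshold
                           on ia32, but an array of DFmode elements still
                           gets 64-bit alignment.                         */
#endif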
10638
10639/* Compute the alignment for a local variable.
10640 TYPE is the data type, and ALIGN is the alignment that
10641 the object would ordinarily have. The value of this macro is used
10642 instead of that alignment to align the object. */
10643
10644int
10645ix86_local_alignment (type, align)
10646 tree type;
10647 int align;
10648{
0d7d98ee
JH
 10649  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
 10650     to a 16-byte boundary. */
10651 if (TARGET_64BIT)
10652 {
10653 if (AGGREGATE_TYPE_P (type)
10654 && TYPE_SIZE (type)
10655 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10656 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10657 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10658 return 128;
10659 }
a7180f70
BS
10660 if (TREE_CODE (type) == ARRAY_TYPE)
10661 {
10662 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10663 return 64;
10664 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10665 return 128;
10666 }
10667 else if (TREE_CODE (type) == COMPLEX_TYPE)
10668 {
10669 if (TYPE_MODE (type) == DCmode && align < 64)
10670 return 64;
10671 if (TYPE_MODE (type) == XCmode && align < 128)
10672 return 128;
10673 }
10674 else if ((TREE_CODE (type) == RECORD_TYPE
10675 || TREE_CODE (type) == UNION_TYPE
10676 || TREE_CODE (type) == QUAL_UNION_TYPE)
10677 && TYPE_FIELDS (type))
10678 {
10679 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10680 return 64;
10681 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10682 return 128;
10683 }
10684 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10685 || TREE_CODE (type) == INTEGER_TYPE)
10686 {
0f290768 10687
a7180f70
BS
10688 if (TYPE_MODE (type) == DFmode && align < 64)
10689 return 64;
10690 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10691 return 128;
10692 }
10693 return align;
10694}
0ed08620
JH
10695\f
10696/* Emit RTL insns to initialize the variable parts of a trampoline.
10697 FNADDR is an RTX for the address of the function's pure code.
10698 CXT is an RTX for the static chain value for the function. */
10699void
10700x86_initialize_trampoline (tramp, fnaddr, cxt)
10701 rtx tramp, fnaddr, cxt;
10702{
10703 if (!TARGET_64BIT)
10704 {
10705 /* Compute offset from the end of the jmp to the target function. */
10706 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10707 plus_constant (tramp, 10),
10708 NULL_RTX, 1, OPTAB_DIRECT);
10709 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10710 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10711 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10712 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10713 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10714 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10715 }
10716 else
10717 {
10718 int offset = 0;
10719 /* Try to load address using shorter movl instead of movabs.
 10720	 We may want to support movq for kernel mode, but the kernel does not use
10721 trampolines at the moment. */
10722 if (x86_64_zero_extended_value (fnaddr))
10723 {
10724 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10725 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10726 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10727 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10728 gen_lowpart (SImode, fnaddr));
10729 offset += 6;
10730 }
10731 else
10732 {
10733 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10734 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10735 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10736 fnaddr);
10737 offset += 10;
10738 }
10739 /* Load static chain using movabs to r10. */
10740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10741 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10742 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10743 cxt);
10744 offset += 10;
 10745      /* Jump to r11. */
10746 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10747 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10748 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
44cf5b6a 10749 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
0ed08620
JH
10750 offset += 3;
10751 if (offset > TRAMPOLINE_SIZE)
b531087a 10752 abort ();
0ed08620
JH
10753 }
10754}
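/* Editor's sketch (not part of the original source): the ten bytes the
   !TARGET_64BIT branch above stores, written out by hand on the host,
   assuming memcpy from <string.h> and a 32-bit pointer.  DISP is
   relative to the end of the JMP, i.e. TRAMP + 10, which is why the
   RTL above subtracts plus_constant (tramp, 10).  */
#if 0
static void
example_ia32_trampoline (unsigned char *tramp,
			 unsigned int cxt, unsigned int fnaddr)
{
  unsigned int disp = fnaddr - ((unsigned int) tramp + 10);
  tramp[0] = 0xb9;		/* movl $cxt, %ecx (static chain)  */
  memcpy (tramp + 1, &cxt, 4);
  tramp[5] = 0xe9;		/* jmp <rel32> to FNADDR           */
  memcpy (tramp + 6, &disp, 4);
}
#endif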
eeb06b1b
BS
10755\f
10756#define def_builtin(MASK, NAME, TYPE, CODE) \
10757do { \
10758 if ((MASK) & target_flags) \
10759 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
10760} while (0)
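/* Example (editor's note, not part of the original source): a call
   such as
     def_builtin (MASK_SSE, "__builtin_ia32_sqrtps",
		  v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
   registers the builtin only when MASK_SSE is set in target_flags, so
   builtins for unavailable ISA extensions are never created.  */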
bd793c65 10761
bd793c65
BS
10762struct builtin_description
10763{
8b60264b
KG
10764 const unsigned int mask;
10765 const enum insn_code icode;
10766 const char *const name;
10767 const enum ix86_builtins code;
10768 const enum rtx_code comparison;
10769 const unsigned int flag;
bd793c65
BS
10770};
10771
8b60264b 10772static const struct builtin_description bdesc_comi[] =
bd793c65 10773{
eeb06b1b
BS
10774 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10775 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10776 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10777 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10778 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10779 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10780 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10781 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10782 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10783 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10784 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10785 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
bd793c65
BS
10786};
10787
8b60264b 10788static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
10789{
10790 /* SSE */
eeb06b1b
BS
10791 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10792 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10793 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10794 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10795 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10796 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10797 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10798 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10799
10800 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10801 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10802 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10803 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10804 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10805 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10806 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10807 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10808 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10809 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10810 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10811 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10812 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10813 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10814 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10815 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10816 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10817 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10818 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10819 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10820 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10821 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10822 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10823 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10824
10825 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10826 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10827 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10828 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10829
eeb06b1b
BS
10830 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10831 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10832 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10833 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10834 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
10835
10836 /* MMX */
eeb06b1b
BS
10837 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10838 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10839 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10840 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10841 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10842 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10843
10844 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10845 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10846 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10847 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10848 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10849 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10850 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10851 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10852
10853 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10854 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
47f339cf 10855 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
10856
10857 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10858 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10859 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10860 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10861
47f339cf
BS
10862 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10863 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
10864
10865 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10866 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10867 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10868 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10869 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10870 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10871
47f339cf
BS
10872 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10873 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10874 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10875 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
10876
10877 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10878 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10879 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10880 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10881 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10882 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
10883
10884 /* Special. */
eeb06b1b
BS
10885 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10886 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10887 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10888
10889 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10890 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10891
10892 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10893 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10894 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10895 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10896 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10897 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10898
10899 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10900 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10901 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10902 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10903 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10904 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10905
10906 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10907 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10908 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10909 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10910
38b29e64 10911 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
eeb06b1b 10912 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
bd793c65
BS
10913
10914};
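/* Editor's illustration (not part of the original source): the
   user-level view of one bdesc_2arg entry, assuming -msse.  The vector
   typedef mirrors the V4SFmode operand the table expects.  */
#if 0
typedef float v4sf __attribute__ ((mode (V4SF)));

static v4sf
example_addps (v4sf a, v4sf b)
{
  return __builtin_ia32_addps (a, b);	/* expands via CODE_FOR_addv4sf3 */
}
#endif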
10915
8b60264b 10916static const struct builtin_description bdesc_1arg[] =
bd793c65 10917{
47f339cf 10918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
eeb06b1b 10919 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
bd793c65 10920
eeb06b1b
BS
10921 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10922 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10923 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
bd793c65 10924
eeb06b1b
BS
10925 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10926 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10927 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10928 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
bd793c65
BS
10929
10930};
10931
f6155fda
SS
10932void
10933ix86_init_builtins ()
10934{
10935 if (TARGET_MMX)
10936 ix86_init_mmx_sse_builtins ();
10937}
10938
10939/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
10940 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10941 builtins. */
e37af218 10942static void
f6155fda 10943ix86_init_mmx_sse_builtins ()
bd793c65 10944{
8b60264b 10945 const struct builtin_description * d;
77ebd435 10946 size_t i;
cbd5937a 10947 tree endlink = void_list_node;
bd793c65
BS
10948
10949 tree pchar_type_node = build_pointer_type (char_type_node);
10950 tree pfloat_type_node = build_pointer_type (float_type_node);
10951 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10952 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10953
10954 /* Comparisons. */
10955 tree int_ftype_v4sf_v4sf
10956 = build_function_type (integer_type_node,
10957 tree_cons (NULL_TREE, V4SF_type_node,
10958 tree_cons (NULL_TREE,
10959 V4SF_type_node,
10960 endlink)));
10961 tree v4si_ftype_v4sf_v4sf
10962 = build_function_type (V4SI_type_node,
10963 tree_cons (NULL_TREE, V4SF_type_node,
10964 tree_cons (NULL_TREE,
10965 V4SF_type_node,
10966 endlink)));
10967 /* MMX/SSE/integer conversions. */
bd793c65
BS
10968 tree int_ftype_v4sf
10969 = build_function_type (integer_type_node,
10970 tree_cons (NULL_TREE, V4SF_type_node,
10971 endlink));
10972 tree int_ftype_v8qi
10973 = build_function_type (integer_type_node,
10974 tree_cons (NULL_TREE, V8QI_type_node,
10975 endlink));
bd793c65 10976 tree v4sf_ftype_v4sf_int
21e1b5f1 10977 = build_function_type (V4SF_type_node,
bd793c65
BS
10978 tree_cons (NULL_TREE, V4SF_type_node,
10979 tree_cons (NULL_TREE, integer_type_node,
10980 endlink)));
10981 tree v4sf_ftype_v4sf_v2si
10982 = build_function_type (V4SF_type_node,
10983 tree_cons (NULL_TREE, V4SF_type_node,
10984 tree_cons (NULL_TREE, V2SI_type_node,
10985 endlink)));
10986 tree int_ftype_v4hi_int
10987 = build_function_type (integer_type_node,
10988 tree_cons (NULL_TREE, V4HI_type_node,
10989 tree_cons (NULL_TREE, integer_type_node,
10990 endlink)));
10991 tree v4hi_ftype_v4hi_int_int
332316cd 10992 = build_function_type (V4HI_type_node,
bd793c65
BS
10993 tree_cons (NULL_TREE, V4HI_type_node,
10994 tree_cons (NULL_TREE, integer_type_node,
10995 tree_cons (NULL_TREE,
10996 integer_type_node,
10997 endlink))));
10998 /* Miscellaneous. */
10999 tree v8qi_ftype_v4hi_v4hi
11000 = build_function_type (V8QI_type_node,
11001 tree_cons (NULL_TREE, V4HI_type_node,
11002 tree_cons (NULL_TREE, V4HI_type_node,
11003 endlink)));
11004 tree v4hi_ftype_v2si_v2si
11005 = build_function_type (V4HI_type_node,
11006 tree_cons (NULL_TREE, V2SI_type_node,
11007 tree_cons (NULL_TREE, V2SI_type_node,
11008 endlink)));
11009 tree v4sf_ftype_v4sf_v4sf_int
11010 = build_function_type (V4SF_type_node,
11011 tree_cons (NULL_TREE, V4SF_type_node,
11012 tree_cons (NULL_TREE, V4SF_type_node,
11013 tree_cons (NULL_TREE,
11014 integer_type_node,
11015 endlink))));
11016 tree v4hi_ftype_v8qi_v8qi
11017 = build_function_type (V4HI_type_node,
11018 tree_cons (NULL_TREE, V8QI_type_node,
11019 tree_cons (NULL_TREE, V8QI_type_node,
11020 endlink)));
11021 tree v2si_ftype_v4hi_v4hi
11022 = build_function_type (V2SI_type_node,
11023 tree_cons (NULL_TREE, V4HI_type_node,
11024 tree_cons (NULL_TREE, V4HI_type_node,
11025 endlink)));
11026 tree v4hi_ftype_v4hi_int
11027 = build_function_type (V4HI_type_node,
11028 tree_cons (NULL_TREE, V4HI_type_node,
11029 tree_cons (NULL_TREE, integer_type_node,
11030 endlink)));
bd793c65
BS
11031 tree v4hi_ftype_v4hi_di
11032 = build_function_type (V4HI_type_node,
11033 tree_cons (NULL_TREE, V4HI_type_node,
11034 tree_cons (NULL_TREE,
11035 long_long_integer_type_node,
11036 endlink)));
11037 tree v2si_ftype_v2si_di
11038 = build_function_type (V2SI_type_node,
11039 tree_cons (NULL_TREE, V2SI_type_node,
11040 tree_cons (NULL_TREE,
11041 long_long_integer_type_node,
11042 endlink)));
11043 tree void_ftype_void
11044 = build_function_type (void_type_node, endlink);
bd793c65
BS
11045 tree void_ftype_unsigned
11046 = build_function_type (void_type_node,
11047 tree_cons (NULL_TREE, unsigned_type_node,
11048 endlink));
11049 tree unsigned_ftype_void
11050 = build_function_type (unsigned_type_node, endlink);
11051 tree di_ftype_void
11052 = build_function_type (long_long_unsigned_type_node, endlink);
e37af218
RH
11053 tree v4sf_ftype_void
11054 = build_function_type (V4SF_type_node, endlink);
bd793c65
BS
11055 tree v2si_ftype_v4sf
11056 = build_function_type (V2SI_type_node,
11057 tree_cons (NULL_TREE, V4SF_type_node,
11058 endlink));
11059 /* Loads/stores. */
11060 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11061 tree_cons (NULL_TREE, V8QI_type_node,
11062 tree_cons (NULL_TREE,
11063 pchar_type_node,
11064 endlink)));
11065 tree void_ftype_v8qi_v8qi_pchar
11066 = build_function_type (void_type_node, maskmovq_args);
11067 tree v4sf_ftype_pfloat
11068 = build_function_type (V4SF_type_node,
11069 tree_cons (NULL_TREE, pfloat_type_node,
11070 endlink));
bd793c65
BS
11071 /* @@@ the type is bogus */
11072 tree v4sf_ftype_v4sf_pv2si
11073 = build_function_type (V4SF_type_node,
11074 tree_cons (NULL_TREE, V4SF_type_node,
11075 tree_cons (NULL_TREE, pv2si_type_node,
11076 endlink)));
1255c85c
BS
11077 tree void_ftype_pv2si_v4sf
11078 = build_function_type (void_type_node,
11079 tree_cons (NULL_TREE, pv2si_type_node,
11080 tree_cons (NULL_TREE, V4SF_type_node,
bd793c65
BS
11081 endlink)));
11082 tree void_ftype_pfloat_v4sf
11083 = build_function_type (void_type_node,
11084 tree_cons (NULL_TREE, pfloat_type_node,
11085 tree_cons (NULL_TREE, V4SF_type_node,
11086 endlink)));
11087 tree void_ftype_pdi_di
11088 = build_function_type (void_type_node,
11089 tree_cons (NULL_TREE, pdi_type_node,
11090 tree_cons (NULL_TREE,
11091 long_long_unsigned_type_node,
11092 endlink)));
11093 /* Normal vector unops. */
11094 tree v4sf_ftype_v4sf
11095 = build_function_type (V4SF_type_node,
11096 tree_cons (NULL_TREE, V4SF_type_node,
11097 endlink));
0f290768 11098
bd793c65
BS
11099 /* Normal vector binops. */
11100 tree v4sf_ftype_v4sf_v4sf
11101 = build_function_type (V4SF_type_node,
11102 tree_cons (NULL_TREE, V4SF_type_node,
11103 tree_cons (NULL_TREE, V4SF_type_node,
11104 endlink)));
11105 tree v8qi_ftype_v8qi_v8qi
11106 = build_function_type (V8QI_type_node,
11107 tree_cons (NULL_TREE, V8QI_type_node,
11108 tree_cons (NULL_TREE, V8QI_type_node,
11109 endlink)));
11110 tree v4hi_ftype_v4hi_v4hi
11111 = build_function_type (V4HI_type_node,
11112 tree_cons (NULL_TREE, V4HI_type_node,
11113 tree_cons (NULL_TREE, V4HI_type_node,
11114 endlink)));
11115 tree v2si_ftype_v2si_v2si
11116 = build_function_type (V2SI_type_node,
11117 tree_cons (NULL_TREE, V2SI_type_node,
11118 tree_cons (NULL_TREE, V2SI_type_node,
11119 endlink)));
bd793c65
BS
11120 tree di_ftype_di_di
11121 = build_function_type (long_long_unsigned_type_node,
11122 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11123 tree_cons (NULL_TREE,
11124 long_long_unsigned_type_node,
11125 endlink)));
11126
47f339cf
BS
11127 tree v2si_ftype_v2sf
11128 = build_function_type (V2SI_type_node,
11129 tree_cons (NULL_TREE, V2SF_type_node,
11130 endlink));
11131 tree v2sf_ftype_v2si
11132 = build_function_type (V2SF_type_node,
11133 tree_cons (NULL_TREE, V2SI_type_node,
11134 endlink));
11135 tree v2si_ftype_v2si
11136 = build_function_type (V2SI_type_node,
11137 tree_cons (NULL_TREE, V2SI_type_node,
11138 endlink));
11139 tree v2sf_ftype_v2sf
11140 = build_function_type (V2SF_type_node,
11141 tree_cons (NULL_TREE, V2SF_type_node,
11142 endlink));
11143 tree v2sf_ftype_v2sf_v2sf
11144 = build_function_type (V2SF_type_node,
11145 tree_cons (NULL_TREE, V2SF_type_node,
11146 tree_cons (NULL_TREE,
11147 V2SF_type_node,
11148 endlink)));
11149 tree v2si_ftype_v2sf_v2sf
11150 = build_function_type (V2SI_type_node,
11151 tree_cons (NULL_TREE, V2SF_type_node,
11152 tree_cons (NULL_TREE,
11153 V2SF_type_node,
11154 endlink)));
11155
bd793c65
BS
11156 /* Add all builtins that are more or less simple operations on two
11157 operands. */
11158 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11159 {
11160 /* Use one of the operands; the target can have a different mode for
11161 mask-generating compares. */
11162 enum machine_mode mode;
11163 tree type;
11164
11165 if (d->name == 0)
11166 continue;
11167 mode = insn_data[d->icode].operand[1].mode;
11168
bd793c65
BS
11169 switch (mode)
11170 {
11171 case V4SFmode:
11172 type = v4sf_ftype_v4sf_v4sf;
11173 break;
11174 case V8QImode:
11175 type = v8qi_ftype_v8qi_v8qi;
11176 break;
11177 case V4HImode:
11178 type = v4hi_ftype_v4hi_v4hi;
11179 break;
11180 case V2SImode:
11181 type = v2si_ftype_v2si_v2si;
11182 break;
bd793c65
BS
11183 case DImode:
11184 type = di_ftype_di_di;
11185 break;
11186
11187 default:
11188 abort ();
11189 }
0f290768 11190
bd793c65
BS
11191 /* Override for comparisons. */
11192 if (d->icode == CODE_FOR_maskcmpv4sf3
11193 || d->icode == CODE_FOR_maskncmpv4sf3
11194 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11195 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11196 type = v4si_ftype_v4sf_v4sf;
11197
eeb06b1b 11198 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
11199 }
11200
11201 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
11202 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11203 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11204 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11205 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11206 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11207 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11208 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11209
11210 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11211 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11212 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11213
11214 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11215 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11216
11217 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11218 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 11219
bd793c65
BS
11220 /* comi/ucomi insns. */
11221 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
eeb06b1b 11222 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 11223
1255c85c
BS
11224 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11225 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11226 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 11227
eeb06b1b
BS
11228 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11229 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11230 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11231 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11232 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11233 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 11234
e37af218
RH
11235 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11236 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11237 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11238 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11239
47f339cf
BS
11240 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11241 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 11242
47f339cf 11243 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 11244
eeb06b1b
BS
11245 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11246 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11247 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11248 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11249 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11250 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 11251
eeb06b1b
BS
11252 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11253 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
1255c85c
BS
11254 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11255 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 11256
eeb06b1b 11257 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
47f339cf 11258 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
eeb06b1b 11259 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
47f339cf 11260 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 11261
47f339cf 11262 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 11263
47f339cf 11264 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 11265
eeb06b1b
BS
11266 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11267 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11268 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11269 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11270 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11271 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 11272
eeb06b1b 11273 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 11274
47f339cf
BS
11275 /* Original 3DNow! */
11276 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11277 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11278 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11291 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11292 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11293 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11294 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11295 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
11296
11297 /* 3DNow! extension as used in the Athlon CPU. */
11298 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11299 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11300 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11301 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11303 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11304
e37af218 11305 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
bd793c65
BS
11306}
11307
11308/* Errors in the source file can cause expand_expr to return const0_rtx
11309 where we expect a vector. To avoid crashing, use one of the vector
11310 clear instructions. */
11311static rtx
11312safe_vector_operand (x, mode)
11313 rtx x;
11314 enum machine_mode mode;
11315{
11316 if (x != const0_rtx)
11317 return x;
11318 x = gen_reg_rtx (mode);
11319
47f339cf 11320 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
11321 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11322 : gen_rtx_SUBREG (DImode, x, 0)));
11323 else
e37af218
RH
11324 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11325 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
11326 return x;
11327}
11328
11329/* Subroutine of ix86_expand_builtin to take care of binop insns. */
11330
11331static rtx
11332ix86_expand_binop_builtin (icode, arglist, target)
11333 enum insn_code icode;
11334 tree arglist;
11335 rtx target;
11336{
11337 rtx pat;
11338 tree arg0 = TREE_VALUE (arglist);
11339 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11340 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11341 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11342 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11343 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11344 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11345
11346 if (VECTOR_MODE_P (mode0))
11347 op0 = safe_vector_operand (op0, mode0);
11348 if (VECTOR_MODE_P (mode1))
11349 op1 = safe_vector_operand (op1, mode1);
11350
11351 if (! target
11352 || GET_MODE (target) != tmode
11353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11354 target = gen_reg_rtx (tmode);
11355
11356 /* In case the insn wants input operands in modes different from
11357 the result, abort. */
11358 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11359 abort ();
11360
11361 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11362 op0 = copy_to_mode_reg (mode0, op0);
11363 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11364 op1 = copy_to_mode_reg (mode1, op1);
11365
59bef189
RH
11366 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11367 yet one of the two must not be a memory. This is normally enforced
11368 by expanders, but we didn't bother to create one here. */
11369 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11370 op0 = copy_to_mode_reg (mode0, op0);
11371
bd793c65
BS
11372 pat = GEN_FCN (icode) (target, op0, op1);
11373 if (! pat)
11374 return 0;
11375 emit_insn (pat);
11376 return target;
11377}
11378
e37af218
RH
11379/* In type_for_mode we restrict the ability to create TImode types
 11380   to hosts with 64-bit HOST_WIDE_INT.  So we've defined the SSE logicals
11381 to have a V4SFmode signature. Convert them in-place to TImode. */
11382
11383static rtx
11384ix86_expand_timode_binop_builtin (icode, arglist, target)
11385 enum insn_code icode;
11386 tree arglist;
11387 rtx target;
11388{
11389 rtx pat;
11390 tree arg0 = TREE_VALUE (arglist);
11391 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11392 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11393 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11394
11395 op0 = gen_lowpart (TImode, op0);
11396 op1 = gen_lowpart (TImode, op1);
11397 target = gen_reg_rtx (TImode);
11398
11399 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11400 op0 = copy_to_mode_reg (TImode, op0);
11401 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11402 op1 = copy_to_mode_reg (TImode, op1);
11403
59bef189
RH
11404 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11405 yet one of the two must not be a memory. This is normally enforced
11406 by expanders, but we didn't bother to create one here. */
11407 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11408 op0 = copy_to_mode_reg (TImode, op0);
11409
e37af218
RH
11410 pat = GEN_FCN (icode) (target, op0, op1);
11411 if (! pat)
11412 return 0;
11413 emit_insn (pat);
11414
11415 return gen_lowpart (V4SFmode, target);
11416}
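/* Editor's illustration (not part of the original source): from the
   user's side the SSE logicals keep their V4SF signature; the TImode
   round trip above is invisible.  */
#if 0
typedef float v4sf __attribute__ ((mode (V4SF)));

static v4sf
example_andps (v4sf a, v4sf b)
{
  return __builtin_ia32_andps (a, b);	/* expands via CODE_FOR_sse_andti3 */
}
#endif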
11417
bd793c65
BS
11418/* Subroutine of ix86_expand_builtin to take care of stores. */
11419
11420static rtx
e37af218 11421ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
11422 enum insn_code icode;
11423 tree arglist;
bd793c65
BS
11424{
11425 rtx pat;
11426 tree arg0 = TREE_VALUE (arglist);
11427 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11428 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11429 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11430 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11431 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11432
11433 if (VECTOR_MODE_P (mode1))
11434 op1 = safe_vector_operand (op1, mode1);
11435
11436 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
11437
11438 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11439 op1 = copy_to_mode_reg (mode1, op1);
11440
bd793c65
BS
11441 pat = GEN_FCN (icode) (op0, op1);
11442 if (pat)
11443 emit_insn (pat);
11444 return 0;
11445}
11446
11447/* Subroutine of ix86_expand_builtin to take care of unop insns. */
11448
11449static rtx
11450ix86_expand_unop_builtin (icode, arglist, target, do_load)
11451 enum insn_code icode;
11452 tree arglist;
11453 rtx target;
11454 int do_load;
11455{
11456 rtx pat;
11457 tree arg0 = TREE_VALUE (arglist);
11458 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11459 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11460 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11461
11462 if (! target
11463 || GET_MODE (target) != tmode
11464 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11465 target = gen_reg_rtx (tmode);
11466 if (do_load)
11467 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11468 else
11469 {
11470 if (VECTOR_MODE_P (mode0))
11471 op0 = safe_vector_operand (op0, mode0);
11472
11473 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11474 op0 = copy_to_mode_reg (mode0, op0);
11475 }
11476
11477 pat = GEN_FCN (icode) (target, op0);
11478 if (! pat)
11479 return 0;
11480 emit_insn (pat);
11481 return target;
11482}
11483
11484/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11485 sqrtss, rsqrtss, rcpss. */
11486
11487static rtx
11488ix86_expand_unop1_builtin (icode, arglist, target)
11489 enum insn_code icode;
11490 tree arglist;
11491 rtx target;
11492{
11493 rtx pat;
11494 tree arg0 = TREE_VALUE (arglist);
59bef189 11495 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
11496 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11497 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11498
11499 if (! target
11500 || GET_MODE (target) != tmode
11501 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11502 target = gen_reg_rtx (tmode);
11503
11504 if (VECTOR_MODE_P (mode0))
11505 op0 = safe_vector_operand (op0, mode0);
11506
11507 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11508 op0 = copy_to_mode_reg (mode0, op0);
59bef189
RH
11509
11510 op1 = op0;
11511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11512 op1 = copy_to_mode_reg (mode0, op1);
11513
11514 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
11515 if (! pat)
11516 return 0;
11517 emit_insn (pat);
11518 return target;
11519}
11520
11521/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11522
11523static rtx
11524ix86_expand_sse_compare (d, arglist, target)
8b60264b 11525 const struct builtin_description *d;
bd793c65
BS
11526 tree arglist;
11527 rtx target;
11528{
11529 rtx pat;
11530 tree arg0 = TREE_VALUE (arglist);
11531 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11532 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11533 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11534 rtx op2;
11535 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11536 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11537 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11538 enum rtx_code comparison = d->comparison;
11539
11540 if (VECTOR_MODE_P (mode0))
11541 op0 = safe_vector_operand (op0, mode0);
11542 if (VECTOR_MODE_P (mode1))
11543 op1 = safe_vector_operand (op1, mode1);
11544
11545 /* Swap operands if we have a comparison that isn't available in
11546 hardware. */
11547 if (d->flag)
11548 {
21e1b5f1
BS
11549 rtx tmp = gen_reg_rtx (mode1);
11550 emit_move_insn (tmp, op1);
bd793c65 11551 op1 = op0;
21e1b5f1 11552 op0 = tmp;
bd793c65 11553 }
21e1b5f1
BS
11554
11555 if (! target
11556 || GET_MODE (target) != tmode
11557 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
11558 target = gen_reg_rtx (tmode);
11559
11560 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11561 op0 = copy_to_mode_reg (mode0, op0);
11562 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11563 op1 = copy_to_mode_reg (mode1, op1);
11564
11565 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11566 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11567 if (! pat)
11568 return 0;
11569 emit_insn (pat);
11570 return target;
11571}
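/* Editor's note (not part of the original source): the d->flag swap is
   what makes __builtin_ia32_cmpgtps work even though the hardware only
   provides the less-than forms: in bdesc_2arg, CMPGTPS is listed as
   (LT, 1), so a > b is emitted as b < a with the operands exchanged
   above.  */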
11572
11573/* Subroutine of ix86_expand_builtin to take care of comi insns. */
11574
11575static rtx
11576ix86_expand_sse_comi (d, arglist, target)
8b60264b 11577 const struct builtin_description *d;
bd793c65
BS
11578 tree arglist;
11579 rtx target;
11580{
11581 rtx pat;
11582 tree arg0 = TREE_VALUE (arglist);
11583 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11584 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11585 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11586 rtx op2;
11587 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11588 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11589 enum rtx_code comparison = d->comparison;
11590
11591 if (VECTOR_MODE_P (mode0))
11592 op0 = safe_vector_operand (op0, mode0);
11593 if (VECTOR_MODE_P (mode1))
11594 op1 = safe_vector_operand (op1, mode1);
11595
11596 /* Swap operands if we have a comparison that isn't available in
11597 hardware. */
11598 if (d->flag)
11599 {
11600 rtx tmp = op1;
11601 op1 = op0;
11602 op0 = tmp;
bd793c65
BS
11603 }
11604
11605 target = gen_reg_rtx (SImode);
11606 emit_move_insn (target, const0_rtx);
11607 target = gen_rtx_SUBREG (QImode, target, 0);
11608
11609 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11610 op0 = copy_to_mode_reg (mode0, op0);
11611 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11612 op1 = copy_to_mode_reg (mode1, op1);
11613
11614 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11615 pat = GEN_FCN (d->icode) (op0, op1, op2);
11616 if (! pat)
11617 return 0;
11618 emit_insn (pat);
29628f27
BS
11619 emit_insn (gen_rtx_SET (VOIDmode,
11620 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11621 gen_rtx_fmt_ee (comparison, QImode,
11622 gen_rtx_REG (CCmode, FLAGS_REG),
11623 const0_rtx)));
bd793c65 11624
6f1a6c5b 11625 return SUBREG_REG (target);
bd793c65
BS
11626}
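/* Editor's illustration (not part of the original source): a comi
   builtin returns a plain int, materialized from the flags by the
   strict_low_part store arranged above.  */
#if 0
typedef float v4sf __attribute__ ((mode (V4SF)));

static int
example_comilt (v4sf a, v4sf b)
{
  return __builtin_ia32_comilt (a, b);	/* comiss, then a setcc into
					   the low byte of the result */
}
#endif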
11627
11628/* Expand an expression EXP that calls a built-in function,
11629 with result going to TARGET if that's convenient
11630 (and in mode MODE if that's convenient).
11631 SUBTARGET may be used as the target for computing one of EXP's operands.
11632 IGNORE is nonzero if the value is to be ignored. */
11633
11634rtx
11635ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11636 tree exp;
11637 rtx target;
11638 rtx subtarget ATTRIBUTE_UNUSED;
11639 enum machine_mode mode ATTRIBUTE_UNUSED;
11640 int ignore ATTRIBUTE_UNUSED;
11641{
8b60264b 11642 const struct builtin_description *d;
77ebd435 11643 size_t i;
bd793c65
BS
11644 enum insn_code icode;
11645 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11646 tree arglist = TREE_OPERAND (exp, 1);
e37af218 11647 tree arg0, arg1, arg2;
bd793c65
BS
11648 rtx op0, op1, op2, pat;
11649 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 11650 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
11651
11652 switch (fcode)
11653 {
11654 case IX86_BUILTIN_EMMS:
11655 emit_insn (gen_emms ());
11656 return 0;
11657
11658 case IX86_BUILTIN_SFENCE:
11659 emit_insn (gen_sfence ());
11660 return 0;
11661
bd793c65
BS
11662 case IX86_BUILTIN_PEXTRW:
11663 icode = CODE_FOR_mmx_pextrw;
11664 arg0 = TREE_VALUE (arglist);
11665 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11666 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11667 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11668 tmode = insn_data[icode].operand[0].mode;
11669 mode0 = insn_data[icode].operand[1].mode;
11670 mode1 = insn_data[icode].operand[2].mode;
11671
11672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11673 op0 = copy_to_mode_reg (mode0, op0);
11674 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11675 {
11676 /* @@@ better error message */
11677 error ("selector must be an immediate");
6f1a6c5b 11678 return gen_reg_rtx (tmode);
bd793c65
BS
11679 }
11680 if (target == 0
11681 || GET_MODE (target) != tmode
11682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11683 target = gen_reg_rtx (tmode);
11684 pat = GEN_FCN (icode) (target, op0, op1);
11685 if (! pat)
11686 return 0;
11687 emit_insn (pat);
11688 return target;
11689
11690 case IX86_BUILTIN_PINSRW:
11691 icode = CODE_FOR_mmx_pinsrw;
11692 arg0 = TREE_VALUE (arglist);
11693 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11694 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11695 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11696 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11697 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11698 tmode = insn_data[icode].operand[0].mode;
11699 mode0 = insn_data[icode].operand[1].mode;
11700 mode1 = insn_data[icode].operand[2].mode;
11701 mode2 = insn_data[icode].operand[3].mode;
11702
11703 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11704 op0 = copy_to_mode_reg (mode0, op0);
11705 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11706 op1 = copy_to_mode_reg (mode1, op1);
11707 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11708 {
11709 /* @@@ better error message */
11710 error ("selector must be an immediate");
11711 return const0_rtx;
11712 }
11713 if (target == 0
11714 || GET_MODE (target) != tmode
11715 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11716 target = gen_reg_rtx (tmode);
11717 pat = GEN_FCN (icode) (target, op0, op1, op2);
11718 if (! pat)
11719 return 0;
11720 emit_insn (pat);
11721 return target;
11722
11723 case IX86_BUILTIN_MASKMOVQ:
e95d6b23 11724 icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
bd793c65
BS
11725 /* Note the arg order is different from the operand order. */
11726 arg1 = TREE_VALUE (arglist);
11727 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11728 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11729 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11730 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11731 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11732 mode0 = insn_data[icode].operand[0].mode;
11733 mode1 = insn_data[icode].operand[1].mode;
11734 mode2 = insn_data[icode].operand[2].mode;
11735
5c464583 11736 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
11737 op0 = copy_to_mode_reg (mode0, op0);
11738 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11739 op1 = copy_to_mode_reg (mode1, op1);
11740 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11741 op2 = copy_to_mode_reg (mode2, op2);
11742 pat = GEN_FCN (icode) (op0, op1, op2);
11743 if (! pat)
11744 return 0;
11745 emit_insn (pat);
11746 return 0;
11747
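/* Illustrative note: for __builtin_ia32_maskmovq (data, mask, addr)
   the three arguments map onto the insn operands as (addr, data, mask),
   which is why the TREE_CHAIN walk above reads them out of order.  */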
11748 case IX86_BUILTIN_SQRTSS:
11749 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11750 case IX86_BUILTIN_RSQRTSS:
11751 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11752 case IX86_BUILTIN_RCPSS:
11753 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11754
e37af218
RH
11755 case IX86_BUILTIN_ANDPS:
11756 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11757 arglist, target);
11758 case IX86_BUILTIN_ANDNPS:
11759 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11760 arglist, target);
11761 case IX86_BUILTIN_ORPS:
11762 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11763 arglist, target);
11764 case IX86_BUILTIN_XORPS:
11765 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11766 arglist, target);
11767
bd793c65
BS
11768 case IX86_BUILTIN_LOADAPS:
11769 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11770
11771 case IX86_BUILTIN_LOADUPS:
11772 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11773
11774 case IX86_BUILTIN_STOREAPS:
e37af218 11775 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 11776 case IX86_BUILTIN_STOREUPS:
e37af218 11777 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
11778
11779 case IX86_BUILTIN_LOADSS:
11780 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11781
11782 case IX86_BUILTIN_STORESS:
e37af218 11783 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 11784
0f290768 11785 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
11786 case IX86_BUILTIN_LOADLPS:
11787 icode = (fcode == IX86_BUILTIN_LOADHPS
11788 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11789 arg0 = TREE_VALUE (arglist);
11790 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11791 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11792 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11793 tmode = insn_data[icode].operand[0].mode;
11794 mode0 = insn_data[icode].operand[1].mode;
11795 mode1 = insn_data[icode].operand[2].mode;
11796
11797 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11798 op0 = copy_to_mode_reg (mode0, op0);
11799 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11800 if (target == 0
11801 || GET_MODE (target) != tmode
11802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11803 target = gen_reg_rtx (tmode);
11804 pat = GEN_FCN (icode) (target, op0, op1);
11805 if (! pat)
11806 return 0;
11807 emit_insn (pat);
11808 return target;
0f290768 11809
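/* Illustrative note: the second insn operand of movhps/movlps must be
   a memory reference, so the pointer argument is wrapped in a MEM
   above instead of being copied into a register.  A sketch of a use,
   assuming the usual builtin prototype:

     __v4sf r = __builtin_ia32_loadhps (a, (__v2si *) p);

   loads two packed floats from *p into the high half of A.  */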
bd793c65
BS
11810 case IX86_BUILTIN_STOREHPS:
11811 case IX86_BUILTIN_STORELPS:
11812 icode = (fcode == IX86_BUILTIN_STOREHPS
11813 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11814 arg0 = TREE_VALUE (arglist);
11815 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11816 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11817 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11818 mode0 = insn_data[icode].operand[1].mode;
11819 mode1 = insn_data[icode].operand[2].mode;
11820
11821 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11822 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11823 op1 = copy_to_mode_reg (mode1, op1);
11824
11825 pat = GEN_FCN (icode) (op0, op0, op1);
11826 if (! pat)
11827 return 0;
11828 emit_insn (pat);
11829 return 0;
11830
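/* Illustrative note: the movhps/movlps store form is a vec_merge whose
   first input must match the memory destination, so the same MEM is
   passed twice to GEN_FCN above - once as operand 0 and once as
   operand 1.  */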
11831 case IX86_BUILTIN_MOVNTPS:
e37af218 11832 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 11833 case IX86_BUILTIN_MOVNTQ:
e37af218 11834 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
11835
11836 case IX86_BUILTIN_LDMXCSR:
11837 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11838 target = assign_386_stack_local (SImode, 0);
11839 emit_move_insn (target, op0);
11840 emit_insn (gen_ldmxcsr (target));
11841 return 0;
11842
11843 case IX86_BUILTIN_STMXCSR:
11844 target = assign_386_stack_local (SImode, 0);
11845 emit_insn (gen_stmxcsr (target));
11846 return copy_to_mode_reg (SImode, target);
11847
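/* Illustrative example: the two builtins pair up to modify MXCSR,
   e.g. setting the flush-to-zero bit (bit 15):

     unsigned int cw = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (cw | 0x8000);

   Both go through a stack slot since ldmxcsr and stmxcsr take only
   memory operands.  */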
bd793c65
BS
11848 case IX86_BUILTIN_SHUFPS:
11849 icode = CODE_FOR_sse_shufps;
11850 arg0 = TREE_VALUE (arglist);
11851 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11852 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11853 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11854 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11855 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11856 tmode = insn_data[icode].operand[0].mode;
11857 mode0 = insn_data[icode].operand[1].mode;
11858 mode1 = insn_data[icode].operand[2].mode;
11859 mode2 = insn_data[icode].operand[3].mode;
11860
11861 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11862 op0 = copy_to_mode_reg (mode0, op0);
11863 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11864 op1 = copy_to_mode_reg (mode1, op1);
11865 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11866 {
11867 /* @@@ better error message */
11868 error ("mask must be an immediate");
6f1a6c5b 11869 return gen_reg_rtx (tmode);
bd793c65
BS
11870 }
11871 if (target == 0
11872 || GET_MODE (target) != tmode
11873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11874 target = gen_reg_rtx (tmode);
11875 pat = GEN_FCN (icode) (target, op0, op1, op2);
11876 if (! pat)
11877 return 0;
11878 emit_insn (pat);
11879 return target;
11880
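/* Illustrative example: the shuffle mask must be an 8-bit immediate,
   two bits per result element, e.g.

     __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);

   picks elements 3 and 2 of A and elements 1 and 0 of B.  A run-time
   mask value takes the error path above.  */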
11881 case IX86_BUILTIN_PSHUFW:
11882 icode = CODE_FOR_mmx_pshufw;
11883 arg0 = TREE_VALUE (arglist);
11884 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11887 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
11888 mode1 = insn_data[icode].operand[1].mode;
11889 mode2 = insn_data[icode].operand[2].mode;
bd793c65 11890
29628f27
BS
11891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11892 op0 = copy_to_mode_reg (mode1, op0);
11893 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
11894 {
11895 /* @@@ better error message */
11896 error ("mask must be an immediate");
11897 return const0_rtx;
11898 }
11899 if (target == 0
11900 || GET_MODE (target) != tmode
11901 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11902 target = gen_reg_rtx (tmode);
29628f27 11903 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
11904 if (! pat)
11905 return 0;
11906 emit_insn (pat);
11907 return target;
11908
47f339cf
BS
11909 case IX86_BUILTIN_FEMMS:
11910 emit_insn (gen_femms ());
11911 return NULL_RTX;
11912
11913 case IX86_BUILTIN_PAVGUSB:
11914 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11915
11916 case IX86_BUILTIN_PF2ID:
11917 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11918
11919 case IX86_BUILTIN_PFACC:
11920 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11921
11922 case IX86_BUILTIN_PFADD:
11923 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11924
11925 case IX86_BUILTIN_PFCMPEQ:
11926 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11927
11928 case IX86_BUILTIN_PFCMPGE:
11929 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11930
11931 case IX86_BUILTIN_PFCMPGT:
11932 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11933
11934 case IX86_BUILTIN_PFMAX:
11935 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11936
11937 case IX86_BUILTIN_PFMIN:
11938 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11939
11940 case IX86_BUILTIN_PFMUL:
11941 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11942
11943 case IX86_BUILTIN_PFRCP:
11944 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11945
11946 case IX86_BUILTIN_PFRCPIT1:
11947 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11948
11949 case IX86_BUILTIN_PFRCPIT2:
11950 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11951
11952 case IX86_BUILTIN_PFRSQIT1:
11953 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11954
11955 case IX86_BUILTIN_PFRSQRT:
11956 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11957
11958 case IX86_BUILTIN_PFSUB:
11959 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11960
11961 case IX86_BUILTIN_PFSUBR:
11962 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11963
11964 case IX86_BUILTIN_PI2FD:
11965 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11966
11967 case IX86_BUILTIN_PMULHRW:
11968 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11969
47f339cf
BS
11970 case IX86_BUILTIN_PF2IW:
11971 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11972
11973 case IX86_BUILTIN_PFNACC:
11974 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11975
11976 case IX86_BUILTIN_PFPNACC:
11977 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11978
11979 case IX86_BUILTIN_PI2FW:
11980 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11981
11982 case IX86_BUILTIN_PSWAPDSI:
11983 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11984
11985 case IX86_BUILTIN_PSWAPDSF:
11986 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11987
e37af218
RH
11988 case IX86_BUILTIN_SSE_ZERO:
11989 target = gen_reg_rtx (V4SFmode);
11990 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
11991 return target;
11992
bd793c65
BS
11993 case IX86_BUILTIN_MMX_ZERO:
11994 target = gen_reg_rtx (DImode);
11995 emit_insn (gen_mmx_clrdi (target));
11996 return target;
11997
11998 default:
11999 break;
12000 }
12001
12002 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12003 if (d->code == fcode)
12004 {
12005 /* Compares are treated specially. */
12006 if (d->icode == CODE_FOR_maskcmpv4sf3
12007 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12008 || d->icode == CODE_FOR_maskncmpv4sf3
12009 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12010 return ix86_expand_sse_compare (d, arglist, target);
12011
12012 return ix86_expand_binop_builtin (d->icode, arglist, target);
12013 }
12014
12015 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12016 if (d->code == fcode)
12017 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 12018
bd793c65
BS
12019 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12020 if (d->code == fcode)
12021 return ix86_expand_sse_comi (d, arglist, target);
0f290768 12022
bd793c65
BS
12023 /* @@@ Should really do something sensible here. */
12024 return 0;
bd793c65 12025}
4211a8fb
JH
12026
12027/* Store OPERAND to memory after reload is completed. This means
f710504c 12028 that we can't easily use assign_stack_local. */
4211a8fb
JH
12029rtx
12030ix86_force_to_memory (mode, operand)
12031 enum machine_mode mode;
12032 rtx operand;
12033{
898d374d 12034 rtx result;
4211a8fb
JH
12035 if (!reload_completed)
12036 abort ();
898d374d
JH
12037 if (TARGET_64BIT && TARGET_RED_ZONE)
12038 {
12039 result = gen_rtx_MEM (mode,
12040 gen_rtx_PLUS (Pmode,
12041 stack_pointer_rtx,
12042 GEN_INT (-RED_ZONE_SIZE)));
12043 emit_move_insn (result, operand);
12044 }
12045 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 12046 {
898d374d 12047 switch (mode)
4211a8fb 12048 {
898d374d
JH
12049 case HImode:
12050 case SImode:
12051 operand = gen_lowpart (DImode, operand);
12052 /* FALLTHRU */
12053 case DImode:
4211a8fb 12054 emit_insn (
898d374d
JH
12055 gen_rtx_SET (VOIDmode,
12056 gen_rtx_MEM (DImode,
12057 gen_rtx_PRE_DEC (DImode,
12058 stack_pointer_rtx)),
12059 operand));
12060 break;
12061 default:
12062 abort ();
12063 }
12064 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12065 }
12066 else
12067 {
12068 switch (mode)
12069 {
12070 case DImode:
12071 {
12072 rtx operands[2];
12073 split_di (&operand, 1, operands, operands + 1);
12074 emit_insn (
12075 gen_rtx_SET (VOIDmode,
12076 gen_rtx_MEM (SImode,
12077 gen_rtx_PRE_DEC (Pmode,
12078 stack_pointer_rtx)),
12079 operands[1]));
12080 emit_insn (
12081 gen_rtx_SET (VOIDmode,
12082 gen_rtx_MEM (SImode,
12083 gen_rtx_PRE_DEC (Pmode,
12084 stack_pointer_rtx)),
12085 operands[0]));
12086 }
12087 break;
12088 case HImode:
12089 /* It is better to store HImode values as SImode. */
12090 if (!TARGET_PARTIAL_REG_STALL)
12091 operand = gen_lowpart (SImode, operand);
12092 /* FALLTHRU */
12093 case SImode:
4211a8fb 12094 emit_insn (
898d374d
JH
12095 gen_rtx_SET (VOIDmode,
12096 gen_rtx_MEM (GET_MODE (operand),
12097 gen_rtx_PRE_DEC (SImode,
12098 stack_pointer_rtx)),
12099 operand));
12100 break;
12101 default:
12102 abort ();
4211a8fb 12103 }
898d374d 12104 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 12105 }
898d374d 12106 return result;
4211a8fb
JH
12107}
12108
12109/* Free the operand stored to memory by ix86_force_to_memory. */
12110void
12111ix86_free_from_memory (mode)
12112 enum machine_mode mode;
12113{
898d374d
JH
12114 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12115 {
12116 int size;
12117
12118 if (mode == DImode || TARGET_64BIT)
12119 size = 8;
12120 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12121 size = 2;
12122 else
12123 size = 4;
12124 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12125 to a pop or add instruction if registers are available. */
12126 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12128 GEN_INT (size))));
12129 }
4211a8fb 12130}
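/* Illustrative usage sketch for the pair above (hypothetical caller):

     rtx slot = ix86_force_to_memory (DImode, operand);
     ... emit an insn that uses SLOT as a memory operand ...
     ix86_free_from_memory (DImode);

   The mode passed to ix86_free_from_memory must match the one given
   to ix86_force_to_memory so the right amount of stack is released.  */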
a946dd00 12131
f84aa48a
JH
12132/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12133 QImode must go into class Q_REGS.
12134 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
892a2d68 12135 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
12136enum reg_class
12137ix86_preferred_reload_class (x, class)
12138 rtx x;
12139 enum reg_class class;
12140{
12141 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12142 {
12143 /* SSE can't load any constant directly yet. */
12144 if (SSE_CLASS_P (class))
12145 return NO_REGS;
12146 /* Floats can load 0 and 1. */
12147 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12148 {
12149 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12150 if (MAYBE_SSE_CLASS_P (class))
12151 return (reg_class_subset_p (class, GENERAL_REGS)
12152 ? GENERAL_REGS : FLOAT_REGS);
12153 else
12154 return class;
12155 }
12156 /* General regs can load everything. */
12157 if (reg_class_subset_p (class, GENERAL_REGS))
12158 return GENERAL_REGS;
12159 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12160 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12161 return NO_REGS;
12162 }
12163 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12164 return NO_REGS;
12165 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12166 return Q_REGS;
12167 return class;
12168}
12169
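/* Illustrative example: reloading the constant 1.0 into an SSE class
   returns NO_REGS above, pushing the constant into the constant pool;
   the same constant headed for FLOAT_REGS is accepted, since fld1 can
   materialize it directly.  */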
12170/* If we are copying between general and FP registers, we need a memory
12171 location. The same is true for SSE and MMX registers.
12172
12173 The macro can't work reliably when one of the CLASSES is a class containing
12174 registers from multiple units (SSE, MMX, integer). We avoid this by never
12175 combining those units in single alternative in the machine description.
12176 Ensure that this constraint holds to avoid surprises.
12177
12178 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12179 enforce these sanity checks. */
12180int
12181ix86_secondary_memory_needed (class1, class2, mode, strict)
12182 enum reg_class class1, class2;
12183 enum machine_mode mode;
12184 int strict;
12185{
12186 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12187 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12188 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12189 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12190 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12191 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12192 {
12193 if (strict)
12194 abort ();
12195 else
12196 return 1;
12197 }
12198 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12199 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12200 && (mode) != SImode)
12201 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12202 && (mode) != SImode));
12203}
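/* Illustrative example: a DFmode copy between FLOAT_REGS and
   GENERAL_REGS requires a stack temporary, while an SImode copy
   between SSE_REGS and GENERAL_REGS does not - movd handles it -
   which is what the mode != SImode tests above encode.  */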
12204/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 12205 one in class CLASS2.
f84aa48a
JH
12206
12207 It is not required that the cost always equal 2 when FROM is the same as TO;
12208 on some machines it is expensive to move between registers if they are not
12209 general registers. */
12210int
12211ix86_register_move_cost (mode, class1, class2)
12212 enum machine_mode mode;
12213 enum reg_class class1, class2;
12214{
12215 /* In case we require secondary memory, compute the cost of the store
12216 followed by the load. When copying from a general purpose register we
12217 may emit multiple stores followed by a single load, causing a
12218 memory size mismatch stall. Count this as an arbitrarily high cost of 20. */
12219 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12220 {
92d0fb09 12221 int add_cost = 0;
62415523 12222 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 12223 add_cost = 20;
62415523 12224 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 12225 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 12226 }
92d0fb09 12227 /* Moves between the SSE/MMX and integer units are expensive. */
62415523
JH
12228 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12229 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
12230 return ix86_cost->mmxsse_to_integer;
12231 if (MAYBE_FLOAT_CLASS_P (class1))
12232 return ix86_cost->fp_move;
12233 if (MAYBE_SSE_CLASS_P (class1))
12234 return ix86_cost->sse_move;
12235 if (MAYBE_MMX_CLASS_P (class1))
12236 return ix86_cost->mmx_move;
f84aa48a
JH
12237 return 2;
12238}
12239
a946dd00
JH
12240/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12241int
12242ix86_hard_regno_mode_ok (regno, mode)
12243 int regno;
12244 enum machine_mode mode;
12245{
12246 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
12247 if (CC_REGNO_P (regno))
12248 return GET_MODE_CLASS (mode) == MODE_CC;
12249 if (GET_MODE_CLASS (mode) == MODE_CC
12250 || GET_MODE_CLASS (mode) == MODE_RANDOM
12251 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12252 return 0;
12253 if (FP_REGNO_P (regno))
12254 return VALID_FP_MODE_P (mode);
12255 if (SSE_REGNO_P (regno))
12256 return VALID_SSE_REG_MODE (mode);
12257 if (MMX_REGNO_P (regno))
47f339cf 12258 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
12259 /* We handle both integers and floats in the general purpose registers.
12260 In the future we should be able to handle vector modes as well. */
12261 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12262 return 0;
12263 /* Take care with QImode values - they can be in non-QI regs, but then
12264 they cause partial register stalls. */
d2836273 12265 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
12266 return 1;
12267 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12268}
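/* Illustrative note: the regno < 4 test corresponds to %eax, %edx,
   %ecx and %ebx (hard regs 0-3), the only 32-bit-mode registers whose
   low bytes (%al, %dl, %cl, %bl) are directly addressable.  */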
fa79946e
JH
12269
12270/* Return the cost of moving data of mode M between a
12271 register and memory. A value of 2 is the default; this cost is
12272 relative to those in `REGISTER_MOVE_COST'.
12273
12274 If moving between registers and memory is more expensive than
12275 between two registers, you should define this macro to express the
a4f31c00
AJ
12276 relative cost.
12277
fa79946e
JH
12278 Also model the increased cost of moving QImode registers in
12279 non-Q_REGS classes.
12280 */
12281int
12282ix86_memory_move_cost (mode, class, in)
12283 enum machine_mode mode;
12284 enum reg_class class;
12285 int in;
12286{
12287 if (FLOAT_CLASS_P (class))
12288 {
12289 int index;
12290 switch (mode)
12291 {
12292 case SFmode:
12293 index = 0;
12294 break;
12295 case DFmode:
12296 index = 1;
12297 break;
12298 case XFmode:
12299 case TFmode:
12300 index = 2;
12301 break;
12302 default:
12303 return 100;
12304 }
12305 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12306 }
12307 if (SSE_CLASS_P (class))
12308 {
12309 int index;
12310 switch (GET_MODE_SIZE (mode))
12311 {
12312 case 4:
12313 index = 0;
12314 break;
12315 case 8:
12316 index = 1;
12317 break;
12318 case 16:
12319 index = 2;
12320 break;
12321 default:
12322 return 100;
12323 }
12324 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12325 }
12326 if (MMX_CLASS_P (class))
12327 {
12328 int index;
12329 switch (GET_MODE_SIZE (mode))
12330 {
12331 case 4:
12332 index = 0;
12333 break;
12334 case 8:
12335 index = 1;
12336 break;
12337 default:
12338 return 100;
12339 }
12340 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12341 }
12342 switch (GET_MODE_SIZE (mode))
12343 {
12344 case 1:
12345 if (in)
12346 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12347 : ix86_cost->movzbl_load);
12348 else
12349 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12350 : ix86_cost->int_store[0] + 4);
12351 break;
12352 case 2:
12353 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12354 default:
12355 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
12356 if (mode == TFmode)
12357 mode = XFmode;
3bb7e126 12358 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
12359 * (int) GET_MODE_SIZE (mode) / 4);
12360 }
12361}
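/* Illustrative example: loading a DImode value into GENERAL_REGS falls
   into the default branch above and costs int_load[2] * 8 / 4, i.e.
   the price of two 32-bit loads.  */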
0ecf09f9 12362
2cc07db4
RH
12363#ifdef DO_GLOBAL_CTORS_BODY
12364static void
12365ix86_svr3_asm_out_constructor (symbol, priority)
12366 rtx symbol;
12367 int priority ATTRIBUTE_UNUSED;
12368{
12369 init_section ();
12370 fputs ("\tpushl $", asm_out_file);
12371 assemble_name (asm_out_file, XSTR (symbol, 0));
12372 fputc ('\n', asm_out_file);
12373}
12374#endif
162f023b
JH
12375
12376/* Order the registers for the register allocator. */
12377
12378void
12379x86_order_regs_for_local_alloc ()
12380{
12381 int pos = 0;
12382 int i;
12383
12384 /* First allocate the local general purpose registers. */
12385 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12386 if (GENERAL_REGNO_P (i) && call_used_regs[i])
12387 reg_alloc_order [pos++] = i;
12388
12389 /* Global general purpose registers. */
12390 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12391 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12392 reg_alloc_order [pos++] = i;
12393
12394 /* x87 registers come first in case we are doing FP math
12395 using them. */
12396 if (!TARGET_SSE_MATH)
12397 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12398 reg_alloc_order [pos++] = i;
12399
12400 /* SSE registers. */
12401 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12402 reg_alloc_order [pos++] = i;
12403 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12404 reg_alloc_order [pos++] = i;
12405
12406 /* x87 registers. */
12407 if (TARGET_SSE_MATH)
12408 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12409 reg_alloc_order [pos++] = i;
12410
12411 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12412 reg_alloc_order [pos++] = i;
12413
12414 /* Initialize the rest of the array, as we do not allocate some
12415 registers at all. */
12416 while (pos < FIRST_PSEUDO_REGISTER)
12417 reg_alloc_order [pos++] = 0;
12418}