/* gcc/config/i386/i386.c — recovered from a gcc.gnu.org git-blame page view;
   the HTML page header that stood here has been replaced by this note.  */
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
2a2ab3f9 45
/* Default stack-limit check value when the target headers do not
   provide one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
2ab0437e 50/* Processor costs (relative to an add) */
8b60264b 51static const
2ab0437e
JH
52struct processor_costs size_cost = { /* costs for tunning for size */
53 2, /* cost of an add instruction */
54 3, /* cost of a lea instruction */
55 2, /* variable shift costs */
56 3, /* constant shift costs */
57 3, /* cost of starting a multiply */
58 0, /* cost of multiply per each bit set */
59 3, /* cost of a divide/mod */
44cf5b6a
JH
60 3, /* cost of movsx */
61 3, /* cost of movzx */
2ab0437e
JH
62 0, /* "large" insn */
63 2, /* MOVE_RATIO */
64 2, /* cost for loading QImode using movzbl */
65 {2, 2, 2}, /* cost of loading integer registers
66 in QImode, HImode and SImode.
67 Relative to reg-reg move (2). */
68 {2, 2, 2}, /* cost of storing integer registers */
69 2, /* cost of reg,reg fld/fst */
70 {2, 2, 2}, /* cost of loading fp registers
71 in SFmode, DFmode and XFmode */
72 {2, 2, 2}, /* cost of loading integer registers */
73 3, /* cost of moving MMX register */
74 {3, 3}, /* cost of loading MMX registers
75 in SImode and DImode */
76 {3, 3}, /* cost of storing MMX registers
77 in SImode and DImode */
78 3, /* cost of moving SSE register */
79 {3, 3, 3}, /* cost of loading SSE registers
80 in SImode, DImode and TImode */
81 {3, 3, 3}, /* cost of storing SSE registers
82 in SImode, DImode and TImode */
83 3, /* MMX or SSE register to integer */
f4365627
JH
84 0, /* size of prefetch block */
85 0, /* number of parallel prefetches */
2ab0437e 86};
32b5b1aa 87/* Processor costs (relative to an add) */
8b60264b 88static const
32b5b1aa 89struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 90 1, /* cost of an add instruction */
32b5b1aa
SC
91 1, /* cost of a lea instruction */
92 3, /* variable shift costs */
93 2, /* constant shift costs */
94 6, /* cost of starting a multiply */
95 1, /* cost of multiply per each bit set */
e075ae69 96 23, /* cost of a divide/mod */
44cf5b6a
JH
97 3, /* cost of movsx */
98 2, /* cost of movzx */
96e7ae40 99 15, /* "large" insn */
e2e52e1b 100 3, /* MOVE_RATIO */
7c6b971d 101 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
102 {2, 4, 2}, /* cost of loading integer registers
103 in QImode, HImode and SImode.
0f290768 104 Relative to reg-reg move (2). */
96e7ae40
JH
105 {2, 4, 2}, /* cost of storing integer registers */
106 2, /* cost of reg,reg fld/fst */
107 {8, 8, 8}, /* cost of loading fp registers
108 in SFmode, DFmode and XFmode */
fa79946e
JH
109 {8, 8, 8}, /* cost of loading integer registers */
110 2, /* cost of moving MMX register */
111 {4, 8}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {4, 8}, /* cost of storing MMX registers
114 in SImode and DImode */
115 2, /* cost of moving SSE register */
116 {4, 8, 16}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {4, 8, 16}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
f4365627
JH
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
32b5b1aa
SC
123};
124
8b60264b 125static const
32b5b1aa
SC
126struct processor_costs i486_cost = { /* 486 specific costs */
127 1, /* cost of an add instruction */
128 1, /* cost of a lea instruction */
129 3, /* variable shift costs */
130 2, /* constant shift costs */
131 12, /* cost of starting a multiply */
132 1, /* cost of multiply per each bit set */
e075ae69 133 40, /* cost of a divide/mod */
44cf5b6a
JH
134 3, /* cost of movsx */
135 2, /* cost of movzx */
96e7ae40 136 15, /* "large" insn */
e2e52e1b 137 3, /* MOVE_RATIO */
7c6b971d 138 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
139 {2, 4, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
0f290768 141 Relative to reg-reg move (2). */
96e7ae40
JH
142 {2, 4, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {8, 8, 8}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
fa79946e
JH
146 {8, 8, 8}, /* cost of loading integer registers */
147 2, /* cost of moving MMX register */
148 {4, 8}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {4, 8}, /* cost of storing MMX registers
151 in SImode and DImode */
152 2, /* cost of moving SSE register */
153 {4, 8, 16}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {4, 8, 16}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
f4365627
JH
157 3, /* MMX or SSE register to integer */
158 0, /* size of prefetch block */
159 0, /* number of parallel prefetches */
32b5b1aa
SC
160};
161
8b60264b 162static const
e5cb57e8 163struct processor_costs pentium_cost = {
32b5b1aa
SC
164 1, /* cost of an add instruction */
165 1, /* cost of a lea instruction */
856b07a1 166 4, /* variable shift costs */
e5cb57e8 167 1, /* constant shift costs */
856b07a1
SC
168 11, /* cost of starting a multiply */
169 0, /* cost of multiply per each bit set */
e075ae69 170 25, /* cost of a divide/mod */
44cf5b6a
JH
171 3, /* cost of movsx */
172 2, /* cost of movzx */
96e7ae40 173 8, /* "large" insn */
e2e52e1b 174 6, /* MOVE_RATIO */
7c6b971d 175 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
176 {2, 4, 2}, /* cost of loading integer registers
177 in QImode, HImode and SImode.
0f290768 178 Relative to reg-reg move (2). */
96e7ae40
JH
179 {2, 4, 2}, /* cost of storing integer registers */
180 2, /* cost of reg,reg fld/fst */
181 {2, 2, 6}, /* cost of loading fp registers
182 in SFmode, DFmode and XFmode */
fa79946e
JH
183 {4, 4, 6}, /* cost of loading integer registers */
184 8, /* cost of moving MMX register */
185 {8, 8}, /* cost of loading MMX registers
186 in SImode and DImode */
187 {8, 8}, /* cost of storing MMX registers
188 in SImode and DImode */
189 2, /* cost of moving SSE register */
190 {4, 8, 16}, /* cost of loading SSE registers
191 in SImode, DImode and TImode */
192 {4, 8, 16}, /* cost of storing SSE registers
193 in SImode, DImode and TImode */
f4365627
JH
194 3, /* MMX or SSE register to integer */
195 0, /* size of prefetch block */
196 0, /* number of parallel prefetches */
32b5b1aa
SC
197};
198
8b60264b 199static const
856b07a1
SC
200struct processor_costs pentiumpro_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
e075ae69 203 1, /* variable shift costs */
856b07a1 204 1, /* constant shift costs */
369e59b1 205 4, /* cost of starting a multiply */
856b07a1 206 0, /* cost of multiply per each bit set */
e075ae69 207 17, /* cost of a divide/mod */
44cf5b6a
JH
208 1, /* cost of movsx */
209 1, /* cost of movzx */
96e7ae40 210 8, /* "large" insn */
e2e52e1b 211 6, /* MOVE_RATIO */
7c6b971d 212 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
213 {4, 4, 4}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
0f290768 215 Relative to reg-reg move (2). */
96e7ae40
JH
216 {2, 2, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
fa79946e
JH
220 {4, 4, 6}, /* cost of loading integer registers */
221 2, /* cost of moving MMX register */
222 {2, 2}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {2, 2}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {2, 2, 8}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {2, 2, 8}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
f4365627
JH
231 3, /* MMX or SSE register to integer */
232 32, /* size of prefetch block */
233 6, /* number of parallel prefetches */
856b07a1
SC
234};
235
8b60264b 236static const
a269a03c
JC
237struct processor_costs k6_cost = {
238 1, /* cost of an add instruction */
e075ae69 239 2, /* cost of a lea instruction */
a269a03c
JC
240 1, /* variable shift costs */
241 1, /* constant shift costs */
73fe76e4 242 3, /* cost of starting a multiply */
a269a03c 243 0, /* cost of multiply per each bit set */
e075ae69 244 18, /* cost of a divide/mod */
44cf5b6a
JH
245 2, /* cost of movsx */
246 2, /* cost of movzx */
96e7ae40 247 8, /* "large" insn */
e2e52e1b 248 4, /* MOVE_RATIO */
7c6b971d 249 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
250 {4, 5, 4}, /* cost of loading integer registers
251 in QImode, HImode and SImode.
0f290768 252 Relative to reg-reg move (2). */
96e7ae40
JH
253 {2, 3, 2}, /* cost of storing integer registers */
254 4, /* cost of reg,reg fld/fst */
255 {6, 6, 6}, /* cost of loading fp registers
256 in SFmode, DFmode and XFmode */
fa79946e
JH
257 {4, 4, 4}, /* cost of loading integer registers */
258 2, /* cost of moving MMX register */
259 {2, 2}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {2, 2}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {2, 2, 8}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {2, 2, 8}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
f4365627
JH
268 6, /* MMX or SSE register to integer */
269 32, /* size of prefetch block */
270 1, /* number of parallel prefetches */
a269a03c
JC
271};
272
8b60264b 273static const
309ada50
JH
274struct processor_costs athlon_cost = {
275 1, /* cost of an add instruction */
0b5107cf 276 2, /* cost of a lea instruction */
309ada50
JH
277 1, /* variable shift costs */
278 1, /* constant shift costs */
279 5, /* cost of starting a multiply */
280 0, /* cost of multiply per each bit set */
0b5107cf 281 42, /* cost of a divide/mod */
44cf5b6a
JH
282 1, /* cost of movsx */
283 1, /* cost of movzx */
309ada50 284 8, /* "large" insn */
e2e52e1b 285 9, /* MOVE_RATIO */
309ada50
JH
286 4, /* cost for loading QImode using movzbl */
287 {4, 5, 4}, /* cost of loading integer registers
288 in QImode, HImode and SImode.
0f290768 289 Relative to reg-reg move (2). */
309ada50
JH
290 {2, 3, 2}, /* cost of storing integer registers */
291 4, /* cost of reg,reg fld/fst */
0b5107cf 292 {6, 6, 20}, /* cost of loading fp registers
309ada50 293 in SFmode, DFmode and XFmode */
fa79946e
JH
294 {4, 4, 16}, /* cost of loading integer registers */
295 2, /* cost of moving MMX register */
296 {2, 2}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {2, 2}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {2, 2, 8}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {2, 2, 8}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
f4365627
JH
305 6, /* MMX or SSE register to integer */
306 64, /* size of prefetch block */
307 6, /* number of parallel prefetches */
309ada50
JH
308};
309
8b60264b 310static const
b4e89e2d
JH
311struct processor_costs pentium4_cost = {
312 1, /* cost of an add instruction */
313 1, /* cost of a lea instruction */
314 8, /* variable shift costs */
315 8, /* constant shift costs */
316 30, /* cost of starting a multiply */
317 0, /* cost of multiply per each bit set */
318 112, /* cost of a divide/mod */
44cf5b6a
JH
319 1, /* cost of movsx */
320 1, /* cost of movzx */
b4e89e2d
JH
321 16, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 5, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 3, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of loading integer registers */
332 2, /* cost of moving MMX register */
333 {2, 2}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {2, 2}, /* cost of storing MMX registers
336 in SImode and DImode */
337 12, /* cost of moving SSE register */
338 {12, 12, 12}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {2, 2, 8}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 10, /* MMX or SSE register to integer */
f4365627
JH
343 64, /* size of prefetch block */
344 6, /* number of parallel prefetches */
b4e89e2d
JH
345};
346
8b60264b 347const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 348
a269a03c
JC
349/* Processor feature/optimization bitmasks. */
350#define m_386 (1<<PROCESSOR_I386)
351#define m_486 (1<<PROCESSOR_I486)
352#define m_PENT (1<<PROCESSOR_PENTIUM)
353#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
354#define m_K6 (1<<PROCESSOR_K6)
309ada50 355#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 356#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 357
309ada50 358const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 359const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 360const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 361const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 362const int x86_double_with_add = ~m_386;
a269a03c 363const int x86_use_bit_test = m_386;
e2e52e1b 364const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 365const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 366const int x86_3dnow_a = m_ATHLON;
b4e89e2d 367const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 368const int x86_branch_hints = m_PENT4;
b4e89e2d 369const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
370const int x86_partial_reg_stall = m_PPRO;
371const int x86_use_loop = m_K6;
309ada50 372const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
373const int x86_use_mov0 = m_K6;
374const int x86_use_cltd = ~(m_PENT | m_K6);
375const int x86_read_modify_write = ~m_PENT;
376const int x86_read_modify = ~(m_PENT | m_PPRO);
377const int x86_split_long_moves = m_PPRO;
e9e80858 378const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
b4e89e2d 379const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
380const int x86_qimode_math = ~(0);
381const int x86_promote_qi_regs = 0;
382const int x86_himode_math = ~(m_PPRO);
383const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
384const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
385const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
386const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
387const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
388const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
389const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
390const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
391const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
392const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
393const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 394const int x86_decompose_lea = m_PENT4;
a269a03c 395
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Memory reference at MODE located at the hard frame pointer.  */
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
83182544
KG
406static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
407static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
408static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
4c0d89b5
RS
409
410/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 411 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 412
e075ae69 413enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
414{
415 /* ax, dx, cx, bx */
ab408a86 416 AREG, DREG, CREG, BREG,
4c0d89b5 417 /* si, di, bp, sp */
e075ae69 418 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
419 /* FP registers */
420 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 421 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 422 /* arg pointer */
83774849 423 NON_Q_REGS,
564d80f4 424 /* flags, fpsr, dirflag, frame */
a7180f70
BS
425 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
426 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
427 SSE_REGS, SSE_REGS,
428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
429 MMX_REGS, MMX_REGS,
430 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
4c0d89b5 434};
c572e5ba 435
3d117b30 436/* The "default" register map used in 32bit mode. */
83774849 437
0f290768 438int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
439{
440 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
441 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 442 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
443 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
444 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
445 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
446 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
447};
448
07933f72 449static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
53c17031
JH
450 1 /*RDX*/, 2 /*RCX*/,
451 FIRST_REX_INT_REG /*R8 */,
452 FIRST_REX_INT_REG + 1 /*R9 */};
07933f72 453static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};
53c17031 454
0f7fa3d0
JH
455/* The "default" register map used in 64bit mode. */
456int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
457{
458 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
459 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */
460 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
461 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
462 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
463 8,9,10,11,12,13,14,15, /* extended integer registers */
464 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
465};
466
83774849
RH
467/* Define the register numbers to be used in Dwarf debugging information.
468 The SVR4 reference port C compiler uses the following register numbers
469 in its Dwarf output code:
470 0 for %eax (gcc regno = 0)
471 1 for %ecx (gcc regno = 2)
472 2 for %edx (gcc regno = 1)
473 3 for %ebx (gcc regno = 3)
474 4 for %esp (gcc regno = 7)
475 5 for %ebp (gcc regno = 6)
476 6 for %esi (gcc regno = 4)
477 7 for %edi (gcc regno = 5)
478 The following three DWARF register numbers are never generated by
479 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
480 believes these numbers have these meanings.
481 8 for %eip (no gcc equivalent)
482 9 for %eflags (gcc regno = 17)
483 10 for %trapno (no gcc equivalent)
484 It is not at all clear how we should number the FP stack registers
485 for the x86 architecture. If the version of SDB on x86/svr4 were
486 a bit less brain dead with respect to floating-point then we would
487 have a precedent to follow with respect to DWARF register numbers
488 for x86 FP registers, but the SDB on x86/svr4 is so completely
489 broken with respect to FP registers that it is hardly worth thinking
490 of it as something to strive for compatibility with.
491 The version of x86/svr4 SDB I have at the moment does (partially)
492 seem to believe that DWARF register number 11 is associated with
493 the x86 register %st(0), but that's about all. Higher DWARF
494 register numbers don't seem to be associated with anything in
495 particular, and even for DWARF regno 11, SDB only seems to under-
496 stand that it should say that a variable lives in %st(0) (when
497 asked via an `=' command) if we said it was in DWARF regno 11,
498 but SDB still prints garbage when asked for the value of the
499 variable in question (via a `/' command).
500 (Also note that the labels SDB prints for various FP stack regs
501 when doing an `x' command are all wrong.)
502 Note that these problems generally don't affect the native SVR4
503 C compiler because it doesn't allow the use of -O with -g and
504 because when it is *not* optimizing, it allocates a memory
505 location for each floating-point variable, and the memory
506 location is what gets described in the DWARF AT_location
507 attribute for the variable in question.
508 Regardless of the severe mental illness of the x86/svr4 SDB, we
509 do something sensible here and we use the following DWARF
510 register numbers. Note that these are all stack-top-relative
511 numbers.
512 11 for %st(0) (gcc regno = 8)
513 12 for %st(1) (gcc regno = 9)
514 13 for %st(2) (gcc regno = 10)
515 14 for %st(3) (gcc regno = 11)
516 15 for %st(4) (gcc regno = 12)
517 16 for %st(5) (gcc regno = 13)
518 17 for %st(6) (gcc regno = 14)
519 18 for %st(7) (gcc regno = 15)
520*/
0f290768 521int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
522{
523 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
524 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 525 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
526 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
527 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
528 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
529 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
530};
531
c572e5ba
JVA
532/* Test and compare insns in i386.md store the information needed to
533 generate branch and scc insns here. */
534
07933f72
GS
535rtx ix86_compare_op0 = NULL_RTX;
536rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 537
7a2e09f4 538#define MAX_386_STACK_LOCALS 3
8362f420
JH
539/* Size of the register save area. */
540#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
541
542/* Define the structure for the machine field in struct function. */
543struct machine_function
544{
545 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
8362f420 546 int save_varrargs_registers;
6fca22eb 547 int accesses_prev_frame;
36edd3cc
BS
548};
549
01d939e8 550#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 551#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 552
4dd2ac2c
JH
553/* Structure describing stack frame layout.
554 Stack grows downward:
555
556 [arguments]
557 <- ARG_POINTER
558 saved pc
559
560 saved frame pointer if frame_pointer_needed
561 <- HARD_FRAME_POINTER
562 [saved regs]
563
564 [padding1] \
565 )
566 [va_arg registers] (
567 > to_allocate <- FRAME_POINTER
568 [frame] (
569 )
570 [padding2] /
571 */
572struct ix86_frame
573{
574 int nregs;
575 int padding1;
8362f420 576 int va_arg_size;
4dd2ac2c
JH
577 HOST_WIDE_INT frame;
578 int padding2;
579 int outgoing_arguments_size;
8362f420 580 int red_zone_size;
4dd2ac2c
JH
581
582 HOST_WIDE_INT to_allocate;
583 /* The offsets relative to ARG_POINTER. */
584 HOST_WIDE_INT frame_pointer_offset;
585 HOST_WIDE_INT hard_frame_pointer_offset;
586 HOST_WIDE_INT stack_pointer_offset;
587};
588
c93e80a5
JH
589/* Used to enable/disable debugging features. */
590const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
591/* Code model option as passed by user. */
592const char *ix86_cmodel_string;
593/* Parsed value. */
594enum cmodel ix86_cmodel;
80f33d06
GS
595/* Asm dialect. */
596const char *ix86_asm_string;
597enum asm_dialect ix86_asm_dialect = ASM_ATT;
6189a572 598
c8c5cb99 599/* which cpu are we scheduling for */
e42ea7f9 600enum processor_type ix86_cpu;
c8c5cb99 601
965f5423
JH
602/* which unit we are generating floating point math for */
603enum fpmath_unit ix86_fpmath;
604
c8c5cb99 605/* which instruction set architecture to use. */
c942177e 606int ix86_arch;
c8c5cb99
SC
607
608/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
609const char *ix86_cpu_string; /* for -mcpu=<xxx> */
610const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 611const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 612
0f290768 613/* # of registers to use to pass arguments. */
e075ae69 614const char *ix86_regparm_string;
e9a25f70 615
f4365627
JH
616/* true if sse prefetch instruction is not NOOP. */
617int x86_prefetch_sse;
618
e075ae69
RH
619/* ix86_regparm_string as a number */
620int ix86_regparm;
e9a25f70
JL
621
622/* Alignment to use for loops and jumps: */
623
0f290768 624/* Power of two alignment for loops. */
e075ae69 625const char *ix86_align_loops_string;
e9a25f70 626
0f290768 627/* Power of two alignment for non-loop jumps. */
e075ae69 628const char *ix86_align_jumps_string;
e9a25f70 629
3af4bd89 630/* Power of two alignment for stack boundary in bytes. */
e075ae69 631const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
632
633/* Preferred alignment for stack boundary in bits. */
e075ae69 634int ix86_preferred_stack_boundary;
3af4bd89 635
e9a25f70 636/* Values 1-5: see jump.c */
e075ae69
RH
637int ix86_branch_cost;
638const char *ix86_branch_cost_string;
e9a25f70 639
0f290768 640/* Power of two alignment for functions. */
e075ae69 641const char *ix86_align_funcs_string;
623fe810
RH
642
643/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
644static char internal_label_prefix[16];
645static int internal_label_prefix_len;
e075ae69 646\f
623fe810 647static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f6da8bc3
KG
648static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
649static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 650 int, int, FILE *));
f6da8bc3 651static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
652static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
653 rtx *, rtx *));
f6da8bc3
KG
654static rtx gen_push PARAMS ((rtx));
655static int memory_address_length PARAMS ((rtx addr));
656static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
657static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
658static int ix86_safe_length PARAMS ((rtx));
659static enum attr_memory ix86_safe_memory PARAMS ((rtx));
660static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
661static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
662static void ix86_dump_ppro_packet PARAMS ((FILE *));
663static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
664static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
e075ae69 665 rtx));
f6da8bc3
KG
666static void ix86_init_machine_status PARAMS ((struct function *));
667static void ix86_mark_machine_status PARAMS ((struct function *));
37b15744 668static void ix86_free_machine_status PARAMS ((struct function *));
2b589241 669static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
f6da8bc3 670static int ix86_safe_length_prefix PARAMS ((rtx));
0903fcab
JH
671static int ix86_nsaved_regs PARAMS((void));
672static void ix86_emit_save_regs PARAMS((void));
c6036a37 673static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 674static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
0e4970d7 675static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
c6991660
KG
676static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
677static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
55efb413 678static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 679static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
680static rtx ix86_expand_aligntest PARAMS ((rtx, int));
681static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
682static int ix86_issue_rate PARAMS ((void));
683static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
684static void ix86_sched_init PARAMS ((FILE *, int, int));
685static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
686static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
e37af218 687static void ix86_init_mmx_sse_builtins PARAMS ((void));
e075ae69
RH
688
689struct ix86_address
690{
691 rtx base, index, disp;
692 HOST_WIDE_INT scale;
693};
b08de47e 694
/* Address decomposition helper used by the addressing-mode code.  */
static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

/* Builtin expanders for the MMX/SSE intrinsics; the descriptor type is
   defined later in this file.  */
struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));

/* Floating point comparison expansion and cost estimation.  */
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));

/* Prologue/epilogue layout helpers.  */
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));

/* Machine attribute handling (table defined further below).  */
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
e56feed6 730
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names for the classes above, indexed by enum x86_64_reg_class.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* An argument is passed in at most MAX_CLASSES eightbyte chunks.  */
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
672a6f42
NB
765\f
/* Initialize the GCC target structure.  */

/* Machine attribute table and attribute merging/comparison hooks.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

/* Builtin (MMX/SSE intrinsic) setup and expansion.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

/* OSF needs its own prologue hook so the profiling code can be emitted
   before the prologue proper; see ix86_osf_output_function_prologue.  */
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							 HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

/* x86 assemblers need no parentheses around expressions.  */
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

/* Data emission directives; unaligned emission uses the same directives
   as aligned emission on this target.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

/* Instruction scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 823\f
f5316dfe
MM
824/* Sometimes certain combinations of command options do not make
825 sense on a particular target machine. You can define a macro
826 `OVERRIDE_OPTIONS' to take account of this. This macro, if
827 defined, is executed once just after all the command options have
828 been parsed.
829
830 Don't use this macro to turn on various extra optimizations for
831 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
832
833void
834override_options ()
835{
400500c4 836 int i;
e075ae69
RH
837 /* Comes from final.c -- no real reason to change it. */
838#define MAX_CODE_ALIGN 16
f5316dfe 839
c8c5cb99
SC
840 static struct ptt
841 {
8b60264b
KG
842 const struct processor_costs *cost; /* Processor costs */
843 const int target_enable; /* Target flags to enable. */
844 const int target_disable; /* Target flags to disable. */
845 const int align_loop; /* Default alignments. */
2cca7283 846 const int align_loop_max_skip;
8b60264b 847 const int align_jump;
2cca7283 848 const int align_jump_max_skip;
8b60264b
KG
849 const int align_func;
850 const int branch_cost;
e075ae69 851 }
0f290768 852 const processor_target_table[PROCESSOR_max] =
e075ae69 853 {
2cca7283
JH
854 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
855 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
856 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
857 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
858 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
859 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
860 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
861 };
862
f4365627 863 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
864 static struct pta
865 {
8b60264b
KG
866 const char *const name; /* processor name or nickname. */
867 const enum processor_type processor;
0dd0e980
JH
868 const enum pta_flags
869 {
870 PTA_SSE = 1,
871 PTA_SSE2 = 2,
872 PTA_MMX = 4,
f4365627 873 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
874 PTA_3DNOW = 16,
875 PTA_3DNOW_A = 64
876 } flags;
e075ae69 877 }
0f290768 878 const processor_alias_table[] =
e075ae69 879 {
0dd0e980
JH
880 {"i386", PROCESSOR_I386, 0},
881 {"i486", PROCESSOR_I486, 0},
882 {"i586", PROCESSOR_PENTIUM, 0},
883 {"pentium", PROCESSOR_PENTIUM, 0},
884 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
885 {"i686", PROCESSOR_PENTIUMPRO, 0},
886 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
887 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 888 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 889 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 890 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
891 {"k6", PROCESSOR_K6, PTA_MMX},
892 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
893 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 894 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 895 | PTA_3DNOW_A},
f4365627 896 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 897 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 898 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 899 | PTA_3DNOW_A | PTA_SSE},
f4365627 900 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 901 | PTA_3DNOW_A | PTA_SSE},
f4365627 902 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 903 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 904 };
c8c5cb99 905
0f290768 906 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
c8c5cb99 907
f5316dfe
MM
908#ifdef SUBTARGET_OVERRIDE_OPTIONS
909 SUBTARGET_OVERRIDE_OPTIONS;
910#endif
911
f4365627
JH
912 if (!ix86_cpu_string && ix86_arch_string)
913 ix86_cpu_string = ix86_arch_string;
914 if (!ix86_cpu_string)
915 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
916 if (!ix86_arch_string)
917 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 918
6189a572
JH
919 if (ix86_cmodel_string != 0)
920 {
921 if (!strcmp (ix86_cmodel_string, "small"))
922 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
923 else if (flag_pic)
c725bd79 924 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
925 else if (!strcmp (ix86_cmodel_string, "32"))
926 ix86_cmodel = CM_32;
927 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
928 ix86_cmodel = CM_KERNEL;
929 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
930 ix86_cmodel = CM_MEDIUM;
931 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
932 ix86_cmodel = CM_LARGE;
933 else
934 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
935 }
936 else
937 {
938 ix86_cmodel = CM_32;
939 if (TARGET_64BIT)
940 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
941 }
c93e80a5
JH
942 if (ix86_asm_string != 0)
943 {
944 if (!strcmp (ix86_asm_string, "intel"))
945 ix86_asm_dialect = ASM_INTEL;
946 else if (!strcmp (ix86_asm_string, "att"))
947 ix86_asm_dialect = ASM_ATT;
948 else
949 error ("bad value (%s) for -masm= switch", ix86_asm_string);
950 }
6189a572 951 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 952 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
953 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
954 if (ix86_cmodel == CM_LARGE)
c725bd79 955 sorry ("code model `large' not supported yet");
0c2dc519 956 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 957 sorry ("%i-bit mode not compiled in",
0c2dc519 958 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 959
f4365627
JH
960 for (i = 0; i < pta_size; i++)
961 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
962 {
963 ix86_arch = processor_alias_table[i].processor;
964 /* Default cpu tuning to the architecture. */
965 ix86_cpu = ix86_arch;
966 if (processor_alias_table[i].flags & PTA_MMX
967 && !(target_flags & MASK_MMX_SET))
968 target_flags |= MASK_MMX;
969 if (processor_alias_table[i].flags & PTA_3DNOW
970 && !(target_flags & MASK_3DNOW_SET))
971 target_flags |= MASK_3DNOW;
972 if (processor_alias_table[i].flags & PTA_3DNOW_A
973 && !(target_flags & MASK_3DNOW_A_SET))
974 target_flags |= MASK_3DNOW_A;
975 if (processor_alias_table[i].flags & PTA_SSE
976 && !(target_flags & MASK_SSE_SET))
977 target_flags |= MASK_SSE;
978 if (processor_alias_table[i].flags & PTA_SSE2
979 && !(target_flags & MASK_SSE2_SET))
980 target_flags |= MASK_SSE2;
981 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
982 x86_prefetch_sse = true;
983 break;
984 }
400500c4 985
f4365627
JH
986 if (i == pta_size)
987 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 988
f4365627
JH
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
991 {
992 ix86_cpu = processor_alias_table[i].processor;
993 break;
994 }
995 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
996 x86_prefetch_sse = true;
997 if (i == pta_size)
998 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 999
2ab0437e
JH
1000 if (optimize_size)
1001 ix86_cost = &size_cost;
1002 else
1003 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1004 target_flags |= processor_target_table[ix86_cpu].target_enable;
1005 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1006
36edd3cc
BS
1007 /* Arrange to set up i386_stack_locals for all functions. */
1008 init_machine_status = ix86_init_machine_status;
1526a060 1009 mark_machine_status = ix86_mark_machine_status;
37b15744 1010 free_machine_status = ix86_free_machine_status;
36edd3cc 1011
0f290768 1012 /* Validate -mregparm= value. */
e075ae69 1013 if (ix86_regparm_string)
b08de47e 1014 {
400500c4
RK
1015 i = atoi (ix86_regparm_string);
1016 if (i < 0 || i > REGPARM_MAX)
1017 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1018 else
1019 ix86_regparm = i;
b08de47e 1020 }
0d7d98ee
JH
1021 else
1022 if (TARGET_64BIT)
1023 ix86_regparm = REGPARM_MAX;
b08de47e 1024
3e18fdf6 1025 /* If the user has provided any of the -malign-* options,
a4f31c00 1026 warn and use that value only if -falign-* is not set.
3e18fdf6 1027 Remove this code in GCC 3.2 or later. */
e075ae69 1028 if (ix86_align_loops_string)
b08de47e 1029 {
3e18fdf6
GK
1030 warning ("-malign-loops is obsolete, use -falign-loops");
1031 if (align_loops == 0)
1032 {
1033 i = atoi (ix86_align_loops_string);
1034 if (i < 0 || i > MAX_CODE_ALIGN)
1035 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1036 else
1037 align_loops = 1 << i;
1038 }
b08de47e 1039 }
3af4bd89 1040
e075ae69 1041 if (ix86_align_jumps_string)
b08de47e 1042 {
3e18fdf6
GK
1043 warning ("-malign-jumps is obsolete, use -falign-jumps");
1044 if (align_jumps == 0)
1045 {
1046 i = atoi (ix86_align_jumps_string);
1047 if (i < 0 || i > MAX_CODE_ALIGN)
1048 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1049 else
1050 align_jumps = 1 << i;
1051 }
b08de47e 1052 }
b08de47e 1053
e075ae69 1054 if (ix86_align_funcs_string)
b08de47e 1055 {
3e18fdf6
GK
1056 warning ("-malign-functions is obsolete, use -falign-functions");
1057 if (align_functions == 0)
1058 {
1059 i = atoi (ix86_align_funcs_string);
1060 if (i < 0 || i > MAX_CODE_ALIGN)
1061 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1062 else
1063 align_functions = 1 << i;
1064 }
b08de47e 1065 }
3af4bd89 1066
3e18fdf6 1067 /* Default align_* from the processor table. */
3e18fdf6 1068 if (align_loops == 0)
2cca7283
JH
1069 {
1070 align_loops = processor_target_table[ix86_cpu].align_loop;
1071 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1072 }
3e18fdf6 1073 if (align_jumps == 0)
2cca7283
JH
1074 {
1075 align_jumps = processor_target_table[ix86_cpu].align_jump;
1076 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1077 }
3e18fdf6 1078 if (align_functions == 0)
2cca7283
JH
1079 {
1080 align_functions = processor_target_table[ix86_cpu].align_func;
1081 }
3e18fdf6 1082
e4c0478d 1083 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1084 The default of 128 bits is for Pentium III's SSE __m128, but we
1085 don't want additional code to keep the stack aligned when
1086 optimizing for code size. */
1087 ix86_preferred_stack_boundary = (optimize_size
1088 ? TARGET_64BIT ? 64 : 32
1089 : 128);
e075ae69 1090 if (ix86_preferred_stack_boundary_string)
3af4bd89 1091 {
400500c4 1092 i = atoi (ix86_preferred_stack_boundary_string);
c6257c5d
AO
1093 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1094 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
0d7d98ee 1095 TARGET_64BIT ? 3 : 2);
400500c4
RK
1096 else
1097 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1098 }
77a989d1 1099
0f290768 1100 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1101 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1102 if (ix86_branch_cost_string)
804a8ee0 1103 {
400500c4
RK
1104 i = atoi (ix86_branch_cost_string);
1105 if (i < 0 || i > 5)
1106 error ("-mbranch-cost=%d is not between 0 and 5", i);
1107 else
1108 ix86_branch_cost = i;
804a8ee0 1109 }
804a8ee0 1110
e9a25f70
JL
1111 /* Keep nonleaf frame pointers. */
1112 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1113 flag_omit_frame_pointer = 1;
e075ae69
RH
1114
1115 /* If we're doing fast math, we don't care about comparison order
1116 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1117 if (flag_unsafe_math_optimizations)
e075ae69
RH
1118 target_flags &= ~MASK_IEEE_FP;
1119
14f73b5a
JH
1120 if (TARGET_64BIT)
1121 {
1122 if (TARGET_ALIGN_DOUBLE)
c725bd79 1123 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1124 if (TARGET_RTD)
c725bd79 1125 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1126 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1127 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1128 ix86_fpmath = FPMATH_SSE;
14f73b5a 1129 }
965f5423
JH
1130 else
1131 ix86_fpmath = FPMATH_387;
1132
1133 if (ix86_fpmath_string != 0)
1134 {
1135 if (! strcmp (ix86_fpmath_string, "387"))
1136 ix86_fpmath = FPMATH_387;
1137 else if (! strcmp (ix86_fpmath_string, "sse"))
1138 {
1139 if (!TARGET_SSE)
1140 {
1141 warning ("SSE instruction set disabled, using 387 arithmetics");
1142 ix86_fpmath = FPMATH_387;
1143 }
1144 else
1145 ix86_fpmath = FPMATH_SSE;
1146 }
1147 else if (! strcmp (ix86_fpmath_string, "387,sse")
1148 || ! strcmp (ix86_fpmath_string, "sse,387"))
1149 {
1150 if (!TARGET_SSE)
1151 {
1152 warning ("SSE instruction set disabled, using 387 arithmetics");
1153 ix86_fpmath = FPMATH_387;
1154 }
1155 else if (!TARGET_80387)
1156 {
1157 warning ("387 instruction set disabled, using SSE arithmetics");
1158 ix86_fpmath = FPMATH_SSE;
1159 }
1160 else
1161 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1162 }
1163 else
1164 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1165 }
14f73b5a 1166
a7180f70
BS
1167 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1168 on by -msse. */
1169 if (TARGET_SSE)
e37af218
RH
1170 {
1171 target_flags |= MASK_MMX;
1172 x86_prefetch_sse = true;
1173 }
c6036a37 1174
47f339cf
BS
1175 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1176 if (TARGET_3DNOW)
1177 {
1178 target_flags |= MASK_MMX;
1179 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1180 extensions it adds. */
1181 if (x86_3dnow_a & (1 << ix86_arch))
1182 target_flags |= MASK_3DNOW_A;
1183 }
c6036a37 1184 if ((x86_accumulate_outgoing_args & CPUMASK)
0dd0e980 1185 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
c6036a37
JH
1186 && !optimize_size)
1187 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1188
1189 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1190 {
1191 char *p;
1192 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1193 p = strchr (internal_label_prefix, 'X');
1194 internal_label_prefix_len = p - internal_label_prefix;
1195 *p = '\0';
1196 }
f5316dfe
MM
1197}
1198\f
32b5b1aa 1199void
c6aded7c 1200optimization_options (level, size)
32b5b1aa 1201 int level;
bb5177ac 1202 int size ATTRIBUTE_UNUSED;
32b5b1aa 1203{
e9a25f70
JL
1204 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1205 make the problem with not enough registers even worse. */
32b5b1aa
SC
1206#ifdef INSN_SCHEDULING
1207 if (level > 1)
1208 flag_schedule_insns = 0;
1209#endif
53c17031
JH
1210 if (TARGET_64BIT && optimize >= 1)
1211 flag_omit_frame_pointer = 1;
1212 if (TARGET_64BIT)
b932f770
JH
1213 {
1214 flag_pcc_struct_return = 0;
1215 flag_asynchronous_unwind_tables = 1;
1216 }
32b5b1aa 1217}
b08de47e 1218\f
91d231cb
JM
/* Table of valid machine attributes.  Terminated by a null-name entry;
   registered with the target via TARGET_ATTRIBUTE_TABLE above.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* PE/COFF-only attributes; the handlers are not defined in this
     file — presumably provided by the subtarget (verify).  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  /* Sentinel.  */
  { NULL, 0, 0, false, false, false, NULL }
};
1238
1239/* Handle a "cdecl" or "stdcall" attribute;
1240 arguments as in struct attribute_spec.handler. */
1241static tree
1242ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1243 tree *node;
1244 tree name;
1245 tree args ATTRIBUTE_UNUSED;
1246 int flags ATTRIBUTE_UNUSED;
1247 bool *no_add_attrs;
1248{
1249 if (TREE_CODE (*node) != FUNCTION_TYPE
1250 && TREE_CODE (*node) != METHOD_TYPE
1251 && TREE_CODE (*node) != FIELD_DECL
1252 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1253 {
91d231cb
JM
1254 warning ("`%s' attribute only applies to functions",
1255 IDENTIFIER_POINTER (name));
1256 *no_add_attrs = true;
1257 }
b08de47e 1258
91d231cb
JM
1259 if (TARGET_64BIT)
1260 {
1261 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1262 *no_add_attrs = true;
1263 }
b08de47e 1264
91d231cb
JM
1265 return NULL_TREE;
1266}
b08de47e 1267
91d231cb
JM
1268/* Handle a "regparm" attribute;
1269 arguments as in struct attribute_spec.handler. */
1270static tree
1271ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1272 tree *node;
1273 tree name;
1274 tree args;
1275 int flags ATTRIBUTE_UNUSED;
1276 bool *no_add_attrs;
1277{
1278 if (TREE_CODE (*node) != FUNCTION_TYPE
1279 && TREE_CODE (*node) != METHOD_TYPE
1280 && TREE_CODE (*node) != FIELD_DECL
1281 && TREE_CODE (*node) != TYPE_DECL)
1282 {
1283 warning ("`%s' attribute only applies to functions",
1284 IDENTIFIER_POINTER (name));
1285 *no_add_attrs = true;
1286 }
1287 else
1288 {
1289 tree cst;
b08de47e 1290
91d231cb
JM
1291 cst = TREE_VALUE (args);
1292 if (TREE_CODE (cst) != INTEGER_CST)
1293 {
1294 warning ("`%s' attribute requires an integer constant argument",
1295 IDENTIFIER_POINTER (name));
1296 *no_add_attrs = true;
1297 }
1298 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1299 {
1300 warning ("argument to `%s' attribute larger than %d",
1301 IDENTIFIER_POINTER (name), REGPARM_MAX);
1302 *no_add_attrs = true;
1303 }
b08de47e
MM
1304 }
1305
91d231cb 1306 return NULL_TREE;
b08de47e
MM
1307}
1308
08c148a8
NB
#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int:  how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      /* Static (non-PIC) case: call through _mcount_ptr directly.  */
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      /* Half-PIC case: load the pointer through the half-PIC table.  */
      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      /* Full PIC case: materialize the GOT pointer in %eax via a
	 call/pop sequence, then call *_mcount_ptr through the GOT.  */
      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else /* !OSF_OS */

  /* Same as above, minus the OSF_OS-only underscore prefixing and the
     half-PIC variant.  */
  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  /* Emit the normal prologue after the profiling code.  */
  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */
1407
b08de47e
MM
1408/* Return 0 if the attributes for two types are incompatible, 1 if they
1409 are compatible, and 2 if they are nearly compatible (which causes a
1410 warning to be generated). */
1411
8d8e52be 1412static int
e075ae69 1413ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1414 tree type1;
1415 tree type2;
b08de47e 1416{
0f290768 1417 /* Check for mismatch of non-default calling convention. */
27c38fbe 1418 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1419
1420 if (TREE_CODE (type1) != FUNCTION_TYPE)
1421 return 1;
1422
1423 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1424 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1425 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1426 return 0;
b08de47e
MM
1427 return 1;
1428}
b08de47e
MM
1429\f
1430/* Value is the number of bytes of arguments automatically
1431 popped when returning from a subroutine call.
1432 FUNDECL is the declaration node of the function (as a tree),
1433 FUNTYPE is the data type of the function (as a tree),
1434 or for a library call it is an identifier node for the subroutine name.
1435 SIZE is the number of bytes of arguments passed on the stack.
1436
1437 On the 80386, the RTD insn may be used to pop them if the number
1438 of args is fixed, but if the number is variable then the caller
1439 must pop them all. RTD can't be used for library calls now
1440 because the library is compiled with the Unix compiler.
1441 Use of RTD is a selectable option, since it is incompatible with
1442 standard Unix calling sequences. If the option is not selected,
1443 the caller must always pop the args.
1444
1445 The attribute stdcall is equivalent to RTD on a per module basis. */
1446
1447int
e075ae69 1448ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1449 tree fundecl;
1450 tree funtype;
1451 int size;
79325812 1452{
3345ee7d 1453 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1454
0f290768 1455 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1456 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1457
0f290768 1458 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1459 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1460 rtd = 1;
79325812 1461
698cdd84
SC
1462 if (rtd
1463 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1464 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1465 == void_type_node)))
698cdd84
SC
1466 return size;
1467 }
79325812 1468
e9a25f70 1469 /* Lose any fake structure return argument. */
0d7d98ee
JH
1470 if (aggregate_value_p (TREE_TYPE (funtype))
1471 && !TARGET_64BIT)
698cdd84 1472 return GET_MODE_SIZE (Pmode);
79325812 1473
2614aac6 1474 return 0;
b08de47e 1475}
b08de47e
MM
1476\f
1477/* Argument support functions. */
1478
53c17031
JH
1479/* Return true when register may be used to pass function parameters. */
1480bool
1481ix86_function_arg_regno_p (regno)
1482 int regno;
1483{
1484 int i;
1485 if (!TARGET_64BIT)
1486 return regno < REGPARM_MAX || (TARGET_SSE && SSE_REGNO_P (regno));
1487 if (SSE_REGNO_P (regno) && TARGET_SSE)
1488 return true;
1489 /* RAX is used as hidden argument to va_arg functions. */
1490 if (!regno)
1491 return true;
1492 for (i = 0; i < REGPARM_MAX; i++)
1493 if (regno == x86_64_int_parameter_registers[i])
1494 return true;
1495 return false;
1496}
1497
b08de47e
MM
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  /* A "regparm" attribute on the function type overrides -mregparm,
     but only for the 32bit ABI.  */
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* Last entry not void_type_node => stdarg function.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* An unprototyped function (or a pure library call) might still be
     varargs.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1567
53c17031 1568/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
f710504c 1569 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1570 class and assign registers accordingly. */
1571
1572/* Return the union class of CLASS1 and CLASS2.
1573 See the x86-64 PS ABI for details. */
1574
1575static enum x86_64_reg_class
1576merge_classes (class1, class2)
1577 enum x86_64_reg_class class1, class2;
1578{
1579 /* Rule #1: If both classes are equal, this is the resulting class. */
1580 if (class1 == class2)
1581 return class1;
1582
1583 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1584 the other class. */
1585 if (class1 == X86_64_NO_CLASS)
1586 return class2;
1587 if (class2 == X86_64_NO_CLASS)
1588 return class1;
1589
1590 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1591 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1592 return X86_64_MEMORY_CLASS;
1593
1594 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1595 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1596 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1597 return X86_64_INTEGERSI_CLASS;
1598 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1599 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1600 return X86_64_INTEGER_CLASS;
1601
1602 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1603 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1604 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1605 return X86_64_MEMORY_CLASS;
1606
1607 /* Rule #6: Otherwise class SSE is used. */
1608 return X86_64_SSE_CLASS;
1609}
1610
1611/* Classify the argument of type TYPE and mode MODE.
1612 CLASSES will be filled by the register class used to pass each word
1613 of the operand. The number of words is returned. In case the parameter
1614 should be passed in memory, 0 is returned. As a special case for zero
1615 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1616
1617 BIT_OFFSET is used internally for handling records and specifies offset
1618 of the offset in bits modulo 256 to avoid overflow cases.
1619
1620 See the x86-64 PS ABI for details.
1621*/
1622
1623static int
1624classify_argument (mode, type, classes, bit_offset)
1625 enum machine_mode mode;
1626 tree type;
1627 enum x86_64_reg_class classes[MAX_CLASSES];
1628 int bit_offset;
1629{
1630 int bytes =
1631 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1632 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1633
1634 if (type && AGGREGATE_TYPE_P (type))
1635 {
1636 int i;
1637 tree field;
1638 enum x86_64_reg_class subclasses[MAX_CLASSES];
1639
1640 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1641 if (bytes > 16)
1642 return 0;
1643
1644 for (i = 0; i < words; i++)
1645 classes[i] = X86_64_NO_CLASS;
1646
1647 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1648 signalize memory class, so handle it as special case. */
1649 if (!words)
1650 {
1651 classes[0] = X86_64_NO_CLASS;
1652 return 1;
1653 }
1654
1655 /* Classify each field of record and merge classes. */
1656 if (TREE_CODE (type) == RECORD_TYPE)
1657 {
1658 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1659 {
1660 if (TREE_CODE (field) == FIELD_DECL)
1661 {
1662 int num;
1663
1664 /* Bitfields are always classified as integer. Handle them
1665 early, since later code would consider them to be
1666 misaligned integers. */
1667 if (DECL_BIT_FIELD (field))
1668 {
1669 for (i = int_bit_position (field) / 8 / 8;
1670 i < (int_bit_position (field)
1671 + tree_low_cst (DECL_SIZE (field), 0)
1672 + 63) / 8 / 8; i++)
1673 classes[i] =
1674 merge_classes (X86_64_INTEGER_CLASS,
1675 classes[i]);
1676 }
1677 else
1678 {
1679 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1680 TREE_TYPE (field), subclasses,
1681 (int_bit_position (field)
1682 + bit_offset) % 256);
1683 if (!num)
1684 return 0;
1685 for (i = 0; i < num; i++)
1686 {
1687 int pos =
1688 (int_bit_position (field) + bit_offset) / 8 / 8;
1689 classes[i + pos] =
1690 merge_classes (subclasses[i], classes[i + pos]);
1691 }
1692 }
1693 }
1694 }
1695 }
1696 /* Arrays are handled as small records. */
1697 else if (TREE_CODE (type) == ARRAY_TYPE)
1698 {
1699 int num;
1700 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1701 TREE_TYPE (type), subclasses, bit_offset);
1702 if (!num)
1703 return 0;
1704
1705 /* The partial classes are now full classes. */
1706 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1707 subclasses[0] = X86_64_SSE_CLASS;
1708 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1709 subclasses[0] = X86_64_INTEGER_CLASS;
1710
1711 for (i = 0; i < words; i++)
1712 classes[i] = subclasses[i % num];
1713 }
1714 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1715 else if (TREE_CODE (type) == UNION_TYPE)
1716 {
1717 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1718 {
1719 if (TREE_CODE (field) == FIELD_DECL)
1720 {
1721 int num;
1722 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1723 TREE_TYPE (field), subclasses,
1724 bit_offset);
1725 if (!num)
1726 return 0;
1727 for (i = 0; i < num; i++)
1728 classes[i] = merge_classes (subclasses[i], classes[i]);
1729 }
1730 }
1731 }
1732 else
1733 abort ();
1734
1735 /* Final merger cleanup. */
1736 for (i = 0; i < words; i++)
1737 {
1738 /* If one class is MEMORY, everything should be passed in
1739 memory. */
1740 if (classes[i] == X86_64_MEMORY_CLASS)
1741 return 0;
1742
d6a7951f 1743 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1744 X86_64_SSE_CLASS. */
1745 if (classes[i] == X86_64_SSEUP_CLASS
1746 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1747 classes[i] = X86_64_SSE_CLASS;
1748
d6a7951f 1749 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1750 if (classes[i] == X86_64_X87UP_CLASS
1751 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1752 classes[i] = X86_64_SSE_CLASS;
1753 }
1754 return words;
1755 }
1756
1757 /* Compute alignment needed. We align all types to natural boundaries with
1758 exception of XFmode that is aligned to 64bits. */
1759 if (mode != VOIDmode && mode != BLKmode)
1760 {
1761 int mode_alignment = GET_MODE_BITSIZE (mode);
1762
1763 if (mode == XFmode)
1764 mode_alignment = 128;
1765 else if (mode == XCmode)
1766 mode_alignment = 256;
f5143c46 1767 /* Misaligned fields are always returned in memory. */
53c17031
JH
1768 if (bit_offset % mode_alignment)
1769 return 0;
1770 }
1771
1772 /* Classification of atomic types. */
1773 switch (mode)
1774 {
1775 case DImode:
1776 case SImode:
1777 case HImode:
1778 case QImode:
1779 case CSImode:
1780 case CHImode:
1781 case CQImode:
1782 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1783 classes[0] = X86_64_INTEGERSI_CLASS;
1784 else
1785 classes[0] = X86_64_INTEGER_CLASS;
1786 return 1;
1787 case CDImode:
1788 case TImode:
1789 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1790 return 2;
1791 case CTImode:
1792 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1793 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1794 return 4;
1795 case SFmode:
1796 if (!(bit_offset % 64))
1797 classes[0] = X86_64_SSESF_CLASS;
1798 else
1799 classes[0] = X86_64_SSE_CLASS;
1800 return 1;
1801 case DFmode:
1802 classes[0] = X86_64_SSEDF_CLASS;
1803 return 1;
1804 case TFmode:
1805 classes[0] = X86_64_X87_CLASS;
1806 classes[1] = X86_64_X87UP_CLASS;
1807 return 2;
1808 case TCmode:
1809 classes[0] = X86_64_X87_CLASS;
1810 classes[1] = X86_64_X87UP_CLASS;
1811 classes[2] = X86_64_X87_CLASS;
1812 classes[3] = X86_64_X87UP_CLASS;
1813 return 4;
1814 case DCmode:
1815 classes[0] = X86_64_SSEDF_CLASS;
1816 classes[1] = X86_64_SSEDF_CLASS;
1817 return 2;
1818 case SCmode:
1819 classes[0] = X86_64_SSE_CLASS;
1820 return 1;
1821 case BLKmode:
1822 return 0;
1823 default:
1824 abort ();
1825 }
1826}
1827
1828/* Examine the argument and return set number of register required in each
f5143c46 1829 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1830static int
1831examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1832 enum machine_mode mode;
1833 tree type;
1834 int *int_nregs, *sse_nregs;
1835 int in_return;
1836{
1837 enum x86_64_reg_class class[MAX_CLASSES];
1838 int n = classify_argument (mode, type, class, 0);
1839
1840 *int_nregs = 0;
1841 *sse_nregs = 0;
1842 if (!n)
1843 return 0;
1844 for (n--; n >= 0; n--)
1845 switch (class[n])
1846 {
1847 case X86_64_INTEGER_CLASS:
1848 case X86_64_INTEGERSI_CLASS:
1849 (*int_nregs)++;
1850 break;
1851 case X86_64_SSE_CLASS:
1852 case X86_64_SSESF_CLASS:
1853 case X86_64_SSEDF_CLASS:
1854 (*sse_nregs)++;
1855 break;
1856 case X86_64_NO_CLASS:
1857 case X86_64_SSEUP_CLASS:
1858 break;
1859 case X86_64_X87_CLASS:
1860 case X86_64_X87UP_CLASS:
1861 if (!in_return)
1862 return 0;
1863 break;
1864 case X86_64_MEMORY_CLASS:
1865 abort ();
1866 }
1867 return 1;
1868}
1869/* Construct container for the argument used by GCC interface. See
1870 FUNCTION_ARG for the detailed description. */
1871static rtx
1872construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1873 enum machine_mode mode;
1874 tree type;
1875 int in_return;
1876 int nintregs, nsseregs;
07933f72
GS
1877 const int * intreg;
1878 int sse_regno;
53c17031
JH
1879{
1880 enum machine_mode tmpmode;
1881 int bytes =
1882 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1883 enum x86_64_reg_class class[MAX_CLASSES];
1884 int n;
1885 int i;
1886 int nexps = 0;
1887 int needed_sseregs, needed_intregs;
1888 rtx exp[MAX_CLASSES];
1889 rtx ret;
1890
1891 n = classify_argument (mode, type, class, 0);
1892 if (TARGET_DEBUG_ARG)
1893 {
1894 if (!n)
1895 fprintf (stderr, "Memory class\n");
1896 else
1897 {
1898 fprintf (stderr, "Classes:");
1899 for (i = 0; i < n; i++)
1900 {
1901 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1902 }
1903 fprintf (stderr, "\n");
1904 }
1905 }
1906 if (!n)
1907 return NULL;
1908 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1909 return NULL;
1910 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1911 return NULL;
1912
1913 /* First construct simple cases. Avoid SCmode, since we want to use
1914 single register to pass this type. */
1915 if (n == 1 && mode != SCmode)
1916 switch (class[0])
1917 {
1918 case X86_64_INTEGER_CLASS:
1919 case X86_64_INTEGERSI_CLASS:
1920 return gen_rtx_REG (mode, intreg[0]);
1921 case X86_64_SSE_CLASS:
1922 case X86_64_SSESF_CLASS:
1923 case X86_64_SSEDF_CLASS:
1924 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1925 case X86_64_X87_CLASS:
1926 return gen_rtx_REG (mode, FIRST_STACK_REG);
1927 case X86_64_NO_CLASS:
1928 /* Zero sized array, struct or class. */
1929 return NULL;
1930 default:
1931 abort ();
1932 }
1933 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1934 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1935 if (n == 2
1936 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1937 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1938 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1939 && class[1] == X86_64_INTEGER_CLASS
1940 && (mode == CDImode || mode == TImode)
1941 && intreg[0] + 1 == intreg[1])
1942 return gen_rtx_REG (mode, intreg[0]);
1943 if (n == 4
1944 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1945 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1946 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1947
1948 /* Otherwise figure out the entries of the PARALLEL. */
1949 for (i = 0; i < n; i++)
1950 {
1951 switch (class[i])
1952 {
1953 case X86_64_NO_CLASS:
1954 break;
1955 case X86_64_INTEGER_CLASS:
1956 case X86_64_INTEGERSI_CLASS:
1957 /* Merge TImodes on aligned occassions here too. */
1958 if (i * 8 + 8 > bytes)
1959 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1960 else if (class[i] == X86_64_INTEGERSI_CLASS)
1961 tmpmode = SImode;
1962 else
1963 tmpmode = DImode;
1964 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1965 if (tmpmode == BLKmode)
1966 tmpmode = DImode;
1967 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1968 gen_rtx_REG (tmpmode, *intreg),
1969 GEN_INT (i*8));
1970 intreg++;
1971 break;
1972 case X86_64_SSESF_CLASS:
1973 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1974 gen_rtx_REG (SFmode,
1975 SSE_REGNO (sse_regno)),
1976 GEN_INT (i*8));
1977 sse_regno++;
1978 break;
1979 case X86_64_SSEDF_CLASS:
1980 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1981 gen_rtx_REG (DFmode,
1982 SSE_REGNO (sse_regno)),
1983 GEN_INT (i*8));
1984 sse_regno++;
1985 break;
1986 case X86_64_SSE_CLASS:
1987 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
1988 tmpmode = TImode, i++;
1989 else
1990 tmpmode = DImode;
1991 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1992 gen_rtx_REG (tmpmode,
1993 SSE_REGNO (sse_regno)),
1994 GEN_INT (i*8));
1995 sse_regno++;
1996 break;
1997 default:
1998 abort ();
1999 }
2000 }
2001 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2002 for (i = 0; i < nexps; i++)
2003 XVECEXP (ret, 0, i) = exp [i];
2004 return ret;
2005}
2006
b08de47e
MM
2007/* Update the data in CUM to advance over an argument
2008 of mode MODE and data type TYPE.
2009 (TYPE is null for libcalls where that information may not be available.) */
2010
2011void
2012function_arg_advance (cum, mode, type, named)
2013 CUMULATIVE_ARGS *cum; /* current arg information */
2014 enum machine_mode mode; /* current arg mode */
2015 tree type; /* type of the argument or 0 if lib support */
2016 int named; /* whether or not the argument was named */
2017{
5ac9118e
KG
2018 int bytes =
2019 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2020 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2021
2022 if (TARGET_DEBUG_ARG)
2023 fprintf (stderr,
e9a25f70 2024 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2025 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2026 if (TARGET_64BIT)
b08de47e 2027 {
53c17031
JH
2028 int int_nregs, sse_nregs;
2029 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2030 cum->words += words;
2031 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2032 {
53c17031
JH
2033 cum->nregs -= int_nregs;
2034 cum->sse_nregs -= sse_nregs;
2035 cum->regno += int_nregs;
2036 cum->sse_regno += sse_nregs;
82a127a9 2037 }
53c17031
JH
2038 else
2039 cum->words += words;
b08de47e 2040 }
a4f31c00 2041 else
82a127a9 2042 {
53c17031
JH
2043 if (TARGET_SSE && mode == TImode)
2044 {
2045 cum->sse_words += words;
2046 cum->sse_nregs -= 1;
2047 cum->sse_regno += 1;
2048 if (cum->sse_nregs <= 0)
2049 {
2050 cum->sse_nregs = 0;
2051 cum->sse_regno = 0;
2052 }
2053 }
2054 else
82a127a9 2055 {
53c17031
JH
2056 cum->words += words;
2057 cum->nregs -= words;
2058 cum->regno += words;
2059
2060 if (cum->nregs <= 0)
2061 {
2062 cum->nregs = 0;
2063 cum->regno = 0;
2064 }
82a127a9
CM
2065 }
2066 }
b08de47e
MM
2067 return;
2068}
2069
2070/* Define where to put the arguments to a function.
2071 Value is zero to push the argument on the stack,
2072 or a hard register in which to store the argument.
2073
2074 MODE is the argument's machine mode.
2075 TYPE is the data type of the argument (as a tree).
2076 This is null for libcalls where that information may
2077 not be available.
2078 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2079 the preceding args and about the function being called.
2080 NAMED is nonzero if this argument is a named parameter
2081 (otherwise it is an extra parameter matching an ellipsis). */
2082
07933f72 2083rtx
b08de47e
MM
2084function_arg (cum, mode, type, named)
2085 CUMULATIVE_ARGS *cum; /* current arg information */
2086 enum machine_mode mode; /* current arg mode */
2087 tree type; /* type of the argument or 0 if lib support */
2088 int named; /* != 0 for normal args, == 0 for ... args */
2089{
2090 rtx ret = NULL_RTX;
5ac9118e
KG
2091 int bytes =
2092 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2093 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2094
53c17031
JH
2095 /* Handle an hidden AL argument containing number of registers for varargs
2096 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2097 any AL settings. */
32ee7d1d 2098 if (mode == VOIDmode)
b08de47e 2099 {
53c17031
JH
2100 if (TARGET_64BIT)
2101 return GEN_INT (cum->maybe_vaarg
2102 ? (cum->sse_nregs < 0
2103 ? SSE_REGPARM_MAX
2104 : cum->sse_regno)
2105 : -1);
2106 else
2107 return constm1_rtx;
b08de47e 2108 }
53c17031
JH
2109 if (TARGET_64BIT)
2110 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2111 &x86_64_int_parameter_registers [cum->regno],
2112 cum->sse_regno);
2113 else
2114 switch (mode)
2115 {
2116 /* For now, pass fp/complex values on the stack. */
2117 default:
2118 break;
2119
2120 case BLKmode:
2121 case DImode:
2122 case SImode:
2123 case HImode:
2124 case QImode:
2125 if (words <= cum->nregs)
2126 ret = gen_rtx_REG (mode, cum->regno);
2127 break;
2128 case TImode:
2129 if (cum->sse_nregs)
2130 ret = gen_rtx_REG (mode, cum->sse_regno);
2131 break;
2132 }
b08de47e
MM
2133
2134 if (TARGET_DEBUG_ARG)
2135 {
2136 fprintf (stderr,
e9a25f70 2137 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
b08de47e
MM
2138 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2139
2140 if (ret)
2141 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
2142 else
2143 fprintf (stderr, ", stack");
2144
2145 fprintf (stderr, " )\n");
2146 }
2147
2148 return ret;
2149}
53c17031
JH
2150
2151/* Gives the alignment boundary, in bits, of an argument with the specified mode
2152 and type. */
2153
2154int
2155ix86_function_arg_boundary (mode, type)
2156 enum machine_mode mode;
2157 tree type;
2158{
2159 int align;
2160 if (!TARGET_64BIT)
2161 return PARM_BOUNDARY;
2162 if (type)
2163 align = TYPE_ALIGN (type);
2164 else
2165 align = GET_MODE_ALIGNMENT (mode);
2166 if (align < PARM_BOUNDARY)
2167 align = PARM_BOUNDARY;
2168 if (align > 128)
2169 align = 128;
2170 return align;
2171}
2172
2173/* Return true if N is a possible register number of function value. */
2174bool
2175ix86_function_value_regno_p (regno)
2176 int regno;
2177{
2178 if (!TARGET_64BIT)
2179 {
2180 return ((regno) == 0
2181 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2182 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2183 }
2184 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2185 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2186 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2187}
2188
2189/* Define how to find the value returned by a function.
2190 VALTYPE is the data type of the value (as a tree).
2191 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2192 otherwise, FUNC is 0. */
2193rtx
2194ix86_function_value (valtype)
2195 tree valtype;
2196{
2197 if (TARGET_64BIT)
2198 {
2199 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2200 REGPARM_MAX, SSE_REGPARM_MAX,
2201 x86_64_int_return_registers, 0);
2202 /* For zero sized structures, construct_continer return NULL, but we need
2203 to keep rest of compiler happy by returning meaningfull value. */
2204 if (!ret)
2205 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2206 return ret;
2207 }
2208 else
2209 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2210}
2211
f5143c46 2212/* Return false iff type is returned in memory. */
53c17031
JH
2213int
2214ix86_return_in_memory (type)
2215 tree type;
2216{
2217 int needed_intregs, needed_sseregs;
2218 if (TARGET_64BIT)
2219 {
2220 return !examine_argument (TYPE_MODE (type), type, 1,
2221 &needed_intregs, &needed_sseregs);
2222 }
2223 else
2224 {
2225 if (TYPE_MODE (type) == BLKmode
2226 || (VECTOR_MODE_P (TYPE_MODE (type))
2227 && int_size_in_bytes (type) == 8)
2228 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2229 && TYPE_MODE (type) != TFmode
2230 && !VECTOR_MODE_P (TYPE_MODE (type))))
2231 return 1;
2232 return 0;
2233 }
2234}
2235
2236/* Define how to find the value returned by a library function
2237 assuming the value has mode MODE. */
2238rtx
2239ix86_libcall_value (mode)
2240 enum machine_mode mode;
2241{
2242 if (TARGET_64BIT)
2243 {
2244 switch (mode)
2245 {
2246 case SFmode:
2247 case SCmode:
2248 case DFmode:
2249 case DCmode:
2250 return gen_rtx_REG (mode, FIRST_SSE_REG);
2251 case TFmode:
2252 case TCmode:
2253 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2254 default:
2255 return gen_rtx_REG (mode, 0);
2256 }
2257 }
2258 else
2259 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2260}
ad919812
JH
2261\f
2262/* Create the va_list data type. */
53c17031 2263
ad919812
JH
2264tree
2265ix86_build_va_list ()
2266{
2267 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2268
ad919812
JH
2269 /* For i386 we use plain pointer to argument area. */
2270 if (!TARGET_64BIT)
2271 return build_pointer_type (char_type_node);
2272
2273 record = make_lang_type (RECORD_TYPE);
2274 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2275
2276 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2277 unsigned_type_node);
2278 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2279 unsigned_type_node);
2280 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2281 ptr_type_node);
2282 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2283 ptr_type_node);
2284
2285 DECL_FIELD_CONTEXT (f_gpr) = record;
2286 DECL_FIELD_CONTEXT (f_fpr) = record;
2287 DECL_FIELD_CONTEXT (f_ovf) = record;
2288 DECL_FIELD_CONTEXT (f_sav) = record;
2289
2290 TREE_CHAIN (record) = type_decl;
2291 TYPE_NAME (record) = type_decl;
2292 TYPE_FIELDS (record) = f_gpr;
2293 TREE_CHAIN (f_gpr) = f_fpr;
2294 TREE_CHAIN (f_fpr) = f_ovf;
2295 TREE_CHAIN (f_ovf) = f_sav;
2296
2297 layout_type (record);
2298
2299 /* The correct type is an array type of one element. */
2300 return build_array_type (record, build_index_type (size_zero_node));
2301}
2302
2303/* Perform any needed actions needed for a function that is receiving a
2304 variable number of arguments.
2305
2306 CUM is as above.
2307
2308 MODE and TYPE are the mode and type of the current parameter.
2309
2310 PRETEND_SIZE is a variable that should be set to the amount of stack
2311 that must be pushed by the prolog to pretend that our caller pushed
2312 it.
2313
2314 Normally, this macro will push all remaining incoming registers on the
2315 stack and set PRETEND_SIZE to the length of the registers pushed. */
2316
2317void
2318ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2319 CUMULATIVE_ARGS *cum;
2320 enum machine_mode mode;
2321 tree type;
2322 int *pretend_size ATTRIBUTE_UNUSED;
2323 int no_rtl;
2324
2325{
2326 CUMULATIVE_ARGS next_cum;
2327 rtx save_area = NULL_RTX, mem;
2328 rtx label;
2329 rtx label_ref;
2330 rtx tmp_reg;
2331 rtx nsse_reg;
2332 int set;
2333 tree fntype;
2334 int stdarg_p;
2335 int i;
2336
2337 if (!TARGET_64BIT)
2338 return;
2339
2340 /* Indicate to allocate space on the stack for varargs save area. */
2341 ix86_save_varrargs_registers = 1;
2342
2343 fntype = TREE_TYPE (current_function_decl);
2344 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2345 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2346 != void_type_node));
2347
2348 /* For varargs, we do not want to skip the dummy va_dcl argument.
2349 For stdargs, we do want to skip the last named argument. */
2350 next_cum = *cum;
2351 if (stdarg_p)
2352 function_arg_advance (&next_cum, mode, type, 1);
2353
2354 if (!no_rtl)
2355 save_area = frame_pointer_rtx;
2356
2357 set = get_varargs_alias_set ();
2358
2359 for (i = next_cum.regno; i < ix86_regparm; i++)
2360 {
2361 mem = gen_rtx_MEM (Pmode,
2362 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2363 set_mem_alias_set (mem, set);
ad919812
JH
2364 emit_move_insn (mem, gen_rtx_REG (Pmode,
2365 x86_64_int_parameter_registers[i]));
2366 }
2367
2368 if (next_cum.sse_nregs)
2369 {
2370 /* Now emit code to save SSE registers. The AX parameter contains number
2371 of SSE parameter regsiters used to call this function. We use
2372 sse_prologue_save insn template that produces computed jump across
2373 SSE saves. We need some preparation work to get this working. */
2374
2375 label = gen_label_rtx ();
2376 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2377
2378 /* Compute address to jump to :
2379 label - 5*eax + nnamed_sse_arguments*5 */
2380 tmp_reg = gen_reg_rtx (Pmode);
2381 nsse_reg = gen_reg_rtx (Pmode);
2382 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2383 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2384 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2385 GEN_INT (4))));
2386 if (next_cum.sse_regno)
2387 emit_move_insn
2388 (nsse_reg,
2389 gen_rtx_CONST (DImode,
2390 gen_rtx_PLUS (DImode,
2391 label_ref,
2392 GEN_INT (next_cum.sse_regno * 4))));
2393 else
2394 emit_move_insn (nsse_reg, label_ref);
2395 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2396
2397 /* Compute address of memory block we save into. We always use pointer
2398 pointing 127 bytes after first byte to store - this is needed to keep
2399 instruction size limited by 4 bytes. */
2400 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2401 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2402 plus_constant (save_area,
2403 8 * REGPARM_MAX + 127)));
ad919812 2404 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2405 set_mem_alias_set (mem, set);
8ac61af7 2406 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2407
2408 /* And finally do the dirty job! */
8ac61af7
RK
2409 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2410 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2411 }
2412
2413}
2414
2415/* Implement va_start. */
2416
2417void
2418ix86_va_start (stdarg_p, valist, nextarg)
2419 int stdarg_p;
2420 tree valist;
2421 rtx nextarg;
2422{
2423 HOST_WIDE_INT words, n_gpr, n_fpr;
2424 tree f_gpr, f_fpr, f_ovf, f_sav;
2425 tree gpr, fpr, ovf, sav, t;
2426
2427 /* Only 64bit target needs something special. */
2428 if (!TARGET_64BIT)
2429 {
2430 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2431 return;
2432 }
2433
2434 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2435 f_fpr = TREE_CHAIN (f_gpr);
2436 f_ovf = TREE_CHAIN (f_fpr);
2437 f_sav = TREE_CHAIN (f_ovf);
2438
2439 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2440 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2441 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2442 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2443 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2444
2445 /* Count number of gp and fp argument registers used. */
2446 words = current_function_args_info.words;
2447 n_gpr = current_function_args_info.regno;
2448 n_fpr = current_function_args_info.sse_regno;
2449
2450 if (TARGET_DEBUG_ARG)
2451 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
14f73b5a 2452 (int)words, (int)n_gpr, (int)n_fpr);
ad919812
JH
2453
2454 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2455 build_int_2 (n_gpr * 8, 0));
2456 TREE_SIDE_EFFECTS (t) = 1;
2457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2458
2459 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2460 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2461 TREE_SIDE_EFFECTS (t) = 1;
2462 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2463
2464 /* Find the overflow area. */
2465 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2466 if (words != 0)
2467 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2468 build_int_2 (words * UNITS_PER_WORD, 0));
2469 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2470 TREE_SIDE_EFFECTS (t) = 1;
2471 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2472
2473 /* Find the register save area.
2474 Prologue of the function save it right above stack frame. */
2475 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2476 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2477 TREE_SIDE_EFFECTS (t) = 1;
2478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2479}
2480
2481/* Implement va_arg. */
2482rtx
2483ix86_va_arg (valist, type)
2484 tree valist, type;
2485{
2486 static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2487 tree f_gpr, f_fpr, f_ovf, f_sav;
2488 tree gpr, fpr, ovf, sav, t;
b932f770 2489 int size, rsize;
ad919812
JH
2490 rtx lab_false, lab_over = NULL_RTX;
2491 rtx addr_rtx, r;
2492 rtx container;
2493
2494 /* Only 64bit target needs something special. */
2495 if (!TARGET_64BIT)
2496 {
2497 return std_expand_builtin_va_arg (valist, type);
2498 }
2499
2500 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2501 f_fpr = TREE_CHAIN (f_gpr);
2502 f_ovf = TREE_CHAIN (f_fpr);
2503 f_sav = TREE_CHAIN (f_ovf);
2504
2505 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2506 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2507 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2508 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2509 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2510
2511 size = int_size_in_bytes (type);
2512 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2513
2514 container = construct_container (TYPE_MODE (type), type, 0,
2515 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2516 /*
2517 * Pull the value out of the saved registers ...
2518 */
2519
2520 addr_rtx = gen_reg_rtx (Pmode);
2521
2522 if (container)
2523 {
2524 rtx int_addr_rtx, sse_addr_rtx;
2525 int needed_intregs, needed_sseregs;
2526 int need_temp;
2527
2528 lab_over = gen_label_rtx ();
2529 lab_false = gen_label_rtx ();
8bad7136 2530
ad919812
JH
2531 examine_argument (TYPE_MODE (type), type, 0,
2532 &needed_intregs, &needed_sseregs);
2533
2534
2535 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2536 || TYPE_ALIGN (type) > 128);
2537
2538 /* In case we are passing structure, verify that it is consetuctive block
2539 on the register save area. If not we need to do moves. */
2540 if (!need_temp && !REG_P (container))
2541 {
2542 /* Verify that all registers are strictly consetuctive */
2543 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2544 {
2545 int i;
2546
2547 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2548 {
2549 rtx slot = XVECEXP (container, 0, i);
2550 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int)i
2551 || INTVAL (XEXP (slot, 1)) != i * 16)
2552 need_temp = 1;
2553 }
2554 }
2555 else
2556 {
2557 int i;
2558
2559 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2560 {
2561 rtx slot = XVECEXP (container, 0, i);
2562 if (REGNO (XEXP (slot, 0)) != (unsigned int)i
2563 || INTVAL (XEXP (slot, 1)) != i * 8)
2564 need_temp = 1;
2565 }
2566 }
2567 }
2568 if (!need_temp)
2569 {
2570 int_addr_rtx = addr_rtx;
2571 sse_addr_rtx = addr_rtx;
2572 }
2573 else
2574 {
2575 int_addr_rtx = gen_reg_rtx (Pmode);
2576 sse_addr_rtx = gen_reg_rtx (Pmode);
2577 }
2578 /* First ensure that we fit completely in registers. */
2579 if (needed_intregs)
2580 {
2581 emit_cmp_and_jump_insns (expand_expr
2582 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2583 GEN_INT ((REGPARM_MAX - needed_intregs +
2584 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2585 1, lab_false);
ad919812
JH
2586 }
2587 if (needed_sseregs)
2588 {
2589 emit_cmp_and_jump_insns (expand_expr
2590 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2591 GEN_INT ((SSE_REGPARM_MAX -
2592 needed_sseregs + 1) * 16 +
2593 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2594 SImode, 1, lab_false);
ad919812
JH
2595 }
2596
2597 /* Compute index to start of area used for integer regs. */
2598 if (needed_intregs)
2599 {
2600 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2601 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2602 if (r != int_addr_rtx)
2603 emit_move_insn (int_addr_rtx, r);
2604 }
2605 if (needed_sseregs)
2606 {
2607 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2608 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2609 if (r != sse_addr_rtx)
2610 emit_move_insn (sse_addr_rtx, r);
2611 }
2612 if (need_temp)
2613 {
2614 int i;
2615 rtx mem;
2616
b932f770
JH
2617 /* Never use the memory itself, as it has the alias set. */
2618 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2619 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2620 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2621 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2622
ad919812
JH
2623 for (i = 0; i < XVECLEN (container, 0); i++)
2624 {
2625 rtx slot = XVECEXP (container, 0, i);
2626 rtx reg = XEXP (slot, 0);
2627 enum machine_mode mode = GET_MODE (reg);
2628 rtx src_addr;
2629 rtx src_mem;
2630 int src_offset;
2631 rtx dest_mem;
2632
2633 if (SSE_REGNO_P (REGNO (reg)))
2634 {
2635 src_addr = sse_addr_rtx;
2636 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2637 }
2638 else
2639 {
2640 src_addr = int_addr_rtx;
2641 src_offset = REGNO (reg) * 8;
2642 }
2643 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2644 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2645 src_mem = adjust_address (src_mem, mode, src_offset);
2646 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2647 emit_move_insn (dest_mem, src_mem);
2648 }
2649 }
2650
2651 if (needed_intregs)
2652 {
2653 t =
2654 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2655 build_int_2 (needed_intregs * 8, 0));
2656 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2657 TREE_SIDE_EFFECTS (t) = 1;
2658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2659 }
2660 if (needed_sseregs)
2661 {
2662 t =
2663 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2664 build_int_2 (needed_sseregs * 16, 0));
2665 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2666 TREE_SIDE_EFFECTS (t) = 1;
2667 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2668 }
2669
2670 emit_jump_insn (gen_jump (lab_over));
2671 emit_barrier ();
2672 emit_label (lab_false);
2673 }
2674
2675 /* ... otherwise out of the overflow area. */
2676
2677 /* Care for on-stack alignment if needed. */
2678 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2679 t = ovf;
2680 else
2681 {
2682 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2683 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2684 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2685 }
2686 t = save_expr (t);
2687
2688 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2689 if (r != addr_rtx)
2690 emit_move_insn (addr_rtx, r);
2691
2692 t =
2693 build (PLUS_EXPR, TREE_TYPE (t), t,
2694 build_int_2 (rsize * UNITS_PER_WORD, 0));
2695 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2696 TREE_SIDE_EFFECTS (t) = 1;
2697 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2698
2699 if (container)
2700 emit_label (lab_over);
2701
ad919812
JH
2702 return addr_rtx;
2703}
2704\f
7dd4b4a3
JH
2705/* Return nonzero if OP is general operand representable on x86_64. */
2706
2707int
2708x86_64_general_operand (op, mode)
2709 rtx op;
2710 enum machine_mode mode;
2711{
2712 if (!TARGET_64BIT)
2713 return general_operand (op, mode);
2714 if (nonimmediate_operand (op, mode))
2715 return 1;
2716 return x86_64_sign_extended_value (op);
2717}
2718
2719/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2720 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2721
2722int
2723x86_64_szext_general_operand (op, mode)
2724 rtx op;
2725 enum machine_mode mode;
2726{
2727 if (!TARGET_64BIT)
2728 return general_operand (op, mode);
2729 if (nonimmediate_operand (op, mode))
2730 return 1;
2731 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2732}
2733
2734/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2735
2736int
2737x86_64_nonmemory_operand (op, mode)
2738 rtx op;
2739 enum machine_mode mode;
2740{
2741 if (!TARGET_64BIT)
2742 return nonmemory_operand (op, mode);
2743 if (register_operand (op, mode))
2744 return 1;
2745 return x86_64_sign_extended_value (op);
2746}
2747
2748/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2749
2750int
2751x86_64_movabs_operand (op, mode)
2752 rtx op;
2753 enum machine_mode mode;
2754{
2755 if (!TARGET_64BIT || !flag_pic)
2756 return nonmemory_operand (op, mode);
2757 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2758 return 1;
2759 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2760 return 1;
2761 return 0;
2762}
2763
2764/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2765
2766int
2767x86_64_szext_nonmemory_operand (op, mode)
2768 rtx op;
2769 enum machine_mode mode;
2770{
2771 if (!TARGET_64BIT)
2772 return nonmemory_operand (op, mode);
2773 if (register_operand (op, mode))
2774 return 1;
2775 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2776}
2777
2778/* Return nonzero if OP is immediate operand representable on x86_64. */
2779
2780int
2781x86_64_immediate_operand (op, mode)
2782 rtx op;
2783 enum machine_mode mode;
2784{
2785 if (!TARGET_64BIT)
2786 return immediate_operand (op, mode);
2787 return x86_64_sign_extended_value (op);
2788}
2789
2790/* Return nonzero if OP is immediate operand representable on x86_64. */
2791
2792int
2793x86_64_zext_immediate_operand (op, mode)
2794 rtx op;
2795 enum machine_mode mode ATTRIBUTE_UNUSED;
2796{
2797 return x86_64_zero_extended_value (op);
2798}
2799
8bad7136
JL
2800/* Return nonzero if OP is (const_int 1), else return zero. */
2801
2802int
2803const_int_1_operand (op, mode)
2804 rtx op;
2805 enum machine_mode mode ATTRIBUTE_UNUSED;
2806{
2807 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2808}
2809
e075ae69
RH
2810/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2811 reference and a constant. */
b08de47e
MM
2812
2813int
e075ae69
RH
2814symbolic_operand (op, mode)
2815 register rtx op;
2816 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2817{
e075ae69 2818 switch (GET_CODE (op))
2a2ab3f9 2819 {
e075ae69
RH
2820 case SYMBOL_REF:
2821 case LABEL_REF:
2822 return 1;
2823
2824 case CONST:
2825 op = XEXP (op, 0);
2826 if (GET_CODE (op) == SYMBOL_REF
2827 || GET_CODE (op) == LABEL_REF
2828 || (GET_CODE (op) == UNSPEC
6eb791fc
JH
2829 && (XINT (op, 1) == 6
2830 || XINT (op, 1) == 7
2831 || XINT (op, 1) == 15)))
e075ae69
RH
2832 return 1;
2833 if (GET_CODE (op) != PLUS
2834 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2835 return 0;
2836
2837 op = XEXP (op, 0);
2838 if (GET_CODE (op) == SYMBOL_REF
2839 || GET_CODE (op) == LABEL_REF)
2840 return 1;
2841 /* Only @GOTOFF gets offsets. */
2842 if (GET_CODE (op) != UNSPEC
2843 || XINT (op, 1) != 7)
2844 return 0;
2845
2846 op = XVECEXP (op, 0, 0);
2847 if (GET_CODE (op) == SYMBOL_REF
2848 || GET_CODE (op) == LABEL_REF)
2849 return 1;
2850 return 0;
2851
2852 default:
2853 return 0;
2a2ab3f9
JVA
2854 }
2855}
2a2ab3f9 2856
e075ae69 2857/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2858
e075ae69
RH
2859int
2860pic_symbolic_operand (op, mode)
2861 register rtx op;
2862 enum machine_mode mode ATTRIBUTE_UNUSED;
2863{
6eb791fc
JH
2864 if (GET_CODE (op) != CONST)
2865 return 0;
2866 op = XEXP (op, 0);
2867 if (TARGET_64BIT)
2868 {
2869 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2870 return 1;
2871 }
2872 else
2a2ab3f9 2873 {
e075ae69
RH
2874 if (GET_CODE (op) == UNSPEC)
2875 return 1;
2876 if (GET_CODE (op) != PLUS
2877 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2878 return 0;
2879 op = XEXP (op, 0);
2880 if (GET_CODE (op) == UNSPEC)
2881 return 1;
2a2ab3f9 2882 }
e075ae69 2883 return 0;
2a2ab3f9 2884}
2a2ab3f9 2885
623fe810
RH
2886/* Return true if OP is a symbolic operand that resolves locally. */
2887
2888static int
2889local_symbolic_operand (op, mode)
2890 rtx op;
2891 enum machine_mode mode ATTRIBUTE_UNUSED;
2892{
2893 if (GET_CODE (op) == LABEL_REF)
2894 return 1;
2895
2896 if (GET_CODE (op) == CONST
2897 && GET_CODE (XEXP (op, 0)) == PLUS
2898 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2899 op = XEXP (XEXP (op, 0), 0);
2900
2901 if (GET_CODE (op) != SYMBOL_REF)
2902 return 0;
2903
2904 /* These we've been told are local by varasm and encode_section_info
2905 respectively. */
2906 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2907 return 1;
2908
2909 /* There is, however, a not insubstantial body of code in the rest of
2910 the compiler that assumes it can just stick the results of
2911 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2912 /* ??? This is a hack. Should update the body of the compiler to
2913 always create a DECL an invoke ENCODE_SECTION_INFO. */
2914 if (strncmp (XSTR (op, 0), internal_label_prefix,
2915 internal_label_prefix_len) == 0)
2916 return 1;
2917
2918 return 0;
2919}
2920
28d52ffb
RH
2921/* Test for a valid operand for a call instruction. Don't allow the
2922 arg pointer register or virtual regs since they may decay into
2923 reg + const, which the patterns can't handle. */
2a2ab3f9 2924
e075ae69
RH
2925int
2926call_insn_operand (op, mode)
2927 rtx op;
2928 enum machine_mode mode ATTRIBUTE_UNUSED;
2929{
e075ae69
RH
2930 /* Disallow indirect through a virtual register. This leads to
2931 compiler aborts when trying to eliminate them. */
2932 if (GET_CODE (op) == REG
2933 && (op == arg_pointer_rtx
564d80f4 2934 || op == frame_pointer_rtx
e075ae69
RH
2935 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2936 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2937 return 0;
2a2ab3f9 2938
28d52ffb
RH
2939 /* Disallow `call 1234'. Due to varying assembler lameness this
2940 gets either rejected or translated to `call .+1234'. */
2941 if (GET_CODE (op) == CONST_INT)
2942 return 0;
2943
cbbf65e0
RH
2944 /* Explicitly allow SYMBOL_REF even if pic. */
2945 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 2946 return 1;
2a2ab3f9 2947
cbbf65e0
RH
2948 /* Half-pic doesn't allow anything but registers and constants.
2949 We've just taken care of the later. */
2950 if (HALF_PIC_P ())
2951 return register_operand (op, Pmode);
2952
2953 /* Otherwise we can allow any general_operand in the address. */
2954 return general_operand (op, Pmode);
e075ae69 2955}
79325812 2956
e075ae69
RH
2957int
2958constant_call_address_operand (op, mode)
2959 rtx op;
2960 enum machine_mode mode ATTRIBUTE_UNUSED;
2961{
eaf19aba
JJ
2962 if (GET_CODE (op) == CONST
2963 && GET_CODE (XEXP (op, 0)) == PLUS
2964 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2965 op = XEXP (XEXP (op, 0), 0);
e1ff012c 2966 return GET_CODE (op) == SYMBOL_REF;
e075ae69 2967}
2a2ab3f9 2968
e075ae69 2969/* Match exactly zero and one. */
e9a25f70 2970
0f290768 2971int
e075ae69
RH
2972const0_operand (op, mode)
2973 register rtx op;
2974 enum machine_mode mode;
2975{
2976 return op == CONST0_RTX (mode);
2977}
e9a25f70 2978
0f290768 2979int
e075ae69
RH
2980const1_operand (op, mode)
2981 register rtx op;
2982 enum machine_mode mode ATTRIBUTE_UNUSED;
2983{
2984 return op == const1_rtx;
2985}
2a2ab3f9 2986
e075ae69 2987/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 2988
e075ae69
RH
2989int
2990const248_operand (op, mode)
2991 register rtx op;
2992 enum machine_mode mode ATTRIBUTE_UNUSED;
2993{
2994 return (GET_CODE (op) == CONST_INT
2995 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
2996}
e9a25f70 2997
e075ae69 2998/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 2999
e075ae69
RH
3000int
3001incdec_operand (op, mode)
3002 register rtx op;
0631e0bf 3003 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3004{
f5143c46 3005 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3006 registers, since carry flag is not set. */
3007 if (TARGET_PENTIUM4 && !optimize_size)
3008 return 0;
2b1c08f5 3009 return op == const1_rtx || op == constm1_rtx;
e075ae69 3010}
2a2ab3f9 3011
371bc54b
JH
3012/* Return nonzero if OP is acceptable as operand of DImode shift
3013 expander. */
3014
3015int
3016shiftdi_operand (op, mode)
3017 rtx op;
3018 enum machine_mode mode ATTRIBUTE_UNUSED;
3019{
3020 if (TARGET_64BIT)
3021 return nonimmediate_operand (op, mode);
3022 else
3023 return register_operand (op, mode);
3024}
3025
0f290768 3026/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3027 register eliminable to the stack pointer. Otherwise, this is
3028 a register operand.
2a2ab3f9 3029
e075ae69
RH
3030 This is used to prevent esp from being used as an index reg.
3031 Which would only happen in pathological cases. */
5f1ec3e6 3032
e075ae69
RH
3033int
3034reg_no_sp_operand (op, mode)
3035 register rtx op;
3036 enum machine_mode mode;
3037{
3038 rtx t = op;
3039 if (GET_CODE (t) == SUBREG)
3040 t = SUBREG_REG (t);
564d80f4 3041 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3042 return 0;
2a2ab3f9 3043
e075ae69 3044 return register_operand (op, mode);
2a2ab3f9 3045}
b840bfb0 3046
915119a5
BS
3047int
3048mmx_reg_operand (op, mode)
3049 register rtx op;
bd793c65 3050 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3051{
3052 return MMX_REG_P (op);
3053}
3054
2c5a510c
RH
3055/* Return false if this is any eliminable register. Otherwise
3056 general_operand. */
3057
3058int
3059general_no_elim_operand (op, mode)
3060 register rtx op;
3061 enum machine_mode mode;
3062{
3063 rtx t = op;
3064 if (GET_CODE (t) == SUBREG)
3065 t = SUBREG_REG (t);
3066 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3067 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3068 || t == virtual_stack_dynamic_rtx)
3069 return 0;
1020a5ab
RH
3070 if (REG_P (t)
3071 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3072 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3073 return 0;
2c5a510c
RH
3074
3075 return general_operand (op, mode);
3076}
3077
3078/* Return false if this is any eliminable register. Otherwise
3079 register_operand or const_int. */
3080
3081int
3082nonmemory_no_elim_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode;
3085{
3086 rtx t = op;
3087 if (GET_CODE (t) == SUBREG)
3088 t = SUBREG_REG (t);
3089 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3090 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3091 || t == virtual_stack_dynamic_rtx)
3092 return 0;
3093
3094 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3095}
3096
e075ae69 3097/* Return true if op is a Q_REGS class register. */
b840bfb0 3098
e075ae69
RH
3099int
3100q_regs_operand (op, mode)
3101 register rtx op;
3102 enum machine_mode mode;
b840bfb0 3103{
e075ae69
RH
3104 if (mode != VOIDmode && GET_MODE (op) != mode)
3105 return 0;
3106 if (GET_CODE (op) == SUBREG)
3107 op = SUBREG_REG (op);
3108 return QI_REG_P (op);
0f290768 3109}
b840bfb0 3110
e075ae69 3111/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3112
e075ae69
RH
3113int
3114non_q_regs_operand (op, mode)
3115 register rtx op;
3116 enum machine_mode mode;
3117{
3118 if (mode != VOIDmode && GET_MODE (op) != mode)
3119 return 0;
3120 if (GET_CODE (op) == SUBREG)
3121 op = SUBREG_REG (op);
3122 return NON_QI_REG_P (op);
0f290768 3123}
b840bfb0 3124
915119a5
BS
3125/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3126 insns. */
3127int
3128sse_comparison_operator (op, mode)
3129 rtx op;
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3131{
3132 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3133 switch (code)
3134 {
3135 /* Operations supported directly. */
3136 case EQ:
3137 case LT:
3138 case LE:
3139 case UNORDERED:
3140 case NE:
3141 case UNGE:
3142 case UNGT:
3143 case ORDERED:
3144 return 1;
3145 /* These are equivalent to ones above in non-IEEE comparisons. */
3146 case UNEQ:
3147 case UNLT:
3148 case UNLE:
3149 case LTGT:
3150 case GE:
3151 case GT:
3152 return !TARGET_IEEE_FP;
3153 default:
3154 return 0;
3155 }
915119a5 3156}
9076b9c1 3157/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3158int
9076b9c1
JH
3159ix86_comparison_operator (op, mode)
3160 register rtx op;
3161 enum machine_mode mode;
e075ae69 3162{
9076b9c1 3163 enum machine_mode inmode;
9a915772 3164 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3165 if (mode != VOIDmode && GET_MODE (op) != mode)
3166 return 0;
9a915772
JH
3167 if (GET_RTX_CLASS (code) != '<')
3168 return 0;
3169 inmode = GET_MODE (XEXP (op, 0));
3170
3171 if (inmode == CCFPmode || inmode == CCFPUmode)
3172 {
3173 enum rtx_code second_code, bypass_code;
3174 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3175 return (bypass_code == NIL && second_code == NIL);
3176 }
3177 switch (code)
3a3677ff
RH
3178 {
3179 case EQ: case NE:
3a3677ff 3180 return 1;
9076b9c1 3181 case LT: case GE:
7e08e190 3182 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3183 || inmode == CCGOCmode || inmode == CCNOmode)
3184 return 1;
3185 return 0;
7e08e190 3186 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3187 if (inmode == CCmode)
9076b9c1
JH
3188 return 1;
3189 return 0;
3190 case GT: case LE:
7e08e190 3191 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3192 return 1;
3193 return 0;
3a3677ff
RH
3194 default:
3195 return 0;
3196 }
3197}
3198
9076b9c1 3199/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3200
9076b9c1
JH
3201int
3202fcmov_comparison_operator (op, mode)
3a3677ff
RH
3203 register rtx op;
3204 enum machine_mode mode;
3205{
b62d22a2 3206 enum machine_mode inmode;
9a915772 3207 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3208 if (mode != VOIDmode && GET_MODE (op) != mode)
3209 return 0;
9a915772
JH
3210 if (GET_RTX_CLASS (code) != '<')
3211 return 0;
3212 inmode = GET_MODE (XEXP (op, 0));
3213 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3214 {
9a915772
JH
3215 enum rtx_code second_code, bypass_code;
3216 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3217 if (bypass_code != NIL || second_code != NIL)
3218 return 0;
3219 code = ix86_fp_compare_code_to_integer (code);
3220 }
3221 /* i387 supports just limited amount of conditional codes. */
3222 switch (code)
3223 {
3224 case LTU: case GTU: case LEU: case GEU:
3225 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3226 return 1;
3227 return 0;
9a915772
JH
3228 case ORDERED: case UNORDERED:
3229 case EQ: case NE:
3230 return 1;
3a3677ff
RH
3231 default:
3232 return 0;
3233 }
e075ae69 3234}
b840bfb0 3235
e9e80858
JH
3236/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3237
3238int
3239promotable_binary_operator (op, mode)
3240 register rtx op;
3241 enum machine_mode mode ATTRIBUTE_UNUSED;
3242{
3243 switch (GET_CODE (op))
3244 {
3245 case MULT:
3246 /* Modern CPUs have same latency for HImode and SImode multiply,
3247 but 386 and 486 do HImode multiply faster. */
3248 return ix86_cpu > PROCESSOR_I486;
3249 case PLUS:
3250 case AND:
3251 case IOR:
3252 case XOR:
3253 case ASHIFT:
3254 return 1;
3255 default:
3256 return 0;
3257 }
3258}
3259
e075ae69
RH
3260/* Nearly general operand, but accept any const_double, since we wish
3261 to be able to drop them into memory rather than have them get pulled
3262 into registers. */
b840bfb0 3263
2a2ab3f9 3264int
e075ae69
RH
3265cmp_fp_expander_operand (op, mode)
3266 register rtx op;
3267 enum machine_mode mode;
2a2ab3f9 3268{
e075ae69 3269 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3270 return 0;
e075ae69 3271 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3272 return 1;
e075ae69 3273 return general_operand (op, mode);
2a2ab3f9
JVA
3274}
3275
e075ae69 3276/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3277
3278int
e075ae69 3279ext_register_operand (op, mode)
2a2ab3f9 3280 register rtx op;
bb5177ac 3281 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3282{
3522082b 3283 int regno;
0d7d98ee
JH
3284 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3285 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3286 return 0;
3522082b
JH
3287
3288 if (!register_operand (op, VOIDmode))
3289 return 0;
3290
3291 /* Be curefull to accept only registers having upper parts. */
3292 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3293 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3294}
3295
3296/* Return 1 if this is a valid binary floating-point operation.
0f290768 3297 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3298
3299int
3300binary_fp_operator (op, mode)
3301 register rtx op;
3302 enum machine_mode mode;
3303{
3304 if (mode != VOIDmode && mode != GET_MODE (op))
3305 return 0;
3306
2a2ab3f9
JVA
3307 switch (GET_CODE (op))
3308 {
e075ae69
RH
3309 case PLUS:
3310 case MINUS:
3311 case MULT:
3312 case DIV:
3313 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3314
2a2ab3f9
JVA
3315 default:
3316 return 0;
3317 }
3318}
fee2770d 3319
e075ae69
RH
3320int
3321mult_operator(op, mode)
3322 register rtx op;
3323 enum machine_mode mode ATTRIBUTE_UNUSED;
3324{
3325 return GET_CODE (op) == MULT;
3326}
3327
3328int
3329div_operator(op, mode)
3330 register rtx op;
3331 enum machine_mode mode ATTRIBUTE_UNUSED;
3332{
3333 return GET_CODE (op) == DIV;
3334}
0a726ef1
JL
3335
3336int
e075ae69
RH
3337arith_or_logical_operator (op, mode)
3338 rtx op;
3339 enum machine_mode mode;
0a726ef1 3340{
e075ae69
RH
3341 return ((mode == VOIDmode || GET_MODE (op) == mode)
3342 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3343 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3344}
3345
e075ae69 3346/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3347
3348int
e075ae69
RH
3349memory_displacement_operand (op, mode)
3350 register rtx op;
3351 enum machine_mode mode;
4f2c8ebb 3352{
e075ae69 3353 struct ix86_address parts;
e9a25f70 3354
e075ae69
RH
3355 if (! memory_operand (op, mode))
3356 return 0;
3357
3358 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3359 abort ();
3360
3361 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3362}
3363
16189740 3364/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3365 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3366
3367 ??? It seems likely that this will only work because cmpsi is an
3368 expander, and no actual insns use this. */
4f2c8ebb
RS
3369
3370int
e075ae69
RH
3371cmpsi_operand (op, mode)
3372 rtx op;
3373 enum machine_mode mode;
fee2770d 3374{
b9b2c339 3375 if (nonimmediate_operand (op, mode))
e075ae69
RH
3376 return 1;
3377
3378 if (GET_CODE (op) == AND
3379 && GET_MODE (op) == SImode
3380 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3381 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3382 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3383 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3384 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3385 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3386 return 1;
e9a25f70 3387
fee2770d
RS
3388 return 0;
3389}
d784886d 3390
e075ae69
RH
3391/* Returns 1 if OP is memory operand that can not be represented by the
3392 modRM array. */
d784886d
RK
3393
3394int
e075ae69 3395long_memory_operand (op, mode)
d784886d
RK
3396 register rtx op;
3397 enum machine_mode mode;
3398{
e075ae69 3399 if (! memory_operand (op, mode))
d784886d
RK
3400 return 0;
3401
e075ae69 3402 return memory_address_length (op) != 0;
d784886d 3403}
2247f6ed
JH
3404
3405/* Return nonzero if the rtx is known aligned. */
3406
3407int
3408aligned_operand (op, mode)
3409 rtx op;
3410 enum machine_mode mode;
3411{
3412 struct ix86_address parts;
3413
3414 if (!general_operand (op, mode))
3415 return 0;
3416
0f290768 3417 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3418 if (GET_CODE (op) != MEM)
3419 return 1;
3420
0f290768 3421 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3422 if (MEM_VOLATILE_P (op))
3423 return 0;
3424
3425 op = XEXP (op, 0);
3426
3427 /* Pushes and pops are only valid on the stack pointer. */
3428 if (GET_CODE (op) == PRE_DEC
3429 || GET_CODE (op) == POST_INC)
3430 return 1;
3431
3432 /* Decode the address. */
3433 if (! ix86_decompose_address (op, &parts))
3434 abort ();
3435
3436 /* Look for some component that isn't known to be aligned. */
3437 if (parts.index)
3438 {
3439 if (parts.scale < 4
bdb429a5 3440 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3441 return 0;
3442 }
3443 if (parts.base)
3444 {
bdb429a5 3445 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3446 return 0;
3447 }
3448 if (parts.disp)
3449 {
3450 if (GET_CODE (parts.disp) != CONST_INT
3451 || (INTVAL (parts.disp) & 3) != 0)
3452 return 0;
3453 }
3454
3455 /* Didn't find one -- this must be an aligned address. */
3456 return 1;
3457}
e075ae69
RH
3458\f
3459/* Return true if the constant is something that can be loaded with
3460 a special instruction. Only handle 0.0 and 1.0; others are less
3461 worthwhile. */
57dbca5e
BS
3462
3463int
e075ae69
RH
3464standard_80387_constant_p (x)
3465 rtx x;
57dbca5e 3466{
2b04e52b 3467 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3468 return -1;
2b04e52b
JH
3469 /* Note that on the 80387, other constants, such as pi, that we should support
3470 too. On some machines, these are much slower to load as standard constant,
3471 than to load from doubles in memory. */
3472 if (x == CONST0_RTX (GET_MODE (x)))
3473 return 1;
3474 if (x == CONST1_RTX (GET_MODE (x)))
3475 return 2;
e075ae69 3476 return 0;
57dbca5e
BS
3477}
3478
2b04e52b
JH
3479/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3480 */
3481int
3482standard_sse_constant_p (x)
3483 rtx x;
3484{
3485 if (GET_CODE (x) != CONST_DOUBLE)
3486 return -1;
3487 return (x == CONST0_RTX (GET_MODE (x)));
3488}
3489
2a2ab3f9
JVA
3490/* Returns 1 if OP contains a symbol reference */
3491
3492int
3493symbolic_reference_mentioned_p (op)
3494 rtx op;
3495{
6f7d635c 3496 register const char *fmt;
2a2ab3f9
JVA
3497 register int i;
3498
3499 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3500 return 1;
3501
3502 fmt = GET_RTX_FORMAT (GET_CODE (op));
3503 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3504 {
3505 if (fmt[i] == 'E')
3506 {
3507 register int j;
3508
3509 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3510 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3511 return 1;
3512 }
e9a25f70 3513
2a2ab3f9
JVA
3514 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3515 return 1;
3516 }
3517
3518 return 0;
3519}
e075ae69
RH
3520
3521/* Return 1 if it is appropriate to emit `ret' instructions in the
3522 body of a function. Do this only if the epilogue is simple, needing a
3523 couple of insns. Prior to reloading, we can't tell how many registers
3524 must be saved, so return 0 then. Return 0 if there is no frame
3525 marker to de-allocate.
3526
3527 If NON_SAVING_SETJMP is defined and true, then it is not possible
3528 for the epilogue to be simple, so return 0. This is a special case
3529 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3530 until final, but jump_optimize may need to know sooner if a
3531 `return' is OK. */
32b5b1aa
SC
3532
3533int
e075ae69 3534ix86_can_use_return_insn_p ()
32b5b1aa 3535{
4dd2ac2c 3536 struct ix86_frame frame;
9a7372d6 3537
e075ae69
RH
3538#ifdef NON_SAVING_SETJMP
3539 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3540 return 0;
3541#endif
9a7372d6
RH
3542
3543 if (! reload_completed || frame_pointer_needed)
3544 return 0;
32b5b1aa 3545
9a7372d6
RH
3546 /* Don't allow more than 32 pop, since that's all we can do
3547 with one instruction. */
3548 if (current_function_pops_args
3549 && current_function_args_size >= 32768)
e075ae69 3550 return 0;
32b5b1aa 3551
4dd2ac2c
JH
3552 ix86_compute_frame_layout (&frame);
3553 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3554}
6189a572
JH
3555\f
3556/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3557int
3558x86_64_sign_extended_value (value)
3559 rtx value;
3560{
3561 switch (GET_CODE (value))
3562 {
3563 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3564 to be at least 32 and this all acceptable constants are
3565 represented as CONST_INT. */
3566 case CONST_INT:
3567 if (HOST_BITS_PER_WIDE_INT == 32)
3568 return 1;
3569 else
3570 {
3571 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3572 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
3573 }
3574 break;
3575
3576 /* For certain code models, the symbolic references are known to fit. */
3577 case SYMBOL_REF:
3578 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3579
3580 /* For certain code models, the code is near as well. */
3581 case LABEL_REF:
3582 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3583
3584 /* We also may accept the offsetted memory references in certain special
3585 cases. */
3586 case CONST:
3587 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3588 && XVECLEN (XEXP (value, 0), 0) == 1
3589 && XINT (XEXP (value, 0), 1) == 15)
3590 return 1;
3591 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3592 {
3593 rtx op1 = XEXP (XEXP (value, 0), 0);
3594 rtx op2 = XEXP (XEXP (value, 0), 1);
3595 HOST_WIDE_INT offset;
3596
3597 if (ix86_cmodel == CM_LARGE)
3598 return 0;
3599 if (GET_CODE (op2) != CONST_INT)
3600 return 0;
3601 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3602 switch (GET_CODE (op1))
3603 {
3604 case SYMBOL_REF:
3605 /* For CM_SMALL assume that latest object is 1MB before
3606 end of 31bits boundary. We may also accept pretty
3607 large negative constants knowing that all objects are
3608 in the positive half of address space. */
3609 if (ix86_cmodel == CM_SMALL
3610 && offset < 1024*1024*1024
3611 && trunc_int_for_mode (offset, SImode) == offset)
3612 return 1;
3613 /* For CM_KERNEL we know that all object resist in the
3614 negative half of 32bits address space. We may not
3615 accept negative offsets, since they may be just off
d6a7951f 3616 and we may accept pretty large positive ones. */
6189a572
JH
3617 if (ix86_cmodel == CM_KERNEL
3618 && offset > 0
3619 && trunc_int_for_mode (offset, SImode) == offset)
3620 return 1;
3621 break;
3622 case LABEL_REF:
3623 /* These conditions are similar to SYMBOL_REF ones, just the
3624 constraints for code models differ. */
3625 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3626 && offset < 1024*1024*1024
3627 && trunc_int_for_mode (offset, SImode) == offset)
3628 return 1;
3629 if (ix86_cmodel == CM_KERNEL
3630 && offset > 0
3631 && trunc_int_for_mode (offset, SImode) == offset)
3632 return 1;
3633 break;
3634 default:
3635 return 0;
3636 }
3637 }
3638 return 0;
3639 default:
3640 return 0;
3641 }
3642}
3643
3644/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3645int
3646x86_64_zero_extended_value (value)
3647 rtx value;
3648{
3649 switch (GET_CODE (value))
3650 {
3651 case CONST_DOUBLE:
3652 if (HOST_BITS_PER_WIDE_INT == 32)
3653 return (GET_MODE (value) == VOIDmode
3654 && !CONST_DOUBLE_HIGH (value));
3655 else
3656 return 0;
3657 case CONST_INT:
3658 if (HOST_BITS_PER_WIDE_INT == 32)
3659 return INTVAL (value) >= 0;
3660 else
3661 return !(INTVAL (value) & ~(HOST_WIDE_INT)0xffffffff);
3662 break;
3663
3664 /* For certain code models, the symbolic references are known to fit. */
3665 case SYMBOL_REF:
3666 return ix86_cmodel == CM_SMALL;
3667
3668 /* For certain code models, the code is near as well. */
3669 case LABEL_REF:
3670 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3671
3672 /* We also may accept the offsetted memory references in certain special
3673 cases. */
3674 case CONST:
3675 if (GET_CODE (XEXP (value, 0)) == PLUS)
3676 {
3677 rtx op1 = XEXP (XEXP (value, 0), 0);
3678 rtx op2 = XEXP (XEXP (value, 0), 1);
3679
3680 if (ix86_cmodel == CM_LARGE)
3681 return 0;
3682 switch (GET_CODE (op1))
3683 {
3684 case SYMBOL_REF:
3685 return 0;
d6a7951f 3686 /* For small code model we may accept pretty large positive
6189a572
JH
3687 offsets, since one bit is available for free. Negative
3688 offsets are limited by the size of NULL pointer area
3689 specified by the ABI. */
3690 if (ix86_cmodel == CM_SMALL
3691 && GET_CODE (op2) == CONST_INT
3692 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3693 && (trunc_int_for_mode (INTVAL (op2), SImode)
3694 == INTVAL (op2)))
3695 return 1;
3696 /* ??? For the kernel, we may accept adjustment of
3697 -0x10000000, since we know that it will just convert
d6a7951f 3698 negative address space to positive, but perhaps this
6189a572
JH
3699 is not worthwhile. */
3700 break;
3701 case LABEL_REF:
3702 /* These conditions are similar to SYMBOL_REF ones, just the
3703 constraints for code models differ. */
3704 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3705 && GET_CODE (op2) == CONST_INT
3706 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3707 && (trunc_int_for_mode (INTVAL (op2), SImode)
3708 == INTVAL (op2)))
3709 return 1;
3710 break;
3711 default:
3712 return 0;
3713 }
3714 }
3715 return 0;
3716 default:
3717 return 0;
3718 }
3719}
6fca22eb
RH
3720
3721/* Value should be nonzero if functions must have frame pointers.
3722 Zero means the frame pointer need not be set up (and parms may
3723 be accessed via the stack pointer) in functions that seem suitable. */
3724
3725int
3726ix86_frame_pointer_required ()
3727{
3728 /* If we accessed previous frames, then the generated code expects
3729 to be able to access the saved ebp value in our frame. */
3730 if (cfun->machine->accesses_prev_frame)
3731 return 1;
a4f31c00 3732
6fca22eb
RH
3733 /* Several x86 os'es need a frame pointer for other reasons,
3734 usually pertaining to setjmp. */
3735 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3736 return 1;
3737
3738 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3739 the frame pointer by default. Turn it back on now if we've not
3740 got a leaf function. */
3741 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3742 return 1;
3743
3744 return 0;
3745}
3746
/* Record that the current function accesses previous call frames.
   Called when expanding __builtin_frame_address-style accesses; the
   flag is later read by ix86_frame_pointer_required to force a frame
   pointer.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 3754\f
/* Buffer for the internal label naming the PIC load thunk; empty until
   load_pic_register generates the label.  */
static char pic_label_name[32];

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Emitted once at
   the end of the assembly file, and only if the deep-branch-prediction
   thunk was actually requested (pic_label_name was set).  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];

  /* Nothing to do unless a PIC thunk label was generated.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
    return;

  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
     to updating relocations to a section being discarded such that this
     doesn't work.  Ought to detect this at configure time.  */
#if 0
  /* The trick here is to create a linkonce section containing the
     pic label thunk, but to refer to it with an internal label.
     Because the label is internal, we don't have inter-dso name
     binding issues on hosts that don't support ".hidden".

     In order to use these macros, however, we must create a fake
     function decl.  */
  if (targetm.have_named_sections)
    {
      tree decl = build_decl (FUNCTION_DECL,
			      get_identifier ("i686.get_pc_thunk"),
			      error_mark_node);
      DECL_ONE_ONLY (decl) = 1;
      UNIQUE_SECTION (decl, 0);
      named_section (decl, NULL);
    }
  else
#else
  text_section ();
#endif

  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
     internal (non-global) label that's being emitted, it didn't make
     sense to have .type information for local labels.  This caused
     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
     me debug info for a label that you're declaring non-global?) this
     was changed to call ASM_OUTPUT_LABEL() instead.  */

  ASM_OUTPUT_LABEL (file, pic_label_name);

  /* The thunk body: copy the return address (at the top of the stack)
     into the PIC register, then return.  */
  xops[0] = pic_offset_table_rtx;
  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
  output_asm_insn ("ret", xops);
}
32b5b1aa 3808
/* Emit insns that load the PIC register (%ebx) with the address of the
   GOT.  With deep branch prediction, a call to a named thunk (emitted
   later by ix86_asm_file_end) is used; otherwise the classic
   call/pop sequence is generated.  Not used (and invalid) on x86-64.  */

void
load_pic_register ()
{
  rtx gotsym, pclab;

  /* x86-64 addresses the GOT differently; this path must not be used.  */
  if (TARGET_64BIT)
    abort();

  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      /* Lazily create the thunk's internal label; ix86_asm_file_end
	 keys off pic_label_name[0] to decide whether to emit it.  */
      if (! pic_label_name[0])
	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  /* Without the thunk, the call left the PC on the stack; pop it.  */
  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
8dfe5673 3837
0d7d98ee 3838/* Generate an "push" pattern for input ARG. */
e9a25f70 3839
e075ae69
RH
3840static rtx
3841gen_push (arg)
3842 rtx arg;
e9a25f70 3843{
c5c76735 3844 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
3845 gen_rtx_MEM (Pmode,
3846 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
3847 stack_pointer_rtx)),
3848 arg);
e9a25f70
JL
3849}
3850
4dd2ac2c
JH
3851/* Return 1 if we need to save REGNO. */
3852static int
1020a5ab
RH
3853ix86_save_reg (regno, maybe_eh_return)
3854 int regno;
37a58036 3855 int maybe_eh_return;
1020a5ab
RH
3856{
3857 if (flag_pic
3858 && ! TARGET_64BIT
3859 && regno == PIC_OFFSET_TABLE_REGNUM
3860 && (current_function_uses_pic_offset_table
3861 || current_function_uses_const_pool
3862 || current_function_calls_eh_return))
3863 return 1;
3864
3865 if (current_function_calls_eh_return && maybe_eh_return)
3866 {
3867 unsigned i;
3868 for (i = 0; ; i++)
3869 {
3870 unsigned test = EH_RETURN_DATA_REGNO(i);
3871 if (test == INVALID_REGNUM)
3872 break;
3873 if (test == (unsigned) regno)
3874 return 1;
3875 }
3876 }
4dd2ac2c 3877
1020a5ab
RH
3878 return (regs_ever_live[regno]
3879 && !call_used_regs[regno]
3880 && !fixed_regs[regno]
3881 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
3882}
3883
0903fcab
JH
3884/* Return number of registers to be saved on the stack. */
3885
3886static int
3887ix86_nsaved_regs ()
3888{
3889 int nregs = 0;
0903fcab
JH
3890 int regno;
3891
4dd2ac2c 3892 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 3893 if (ix86_save_reg (regno, true))
4dd2ac2c 3894 nregs++;
0903fcab
JH
3895 return nregs;
3896}
3897
3898/* Return the offset between two registers, one to be eliminated, and the other
3899 its replacement, at the start of a routine. */
3900
3901HOST_WIDE_INT
3902ix86_initial_elimination_offset (from, to)
3903 int from;
3904 int to;
3905{
4dd2ac2c
JH
3906 struct ix86_frame frame;
3907 ix86_compute_frame_layout (&frame);
564d80f4
JH
3908
3909 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 3910 return frame.hard_frame_pointer_offset;
564d80f4
JH
3911 else if (from == FRAME_POINTER_REGNUM
3912 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 3913 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
3914 else
3915 {
564d80f4
JH
3916 if (to != STACK_POINTER_REGNUM)
3917 abort ();
3918 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 3919 return frame.stack_pointer_offset;
564d80f4
JH
3920 else if (from != FRAME_POINTER_REGNUM)
3921 abort ();
0903fcab 3922 else
4dd2ac2c 3923 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
3924 }
3925}
3926
/* Fill structure ix86_frame about frame of currently computed function.
   Computes, in order from the return address downward: the saved frame
   pointer, the register save area, the va-arg save area, alignment
   padding, local variables, outgoing argument space, and final stack
   alignment padding — then carves the x86-64 red zone out of the amount
   the prologue must actually allocate.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return value and save base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  */
  if (ACCUMULATE_OUTGOING_ARGS)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  */
  frame->padding2 = ((offset + preferred_alignment - 1)
		     & -preferred_alignment) - offset;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* On x86-64, a leaf function whose stack pointer never moves may keep
     its frame in the red zone below the stack pointer, up to
     RED_ZONE_SIZE - RED_ZONE_RESERVE bytes, avoiding any allocation.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
4035
0903fcab
JH
4036/* Emit code to save registers in the prologue. */
4037
4038static void
4039ix86_emit_save_regs ()
4040{
4041 register int regno;
0903fcab 4042 rtx insn;
0903fcab 4043
4dd2ac2c 4044 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4045 if (ix86_save_reg (regno, true))
0903fcab 4046 {
0d7d98ee 4047 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4048 RTX_FRAME_RELATED_P (insn) = 1;
4049 }
4050}
4051
c6036a37
JH
4052/* Emit code to save registers using MOV insns. First register
4053 is restored from POINTER + OFFSET. */
4054static void
4055ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4056 rtx pointer;
4057 HOST_WIDE_INT offset;
c6036a37
JH
4058{
4059 int regno;
4060 rtx insn;
4061
4062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4063 if (ix86_save_reg (regno, true))
4064 {
b72f00af
RK
4065 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4066 Pmode, offset),
c6036a37
JH
4067 gen_rtx_REG (Pmode, regno));
4068 RTX_FRAME_RELATED_P (insn) = 1;
4069 offset += UNITS_PER_WORD;
4070 }
4071}
4072
/* Expand the prologue into a bunch of separate insns: optional frame
   pointer setup, register saves (pushes or moves), stack allocation
   (direct adjustment or a probing _alloca call), subtarget hook, and
   PIC register load.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
				   || current_function_uses_const_pool)
		      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  /* When not optimizing for size, small functions may use the faster
     mov-based save sequence instead of pushes.  */
  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  /* With mov-based saves, the register area is part of the allocation;
     the stores themselves are emitted after the stack is adjusted.  */
  if (!use_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the
	 size in %eax so each page is touched.
	 ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      /* Record that the call reads %eax so it is not deleted.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if (current_function_profile && ! pic_reg_used)
    emit_insn (gen_blockage ());
}
4169
da2d1d3a
JH
4170/* Emit code to restore saved registers using MOV insns. First register
4171 is restored from POINTER + OFFSET. */
4172static void
1020a5ab
RH
4173ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4174 rtx pointer;
4175 int offset;
37a58036 4176 int maybe_eh_return;
da2d1d3a
JH
4177{
4178 int regno;
da2d1d3a 4179
4dd2ac2c 4180 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4181 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4182 {
4dd2ac2c 4183 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4184 adjust_address (gen_rtx_MEM (Pmode, pointer),
4185 Pmode, offset));
4dd2ac2c 4186 offset += UNITS_PER_WORD;
da2d1d3a
JH
4187 }
4188}
4189
/* Restore function stack, frame, and registers.

   STYLE encodes the caller's context: 0 for a sibcall epilogue (no
   return insn wanted), 2 for an eh_return epilogue (restore EH data
   registers and add EH_RETURN_STACKADJ to the stack pointer), anything
   else for a normal return.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea
     instruction, while this code results in LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Stack adjustment counts the saved frame pointer word;
		 restore ebp from its slot, then move esp past it.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      /* Pop in ascending order, mirroring the prologue's descending
	 pushes.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
4356\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.

   On success, fills OUT with the base register, index register,
   displacement, and scale factor of the address.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
01329426
JH
4486\f
4487/* Return cost of the memory address x.
4488 For i386, it is better to use a complex address than let gcc copy
4489 the address into a reg and make a new pseudo. But not if the address
4490 requires to two regs - that would mean more pseudos with longer
4491 lifetimes. */
4492int
4493ix86_address_cost (x)
4494 rtx x;
4495{
4496 struct ix86_address parts;
4497 int cost = 1;
3b3c6a3f 4498
01329426
JH
4499 if (!ix86_decompose_address (x, &parts))
4500 abort ();
4501
4502 /* More complex memory references are better. */
4503 if (parts.disp && parts.disp != const0_rtx)
4504 cost--;
4505
4506 /* Attempt to minimize number of registers in the address. */
4507 if ((parts.base
4508 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4509 || (parts.index
4510 && (!REG_P (parts.index)
4511 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4512 cost++;
4513
4514 if (parts.base
4515 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4516 && parts.index
4517 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4518 && parts.base != parts.index)
4519 cost++;
4520
4521 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4522 since it's predecode logic can't detect the length of instructions
4523 and it degenerates to vector decoded. Increase cost of such
4524 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4525 to split such addresses or even refuse such addresses at all.
01329426
JH
4526
4527 Following addressing modes are affected:
4528 [base+scale*index]
4529 [scale*index+disp]
4530 [base+index]
0f290768 4531
01329426
JH
4532 The first and last case may be avoidable by explicitly coding the zero in
4533 memory address, but I don't have AMD-K6 machine handy to check this
4534 theory. */
4535
4536 if (TARGET_K6
4537 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4538 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4539 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4540 cost += 10;
0f290768 4541
01329426
JH
4542 return cost;
4543}
4544\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* 64-bit form: (const (plus (unspec [sym] N) offset)) or
	 (const (unspec [sym] N)); unwrap down to the symbol/label.
	 NOTE(review): UNSPEC number 15 appears to be the 64-bit
	 GOT-relative reference — confirm against i386.md.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XVECLEN (term, 0) != 1
	  || XINT (term, 1) != 15)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  /* 32-bit form: (plus pic_reg (const ...)) wrapping an UNSPEC 7
     (@GOTOFF — see legitimate_pic_address_disp_p) around the symbol.  */
  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
4601\f
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns nonzero when DISP may appear as the
   displacement part of a PIC address.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      /* For the small PIC model, a symbol plus a +/-1GB constant
	 offset still fits; strip the offset before the check.  */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XVECLEN (disp, 0) != 1
	  || XINT (disp, 1) != 15)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit PIC: allow an integral offset around the UNSPEC.  */
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6: /* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7: /* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
4668
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE when the address is valid, FALSE otherwise.  STRICT
   nonzero selects the strict register predicates (hard regs only).  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* On failure, REASON/REASON_RTX describe the offending part for the
     TARGET_DEBUG_ADDR dump at report_error below.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  Hardware only supports scales 1, 2, 4, 8,
     and a scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (TARGET_64BIT)
	{
	  /* x86-64 displacements must fit the sign-extended 32-bit
	     immediate field.  */
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

          /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 4872\f
55efb413
JW
4873/* Return an unique alias set for the GOT. */
4874
0f290768 4875static HOST_WIDE_INT
55efb413
JW
4876ix86_GOT_alias_set ()
4877{
4878 static HOST_WIDE_INT set = -1;
4879 if (set == -1)
4880 set = new_alias_set ();
4881 return set;
0f290768 4882}
55efb413 4883
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
	new = addr;
      else
	{
	  /* This symbol may be referenced via a displacement from the PIC
	     base address (@GOTOFF).  Unspec 7 marks a @GOTOFF reference.  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	  if (reg != 0)
	    {
	      emit_move_insn (reg, new);
	      new = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64bit: load the address via a @GOTPCREL (unspec 15) slot.  */
	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  Unspec 6 marks a @GOT slot.  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == UNSPEC)
	    {
	      /* Check that the unspec is one of the ones we generate?  */
	    }
	  else if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  current_function_uses_pic_offset_table = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* ??? We need to limit offsets here.  */
		}
	    }
	  else
	    {
	      /* Legitimize each operand recursively, then recombine,
		 folding a constant result back into the base when
		 possible.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
5033\f
3b3c6a3f
MM
5034/* Try machine-dependent ways of modifying an illegitimate address
5035 to be legitimate. If we find one, return the new, valid address.
5036 This macro is used in only one place: `memory_address' in explow.c.
5037
5038 OLDX is the address as it was before break_out_memory_refs was called.
5039 In some cases it is useful to look at this to decide what needs to be done.
5040
5041 MODE and WIN are passed so that this macro can use
5042 GO_IF_LEGITIMATE_ADDRESS.
5043
5044 It is always safe for this macro to do nothing. It exists to recognize
5045 opportunities to optimize the output.
5046
5047 For the 80386, we handle X+REG by loading X into a register R and
5048 using R+REG. R will go in a general reg and indexing will be used.
5049 However, if REG is a broken-out memory address or multiplication,
5050 nothing needs to be done because REG can certainly go in a general reg.
5051
5052 When -fpic is used, special handling is needed for symbolic references.
5053 See comments by legitimize_pic_address in i386.c for details. */
5054
5055rtx
5056legitimize_address (x, oldx, mode)
5057 register rtx x;
bb5177ac 5058 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5059 enum machine_mode mode;
5060{
5061 int changed = 0;
5062 unsigned log;
5063
5064 if (TARGET_DEBUG_ADDR)
5065 {
e9a25f70
JL
5066 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5067 GET_MODE_NAME (mode));
3b3c6a3f
MM
5068 debug_rtx (x);
5069 }
5070
5071 if (flag_pic && SYMBOLIC_CONST (x))
5072 return legitimize_pic_address (x, 0);
5073
5074 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5075 if (GET_CODE (x) == ASHIFT
5076 && GET_CODE (XEXP (x, 1)) == CONST_INT
5077 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5078 {
5079 changed = 1;
a269a03c
JC
5080 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5081 GEN_INT (1 << log));
3b3c6a3f
MM
5082 }
5083
5084 if (GET_CODE (x) == PLUS)
5085 {
0f290768 5086 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5087
3b3c6a3f
MM
5088 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5089 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5090 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5091 {
5092 changed = 1;
c5c76735
JL
5093 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5094 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5095 GEN_INT (1 << log));
3b3c6a3f
MM
5096 }
5097
5098 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5099 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5100 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5101 {
5102 changed = 1;
c5c76735
JL
5103 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5104 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5105 GEN_INT (1 << log));
3b3c6a3f
MM
5106 }
5107
0f290768 5108 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5109 if (GET_CODE (XEXP (x, 1)) == MULT)
5110 {
5111 rtx tmp = XEXP (x, 0);
5112 XEXP (x, 0) = XEXP (x, 1);
5113 XEXP (x, 1) = tmp;
5114 changed = 1;
5115 }
5116
5117 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5118 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5119 created by virtual register instantiation, register elimination, and
5120 similar optimizations. */
5121 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5122 {
5123 changed = 1;
c5c76735
JL
5124 x = gen_rtx_PLUS (Pmode,
5125 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5126 XEXP (XEXP (x, 1), 0)),
5127 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5128 }
5129
e9a25f70
JL
5130 /* Canonicalize
5131 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5132 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5133 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5135 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5136 && CONSTANT_P (XEXP (x, 1)))
5137 {
00c79232
ML
5138 rtx constant;
5139 rtx other = NULL_RTX;
3b3c6a3f
MM
5140
5141 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5142 {
5143 constant = XEXP (x, 1);
5144 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5145 }
5146 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5147 {
5148 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5149 other = XEXP (x, 1);
5150 }
5151 else
5152 constant = 0;
5153
5154 if (constant)
5155 {
5156 changed = 1;
c5c76735
JL
5157 x = gen_rtx_PLUS (Pmode,
5158 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5159 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5160 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5161 }
5162 }
5163
5164 if (changed && legitimate_address_p (mode, x, FALSE))
5165 return x;
5166
5167 if (GET_CODE (XEXP (x, 0)) == MULT)
5168 {
5169 changed = 1;
5170 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5171 }
5172
5173 if (GET_CODE (XEXP (x, 1)) == MULT)
5174 {
5175 changed = 1;
5176 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5177 }
5178
5179 if (changed
5180 && GET_CODE (XEXP (x, 1)) == REG
5181 && GET_CODE (XEXP (x, 0)) == REG)
5182 return x;
5183
5184 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5185 {
5186 changed = 1;
5187 x = legitimize_pic_address (x, 0);
5188 }
5189
5190 if (changed && legitimate_address_p (mode, x, FALSE))
5191 return x;
5192
5193 if (GET_CODE (XEXP (x, 0)) == REG)
5194 {
5195 register rtx temp = gen_reg_rtx (Pmode);
5196 register rtx val = force_operand (XEXP (x, 1), temp);
5197 if (val != temp)
5198 emit_move_insn (temp, val);
5199
5200 XEXP (x, 1) = temp;
5201 return x;
5202 }
5203
5204 else if (GET_CODE (XEXP (x, 1)) == REG)
5205 {
5206 register rtx temp = gen_reg_rtx (Pmode);
5207 register rtx val = force_operand (XEXP (x, 0), temp);
5208 if (val != temp)
5209 emit_move_insn (temp, val);
5210
5211 XEXP (x, 0) = temp;
5212 return x;
5213 }
5214 }
5215
5216 return x;
5217}
2a2ab3f9
JVA
5218\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* Symbols with SYMBOL_REF_FLAG set are local (see the comments
	 before legitimize_pic_address) and take no @PLT suffix.  */
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Translate our PIC unspec numbers into relocation suffixes.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	case 15:
	  fputs ("@GOTPCREL(%RIP)", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 5336
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  Emits an assembler
   pseudo-op followed by the constant X and a newline to FILE.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  /* Without a quad directive we cannot emit 64-bit addresses.  */
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  /* PIC addresses may contain @GOT/@GOTOFF unspecs that only
     output_pic_addr_const knows how to print.  */
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
5358
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  Returns the simplified rtx, or
   ORIG_X unchanged when no PIC construct is recognized.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (TARGET_64BIT)
    {
      /* Only (const (unspec [sym] 15)) -- a @GOTPCREL reference --
	 is simplified on 64bit.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != 15)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  /* 32bit form is (plus pic_reg (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* Unspec 6 is @GOT, 7 is @GOTOFF.  */
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == 6
	  || XINT (x, 1) == 7))
    return XVECEXP (x, 0, 0);

  /* Same, with a constant offset added to the symbol.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (XINT (XEXP (x, 0), 1) == 6
	  || XINT (XEXP (x, 0), 1) == 7))
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
2a2ab3f9 5398\f
/* Output to FILE the condition-code suffix (e.g. "e", "ne", "g") for
   comparison CODE evaluated in flags mode MODE.  If REVERSE is nonzero
   the condition is inverted first.  FP is nonzero when the suffix is
   for an fcmov-style instruction, which spells some unsigned
   conditions differently (see the GTU/GEU cases).  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  /* FP compares are first mapped onto an equivalent integer condition
     in CCmode; combined (two-part) FP comparisons are not allowed here.  */
  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
5488
e075ae69
RH
/* Output the assembler name for register X to FILE.  CODE selects the
   width/variant: 'b' byte, 'w' word, 'k' dword, 'q' qword, 'h' high
   byte half, 'y' the "st(0)" spelling for the FP stack top; any other
   code uses the natural size of X's mode.  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  /* These internal registers should never appear in assembler output.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Normalize CODE to a byte size (3 and 0 are special markers for
     the st(0) form and the high-byte half respectively).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers of 4+ bytes get an 'e' (or 'r' on 64bit
	 for 8-byte) prefix on their 2-byte base name.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
5577
2a2ab3f9 5578/* Meaning of CODE:
fe25fea3 5579 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 5580 C -- print opcode suffix for set/cmov insn.
fe25fea3 5581 c -- like C, but print reversed condition
ef6257cd 5582 F,f -- likewise, but for floating-point.
2a2ab3f9
JVA
5583 R -- print the prefix for register names.
5584 z -- print the opcode suffix for the size of the current operand.
5585 * -- print a star (in certain assembler syntax)
fb204271 5586 A -- print an absolute memory reference.
2a2ab3f9 5587 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
5588 s -- print a shift double count, followed by the assemblers argument
5589 delimiter.
fe25fea3
SC
5590 b -- print the QImode name of the register for the indicated operand.
5591 %b0 would print %al if operands[0] is reg 0.
5592 w -- likewise, print the HImode name of the register.
5593 k -- likewise, print the SImode name of the register.
3f3f2124 5594 q -- likewise, print the DImode name of the register.
ef6257cd
JH
5595 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5596 y -- print "st(0)" instead of "st" as a register.
a46d1d38 5597 D -- print condition for SSE cmp instruction.
ef6257cd
JH
5598 P -- if PIC, print an @PLT suffix.
5599 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 5600 */
2a2ab3f9
JVA
5601
5602void
5603print_operand (file, x, code)
5604 FILE *file;
5605 rtx x;
5606 int code;
5607{
5608 if (code)
5609 {
5610 switch (code)
5611 {
5612 case '*':
80f33d06 5613 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
5614 putc ('*', file);
5615 return;
5616
fb204271 5617 case 'A':
80f33d06 5618 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 5619 putc ('*', file);
80f33d06 5620 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
5621 {
5622 /* Intel syntax. For absolute addresses, registers should not
5623 be surrounded by braces. */
5624 if (GET_CODE (x) != REG)
5625 {
5626 putc ('[', file);
5627 PRINT_OPERAND (file, x, 0);
5628 putc (']', file);
5629 return;
5630 }
5631 }
80f33d06
GS
5632 else
5633 abort ();
fb204271
DN
5634
5635 PRINT_OPERAND (file, x, 0);
5636 return;
5637
5638
2a2ab3f9 5639 case 'L':
80f33d06 5640 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5641 putc ('l', file);
2a2ab3f9
JVA
5642 return;
5643
5644 case 'W':
80f33d06 5645 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5646 putc ('w', file);
2a2ab3f9
JVA
5647 return;
5648
5649 case 'B':
80f33d06 5650 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5651 putc ('b', file);
2a2ab3f9
JVA
5652 return;
5653
5654 case 'Q':
80f33d06 5655 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5656 putc ('l', file);
2a2ab3f9
JVA
5657 return;
5658
5659 case 'S':
80f33d06 5660 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5661 putc ('s', file);
2a2ab3f9
JVA
5662 return;
5663
5f1ec3e6 5664 case 'T':
80f33d06 5665 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5666 putc ('t', file);
5f1ec3e6
JVA
5667 return;
5668
2a2ab3f9
JVA
5669 case 'z':
5670 /* 387 opcodes don't get size suffixes if the operands are
0f290768 5671 registers. */
2a2ab3f9
JVA
5672
5673 if (STACK_REG_P (x))
5674 return;
5675
5676 /* this is the size of op from size of operand */
5677 switch (GET_MODE_SIZE (GET_MODE (x)))
5678 {
2a2ab3f9 5679 case 2:
155d8a47
JW
5680#ifdef HAVE_GAS_FILDS_FISTS
5681 putc ('s', file);
5682#endif
2a2ab3f9
JVA
5683 return;
5684
5685 case 4:
5686 if (GET_MODE (x) == SFmode)
5687 {
e075ae69 5688 putc ('s', file);
2a2ab3f9
JVA
5689 return;
5690 }
5691 else
e075ae69 5692 putc ('l', file);
2a2ab3f9
JVA
5693 return;
5694
5f1ec3e6 5695 case 12:
2b589241 5696 case 16:
e075ae69
RH
5697 putc ('t', file);
5698 return;
5f1ec3e6 5699
2a2ab3f9
JVA
5700 case 8:
5701 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
5702 {
5703#ifdef GAS_MNEMONICS
e075ae69 5704 putc ('q', file);
56c0e8fa 5705#else
e075ae69
RH
5706 putc ('l', file);
5707 putc ('l', file);
56c0e8fa
JVA
5708#endif
5709 }
e075ae69
RH
5710 else
5711 putc ('l', file);
2a2ab3f9 5712 return;
155d8a47
JW
5713
5714 default:
5715 abort ();
2a2ab3f9 5716 }
4af3895e
JVA
5717
5718 case 'b':
5719 case 'w':
5720 case 'k':
3f3f2124 5721 case 'q':
4af3895e
JVA
5722 case 'h':
5723 case 'y':
5cb6195d 5724 case 'X':
e075ae69 5725 case 'P':
4af3895e
JVA
5726 break;
5727
2d49677f
SC
5728 case 's':
5729 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5730 {
5731 PRINT_OPERAND (file, x, 0);
e075ae69 5732 putc (',', file);
2d49677f 5733 }
a269a03c
JC
5734 return;
5735
a46d1d38
JH
5736 case 'D':
5737 /* Little bit of braindamage here. The SSE compare instructions
5738 does use completely different names for the comparisons that the
5739 fp conditional moves. */
5740 switch (GET_CODE (x))
5741 {
5742 case EQ:
5743 case UNEQ:
5744 fputs ("eq", file);
5745 break;
5746 case LT:
5747 case UNLT:
5748 fputs ("lt", file);
5749 break;
5750 case LE:
5751 case UNLE:
5752 fputs ("le", file);
5753 break;
5754 case UNORDERED:
5755 fputs ("unord", file);
5756 break;
5757 case NE:
5758 case LTGT:
5759 fputs ("neq", file);
5760 break;
5761 case UNGE:
5762 case GE:
5763 fputs ("nlt", file);
5764 break;
5765 case UNGT:
5766 case GT:
5767 fputs ("nle", file);
5768 break;
5769 case ORDERED:
5770 fputs ("ord", file);
5771 break;
5772 default:
5773 abort ();
5774 break;
5775 }
5776 return;
1853aadd 5777 case 'C':
e075ae69 5778 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 5779 return;
fe25fea3 5780 case 'F':
e075ae69 5781 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
5782 return;
5783
e9a25f70 5784 /* Like above, but reverse condition */
e075ae69 5785 case 'c':
c1d5afc4
CR
5786 /* Check to see if argument to %c is really a constant
5787 and not a condition code which needs to be reversed. */
5788 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5789 {
5790 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5791 return;
5792 }
e075ae69
RH
5793 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5794 return;
fe25fea3 5795 case 'f':
e075ae69 5796 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 5797 return;
ef6257cd
JH
5798 case '+':
5799 {
5800 rtx x;
e5cb57e8 5801
ef6257cd
JH
5802 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5803 return;
a4f31c00 5804
ef6257cd
JH
5805 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5806 if (x)
5807 {
5808 int pred_val = INTVAL (XEXP (x, 0));
5809
5810 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5811 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5812 {
5813 int taken = pred_val > REG_BR_PROB_BASE / 2;
5814 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5815
5816 /* Emit hints only in the case default branch prediction
5817 heruistics would fail. */
5818 if (taken != cputaken)
5819 {
5820 /* We use 3e (DS) prefix for taken branches and
5821 2e (CS) prefix for not taken branches. */
5822 if (taken)
5823 fputs ("ds ; ", file);
5824 else
5825 fputs ("cs ; ", file);
5826 }
5827 }
5828 }
5829 return;
5830 }
4af3895e 5831 default:
68daafd4
JVA
5832 {
5833 char str[50];
68daafd4
JVA
5834 sprintf (str, "invalid operand code `%c'", code);
5835 output_operand_lossage (str);
5836 }
2a2ab3f9
JVA
5837 }
5838 }
e9a25f70 5839
2a2ab3f9
JVA
5840 if (GET_CODE (x) == REG)
5841 {
5842 PRINT_REG (x, code, file);
5843 }
e9a25f70 5844
2a2ab3f9
JVA
5845 else if (GET_CODE (x) == MEM)
5846 {
e075ae69 5847 /* No `byte ptr' prefix for call instructions. */
80f33d06 5848 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 5849 {
69ddee61 5850 const char * size;
e075ae69
RH
5851 switch (GET_MODE_SIZE (GET_MODE (x)))
5852 {
5853 case 1: size = "BYTE"; break;
5854 case 2: size = "WORD"; break;
5855 case 4: size = "DWORD"; break;
5856 case 8: size = "QWORD"; break;
5857 case 12: size = "XWORD"; break;
a7180f70 5858 case 16: size = "XMMWORD"; break;
e075ae69 5859 default:
564d80f4 5860 abort ();
e075ae69 5861 }
fb204271
DN
5862
5863 /* Check for explicit size override (codes 'b', 'w' and 'k') */
5864 if (code == 'b')
5865 size = "BYTE";
5866 else if (code == 'w')
5867 size = "WORD";
5868 else if (code == 'k')
5869 size = "DWORD";
5870
e075ae69
RH
5871 fputs (size, file);
5872 fputs (" PTR ", file);
2a2ab3f9 5873 }
e075ae69
RH
5874
5875 x = XEXP (x, 0);
5876 if (flag_pic && CONSTANT_ADDRESS_P (x))
5877 output_pic_addr_const (file, x, code);
0d7d98ee
JH
5878 /* Avoid (%rip) for call operands. */
5879 else if (CONSTANT_ADDRESS_P (x) && code =='P'
5880 && GET_CODE (x) != CONST_INT)
5881 output_addr_const (file, x);
2a2ab3f9 5882 else
e075ae69 5883 output_address (x);
2a2ab3f9 5884 }
e9a25f70 5885
2a2ab3f9
JVA
5886 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5887 {
e9a25f70
JL
5888 REAL_VALUE_TYPE r;
5889 long l;
5890
5f1ec3e6
JVA
5891 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5892 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 5893
80f33d06 5894 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5895 putc ('$', file);
52267fcb 5896 fprintf (file, "0x%lx", l);
5f1ec3e6 5897 }
e9a25f70 5898
0f290768 5899 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
5900 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5901 {
e9a25f70
JL
5902 REAL_VALUE_TYPE r;
5903 char dstr[30];
5904
5f1ec3e6
JVA
5905 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5906 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5907 fprintf (file, "%s", dstr);
2a2ab3f9 5908 }
e9a25f70 5909
2b589241
JH
5910 else if (GET_CODE (x) == CONST_DOUBLE
5911 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 5912 {
e9a25f70
JL
5913 REAL_VALUE_TYPE r;
5914 char dstr[30];
5915
5f1ec3e6
JVA
5916 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5917 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5918 fprintf (file, "%s", dstr);
2a2ab3f9 5919 }
79325812 5920 else
2a2ab3f9 5921 {
4af3895e 5922 if (code != 'P')
2a2ab3f9 5923 {
695dac07 5924 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 5925 {
80f33d06 5926 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
5927 putc ('$', file);
5928 }
2a2ab3f9
JVA
5929 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5930 || GET_CODE (x) == LABEL_REF)
e075ae69 5931 {
80f33d06 5932 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
5933 putc ('$', file);
5934 else
5935 fputs ("OFFSET FLAT:", file);
5936 }
2a2ab3f9 5937 }
e075ae69
RH
5938 if (GET_CODE (x) == CONST_INT)
5939 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5940 else if (flag_pic)
2a2ab3f9
JVA
5941 output_pic_addr_const (file, x, code);
5942 else
5943 output_addr_const (file, x);
5944 }
5945}
5946\f
/* Print a memory operand whose address is ADDR to FILE, honoring the
   assembler dialect currently in effect (AT&T `disp(base,index,scale)'
   vs. Intel `[base+index*scale+disp]').  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Tear the address into base/index/displacement/scale components.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax needs an explicit segment for a bare
		 constant address.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel: [base+index*scale+disp].  Symbols are printed before
	     the bracketed part; a numeric offset goes inside it.  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
6068\f
6069/* Split one or more DImode RTL references into pairs of SImode
6070 references. The RTL can be REG, offsettable MEM, integer constant, or
6071 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6072 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6073 that parallel "operands". */
2a2ab3f9
JVA
6074
6075void
6076split_di (operands, num, lo_half, hi_half)
6077 rtx operands[];
6078 int num;
6079 rtx lo_half[], hi_half[];
6080{
6081 while (num--)
6082 {
57dbca5e 6083 rtx op = operands[num];
b932f770
JH
6084
6085 /* simplify_subreg refuse to split volatile memory addresses,
6086 but we still have to handle it. */
6087 if (GET_CODE (op) == MEM)
2a2ab3f9 6088 {
f4ef873c 6089 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6090 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6091 }
6092 else
b932f770 6093 {
38ca929b
JH
6094 lo_half[num] = simplify_gen_subreg (SImode, op,
6095 GET_MODE (op) == VOIDmode
6096 ? DImode : GET_MODE (op), 0);
6097 hi_half[num] = simplify_gen_subreg (SImode, op,
6098 GET_MODE (op) == VOIDmode
6099 ? DImode : GET_MODE (op), 4);
b932f770 6100 }
2a2ab3f9
JVA
6101 }
6102}
44cf5b6a
JH
6103/* Split one or more TImode RTL references into pairs of SImode
6104 references. The RTL can be REG, offsettable MEM, integer constant, or
6105 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6106 split and "num" is its length. lo_half and hi_half are output arrays
6107 that parallel "operands". */
6108
6109void
6110split_ti (operands, num, lo_half, hi_half)
6111 rtx operands[];
6112 int num;
6113 rtx lo_half[], hi_half[];
6114{
6115 while (num--)
6116 {
6117 rtx op = operands[num];
b932f770
JH
6118
6119 /* simplify_subreg refuse to split volatile memory addresses, but we
6120 still have to handle it. */
6121 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6122 {
6123 lo_half[num] = adjust_address (op, DImode, 0);
6124 hi_half[num] = adjust_address (op, DImode, 8);
6125 }
6126 else
b932f770
JH
6127 {
6128 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6129 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6130 }
44cf5b6a
JH
6131 }
6132}
2a2ab3f9 6133\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  /* Result templates are built up in this static buffer and returned;
     callers must consume the string before the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the x87 mnemonic stem (fiXXX for integer memory operands) and
     the SSE stem for the operation.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  /* The SSE forms are simple two-operand instructions; pick the ss/sd
     suffix by mode and we are done.  */
  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative operations: canonicalize so that operands[0] and
	 operands[1] name the same register.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative operations: memory operands select the
	 reversed (r) form when they are the first source.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 6357
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.  */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  /* Capture the current control word, then make a copy with the
     rounding-control field (bits 10-11) forced to 1:1, i.e. OR with
     0xc00 — x87 "round toward zero" for the trunc patterns.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    /* Insert 0xc into the field directly to dodge the partial-register
       stall an OR on the HImode copy would incur.  */
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
6376
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  /* The store templates below only handle a memory destination.  */
  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word (operand 3), store (popping
     when the value dies or we duplicated it above), then restore the
     original control word (operand 2).  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
cda749b1 6410
e075ae69
RH
6411/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6412 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6413 when fucom should be used. */
6414
69ddee61 6415const char *
e075ae69 6416output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
6417 rtx insn;
6418 rtx *operands;
e075ae69 6419 int eflags_p, unordered_p;
cda749b1 6420{
e075ae69
RH
6421 int stack_top_dies;
6422 rtx cmp_op0 = operands[0];
6423 rtx cmp_op1 = operands[1];
0644b628 6424 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
6425
6426 if (eflags_p == 2)
6427 {
6428 cmp_op0 = cmp_op1;
6429 cmp_op1 = operands[2];
6430 }
0644b628
JH
6431 if (is_sse)
6432 {
6433 if (GET_MODE (operands[0]) == SFmode)
6434 if (unordered_p)
6435 return "ucomiss\t{%1, %0|%0, %1}";
6436 else
6437 return "comiss\t{%1, %0|%0, %y}";
6438 else
6439 if (unordered_p)
6440 return "ucomisd\t{%1, %0|%0, %1}";
6441 else
6442 return "comisd\t{%1, %0|%0, %y}";
6443 }
cda749b1 6444
e075ae69 6445 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
6446 abort ();
6447
e075ae69 6448 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 6449
e075ae69
RH
6450 if (STACK_REG_P (cmp_op1)
6451 && stack_top_dies
6452 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6453 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 6454 {
e075ae69
RH
6455 /* If both the top of the 387 stack dies, and the other operand
6456 is also a stack register that dies, then this must be a
6457 `fcompp' float compare */
6458
6459 if (eflags_p == 1)
6460 {
6461 /* There is no double popping fcomi variant. Fortunately,
6462 eflags is immune from the fstp's cc clobbering. */
6463 if (unordered_p)
6464 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6465 else
6466 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6467 return "fstp\t%y0";
6468 }
6469 else
cda749b1 6470 {
e075ae69
RH
6471 if (eflags_p == 2)
6472 {
6473 if (unordered_p)
6474 return "fucompp\n\tfnstsw\t%0";
6475 else
6476 return "fcompp\n\tfnstsw\t%0";
6477 }
cda749b1
JW
6478 else
6479 {
e075ae69
RH
6480 if (unordered_p)
6481 return "fucompp";
6482 else
6483 return "fcompp";
cda749b1
JW
6484 }
6485 }
cda749b1
JW
6486 }
6487 else
6488 {
e075ae69 6489 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 6490
0f290768 6491 static const char * const alt[24] =
e075ae69
RH
6492 {
6493 "fcom%z1\t%y1",
6494 "fcomp%z1\t%y1",
6495 "fucom%z1\t%y1",
6496 "fucomp%z1\t%y1",
0f290768 6497
e075ae69
RH
6498 "ficom%z1\t%y1",
6499 "ficomp%z1\t%y1",
6500 NULL,
6501 NULL,
6502
6503 "fcomi\t{%y1, %0|%0, %y1}",
6504 "fcomip\t{%y1, %0|%0, %y1}",
6505 "fucomi\t{%y1, %0|%0, %y1}",
6506 "fucomip\t{%y1, %0|%0, %y1}",
6507
6508 NULL,
6509 NULL,
6510 NULL,
6511 NULL,
6512
6513 "fcom%z2\t%y2\n\tfnstsw\t%0",
6514 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6515 "fucom%z2\t%y2\n\tfnstsw\t%0",
6516 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 6517
e075ae69
RH
6518 "ficom%z2\t%y2\n\tfnstsw\t%0",
6519 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6520 NULL,
6521 NULL
6522 };
6523
6524 int mask;
69ddee61 6525 const char *ret;
e075ae69
RH
6526
6527 mask = eflags_p << 3;
6528 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6529 mask |= unordered_p << 1;
6530 mask |= stack_top_dies;
6531
6532 if (mask >= 24)
6533 abort ();
6534 ret = alt[mask];
6535 if (ret == NULL)
6536 abort ();
cda749b1 6537
e075ae69 6538 return ret;
cda749b1
JW
6539 }
6540}
2a2ab3f9 6541
f88c65f7
RH
6542void
6543ix86_output_addr_vec_elt (file, value)
6544 FILE *file;
6545 int value;
6546{
6547 const char *directive = ASM_LONG;
6548
6549 if (TARGET_64BIT)
6550 {
6551#ifdef ASM_QUAD
6552 directive = ASM_QUAD;
6553#else
6554 abort ();
6555#endif
6556 }
6557
6558 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6559}
6560
/* Emit one element of a relative (PIC) address vector: the label VALUE
   expressed relative to the anchor label REL, in whichever form the
   target and assembler support.  */
void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    /* 64-bit: express the entry relative to the table anchor.  */
    fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
  else
    /* Fallback for assemblers without @GOTOFF in data sections.  */
    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
		 ASM_LONG, LPREFIX, value);
}
32b5b1aa 6575\f
a8bac9ab
RH
6576/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6577 for the target. */
6578
6579void
6580ix86_expand_clear (dest)
6581 rtx dest;
6582{
6583 rtx tmp;
6584
6585 /* We play register width games, which are only valid after reload. */
6586 if (!reload_completed)
6587 abort ();
6588
6589 /* Avoid HImode and its attendant prefix byte. */
6590 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6591 dest = gen_rtx_REG (SImode, REGNO (dest));
6592
6593 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6594
6595 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6596 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6597 {
6598 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6599 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6600 }
6601
6602 emit_insn (tmp);
6603}
6604
/* Expand a move of mode MODE from operands[1] to operands[0], applying
   whatever fixups (PIC legitimization, forcing operands into registers
   or constants into memory) the move patterns require.  */
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  /* legitimize_pic_address may already have produced the move.  */
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* No mem-to-mem moves (pushes from memory are the exception).  */
      if (GET_CODE (operands[0]) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (operands[1], mode)
	  && !x86_64_zero_extended_value (operands[1])
	  && !register_operand (operands[0], mode)
	  && optimize && !reload_completed && !reload_in_progress)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
e9a25f70 6669
/* Expand a vector-mode move from operands[1] to operands[0], forcing
   awkward operands (non-zero constants, mem-to-mem) into shape first.  */
void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);
      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    {
      /* NOTE(review): the temporary is forced into TImode regardless of
	 MODE — presumably every vector mode reaching here is 128 bits
	 wide; confirm before reusing this path for other modes.  */
      rtx temp = force_reg (TImode, operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
6701
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source already matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
6791
6792/* Return TRUE or FALSE depending on whether the binary operator meets the
6793 appropriate constraints. */
6794
6795int
6796ix86_binary_operator_ok (code, mode, operands)
6797 enum rtx_code code;
6798 enum machine_mode mode ATTRIBUTE_UNUSED;
6799 rtx operands[3];
6800{
6801 /* Both source operands cannot be in memory. */
6802 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6803 return 0;
6804 /* If the operation is not commutable, source 1 cannot be a constant. */
6805 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6806 return 0;
6807 /* If the destination is memory, we must have a matching source operand. */
6808 if (GET_CODE (operands[0]) == MEM
6809 && ! (rtx_equal_p (operands[0], operands[1])
6810 || (GET_RTX_CLASS (code) == 'c'
6811 && rtx_equal_p (operands[0], operands[2]))))
6812 return 0;
06a964de 6813 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 6814 have a matching destination. */
06a964de
JH
6815 if (GET_CODE (operands[1]) == MEM
6816 && GET_RTX_CLASS (code) != 'c'
6817 && ! rtx_equal_p (operands[0], operands[1]))
6818 return 0;
e075ae69
RH
6819 return 1;
6820}
6821
6822/* Attempt to expand a unary operator. Make the expansion closer to the
6823 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 6824 memory references (one output, one input) in a single insn. */
e075ae69 6825
9d81fc27 6826void
e075ae69
RH
6827ix86_expand_unary_operator (code, mode, operands)
6828 enum rtx_code code;
6829 enum machine_mode mode;
6830 rtx operands[];
6831{
06a964de
JH
6832 int matching_memory;
6833 rtx src, dst, op, clob;
6834
6835 dst = operands[0];
6836 src = operands[1];
e075ae69 6837
06a964de
JH
6838 /* If the destination is memory, and we do not have matching source
6839 operands, do things in registers. */
6840 matching_memory = 0;
6841 if (GET_CODE (dst) == MEM)
32b5b1aa 6842 {
06a964de
JH
6843 if (rtx_equal_p (dst, src))
6844 matching_memory = 1;
e075ae69 6845 else
06a964de 6846 dst = gen_reg_rtx (mode);
32b5b1aa 6847 }
e9a25f70 6848
06a964de
JH
6849 /* When source operand is memory, destination must match. */
6850 if (!matching_memory && GET_CODE (src) == MEM)
6851 src = force_reg (mode, src);
0f290768 6852
06a964de 6853 /* If optimizing, copy to regs to improve CSE */
fe577e58 6854 if (optimize && ! no_new_pseudos)
06a964de
JH
6855 {
6856 if (GET_CODE (dst) == MEM)
6857 dst = gen_reg_rtx (mode);
6858 if (GET_CODE (src) == MEM)
6859 src = force_reg (mode, src);
6860 }
6861
6862 /* Emit the instruction. */
6863
6864 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6865 if (reload_in_progress || code == NOT)
6866 {
6867 /* Reload doesn't know about the flags register, and doesn't know that
6868 it doesn't want to clobber it. */
6869 if (code != NOT)
6870 abort ();
6871 emit_insn (op);
6872 }
6873 else
6874 {
6875 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6876 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6877 }
6878
6879 /* Fix up the destination if needed. */
6880 if (dst != operands[0])
6881 emit_move_insn (operands[0], dst);
e075ae69
RH
6882}
6883
6884/* Return TRUE or FALSE depending on whether the unary operator meets the
6885 appropriate constraints. */
6886
6887int
6888ix86_unary_operator_ok (code, mode, operands)
6889 enum rtx_code code ATTRIBUTE_UNUSED;
6890 enum machine_mode mode ATTRIBUTE_UNUSED;
6891 rtx operands[2] ATTRIBUTE_UNUSED;
6892{
06a964de
JH
6893 /* If one of operands is memory, source and destination must match. */
6894 if ((GET_CODE (operands[0]) == MEM
6895 || GET_CODE (operands[1]) == MEM)
6896 && ! rtx_equal_p (operands[0], operands[1]))
6897 return FALSE;
e075ae69
RH
6898 return TRUE;
6899}
6900
16189740
RH
6901/* Return TRUE or FALSE depending on whether the first SET in INSN
6902 has source and destination with matching CC modes, and that the
6903 CC mode is at least as constrained as REQ_MODE. */
6904
6905int
6906ix86_match_ccmode (insn, req_mode)
6907 rtx insn;
6908 enum machine_mode req_mode;
6909{
6910 rtx set;
6911 enum machine_mode set_mode;
6912
6913 set = PATTERN (insn);
6914 if (GET_CODE (set) == PARALLEL)
6915 set = XVECEXP (set, 0, 0);
6916 if (GET_CODE (set) != SET)
6917 abort ();
9076b9c1
JH
6918 if (GET_CODE (SET_SRC (set)) != COMPARE)
6919 abort ();
16189740
RH
6920
6921 set_mode = GET_MODE (SET_DEST (set));
6922 switch (set_mode)
6923 {
9076b9c1
JH
6924 case CCNOmode:
6925 if (req_mode != CCNOmode
6926 && (req_mode != CCmode
6927 || XEXP (SET_SRC (set), 1) != const0_rtx))
6928 return 0;
6929 break;
16189740 6930 case CCmode:
9076b9c1 6931 if (req_mode == CCGCmode)
16189740
RH
6932 return 0;
6933 /* FALLTHRU */
9076b9c1
JH
6934 case CCGCmode:
6935 if (req_mode == CCGOCmode || req_mode == CCNOmode)
6936 return 0;
6937 /* FALLTHRU */
6938 case CCGOCmode:
16189740
RH
6939 if (req_mode == CCZmode)
6940 return 0;
6941 /* FALLTHRU */
6942 case CCZmode:
6943 break;
6944
6945 default:
6946 abort ();
6947 }
6948
6949 return (GET_MODE (SET_SRC (set)) == set_mode);
6950}
6951
e075ae69
RH
6952/* Generate insn patterns to do an integer compare of OPERANDS. */
6953
6954static rtx
6955ix86_expand_int_compare (code, op0, op1)
6956 enum rtx_code code;
6957 rtx op0, op1;
6958{
6959 enum machine_mode cmpmode;
6960 rtx tmp, flags;
6961
6962 cmpmode = SELECT_CC_MODE (code, op0, op1);
6963 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6964
6965 /* This is very simple, but making the interface the same as in the
6966 FP case makes the rest of the code easier. */
6967 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6968 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6969
6970 /* Return the test that should be put into the flags user, i.e.
6971 the bcc, scc, or cmov instruction. */
6972 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6973}
6974
3a3677ff
RH
6975/* Figure out whether to use ordered or unordered fp comparisons.
6976 Return the appropriate mode to use. */
e075ae69 6977
b1cdafbb 6978enum machine_mode
3a3677ff 6979ix86_fp_compare_mode (code)
8752c357 6980 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 6981{
9e7adcb3
JH
6982 /* ??? In order to make all comparisons reversible, we do all comparisons
6983 non-trapping when compiling for IEEE. Once gcc is able to distinguish
6984 all forms trapping and nontrapping comparisons, we can make inequality
6985 comparisons trapping again, since it results in better code when using
6986 FCOM based compares. */
6987 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
6988}
6989
9076b9c1
JH
6990enum machine_mode
6991ix86_cc_mode (code, op0, op1)
6992 enum rtx_code code;
6993 rtx op0, op1;
6994{
6995 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6996 return ix86_fp_compare_mode (code);
6997 switch (code)
6998 {
6999 /* Only zero flag is needed. */
7000 case EQ: /* ZF=0 */
7001 case NE: /* ZF!=0 */
7002 return CCZmode;
7003 /* Codes needing carry flag. */
265dab10
JH
7004 case GEU: /* CF=0 */
7005 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
7006 case LTU: /* CF=1 */
7007 case LEU: /* CF=1 | ZF=1 */
265dab10 7008 return CCmode;
9076b9c1
JH
7009 /* Codes possibly doable only with sign flag when
7010 comparing against zero. */
7011 case GE: /* SF=OF or SF=0 */
7e08e190 7012 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
7013 if (op1 == const0_rtx)
7014 return CCGOCmode;
7015 else
7016 /* For other cases Carry flag is not required. */
7017 return CCGCmode;
7018 /* Codes doable only with sign flag when comparing
7019 against zero, but we miss jump instruction for it
7020 so we need to use relational tests agains overflow
7021 that thus needs to be zero. */
7022 case GT: /* ZF=0 & SF=OF */
7023 case LE: /* ZF=1 | SF<>OF */
7024 if (op1 == const0_rtx)
7025 return CCNOmode;
7026 else
7027 return CCGCmode;
7fcd7218
JH
7028 /* strcmp pattern do (use flags) and combine may ask us for proper
7029 mode. */
7030 case USE:
7031 return CCmode;
9076b9c1 7032 default:
0f290768 7033 abort ();
9076b9c1
JH
7034 }
7035}
7036
3a3677ff
RH
7037/* Return true if we should use an FCOMI instruction for this fp comparison. */
7038
a940d8bd 7039int
3a3677ff 7040ix86_use_fcomi_compare (code)
9e7adcb3 7041 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 7042{
9e7adcb3
JH
7043 enum rtx_code swapped_code = swap_condition (code);
7044 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7045 || (ix86_fp_comparison_cost (swapped_code)
7046 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
7047}
7048
0f290768 7049/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
7050 to a fp comparison. The operands are updated in place; the new
7051 comparsion code is returned. */
7052
7053static enum rtx_code
7054ix86_prepare_fp_compare_args (code, pop0, pop1)
7055 enum rtx_code code;
7056 rtx *pop0, *pop1;
7057{
7058 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7059 rtx op0 = *pop0, op1 = *pop1;
7060 enum machine_mode op_mode = GET_MODE (op0);
0644b628 7061 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7062
e075ae69 7063 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
7064 The same is true of the XFmode compare instructions. The same is
7065 true of the fcomi compare instructions. */
7066
0644b628
JH
7067 if (!is_sse
7068 && (fpcmp_mode == CCFPUmode
7069 || op_mode == XFmode
7070 || op_mode == TFmode
7071 || ix86_use_fcomi_compare (code)))
e075ae69 7072 {
3a3677ff
RH
7073 op0 = force_reg (op_mode, op0);
7074 op1 = force_reg (op_mode, op1);
e075ae69
RH
7075 }
7076 else
7077 {
7078 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7079 things around if they appear profitable, otherwise force op0
7080 into a register. */
7081
7082 if (standard_80387_constant_p (op0) == 0
7083 || (GET_CODE (op0) == MEM
7084 && ! (standard_80387_constant_p (op1) == 0
7085 || GET_CODE (op1) == MEM)))
32b5b1aa 7086 {
e075ae69
RH
7087 rtx tmp;
7088 tmp = op0, op0 = op1, op1 = tmp;
7089 code = swap_condition (code);
7090 }
7091
7092 if (GET_CODE (op0) != REG)
3a3677ff 7093 op0 = force_reg (op_mode, op0);
e075ae69
RH
7094
7095 if (CONSTANT_P (op1))
7096 {
7097 if (standard_80387_constant_p (op1))
3a3677ff 7098 op1 = force_reg (op_mode, op1);
e075ae69 7099 else
3a3677ff 7100 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7101 }
7102 }
e9a25f70 7103
9e7adcb3
JH
7104 /* Try to rearrange the comparison to make it cheaper. */
7105 if (ix86_fp_comparison_cost (code)
7106 > ix86_fp_comparison_cost (swap_condition (code))
7107 && (GET_CODE (op0) == REG || !reload_completed))
7108 {
7109 rtx tmp;
7110 tmp = op0, op0 = op1, op1 = tmp;
7111 code = swap_condition (code);
7112 if (GET_CODE (op0) != REG)
7113 op0 = force_reg (op_mode, op0);
7114 }
7115
3a3677ff
RH
7116 *pop0 = op0;
7117 *pop1 = op1;
7118 return code;
7119}
7120
c0c102a9
JH
7121/* Convert comparison codes we use to represent FP comparison to integer
7122 code that will result in proper branch. Return UNKNOWN if no such code
7123 is available. */
7124static enum rtx_code
7125ix86_fp_compare_code_to_integer (code)
7126 enum rtx_code code;
7127{
7128 switch (code)
7129 {
7130 case GT:
7131 return GTU;
7132 case GE:
7133 return GEU;
7134 case ORDERED:
7135 case UNORDERED:
7136 return code;
7137 break;
7138 case UNEQ:
7139 return EQ;
7140 break;
7141 case UNLT:
7142 return LTU;
7143 break;
7144 case UNLE:
7145 return LEU;
7146 break;
7147 case LTGT:
7148 return NE;
7149 break;
7150 default:
7151 return UNKNOWN;
7152 }
7153}
7154
7155/* Split comparison code CODE into comparisons we can do using branch
7156 instructions. BYPASS_CODE is comparison code for branch that will
7157 branch around FIRST_CODE and SECOND_CODE. If some of branches
7158 is not required, set value to NIL.
7159 We never require more than two branches. */
7160static void
7161ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7162 enum rtx_code code, *bypass_code, *first_code, *second_code;
7163{
7164 *first_code = code;
7165 *bypass_code = NIL;
7166 *second_code = NIL;
7167
7168 /* The fcomi comparison sets flags as follows:
7169
7170 cmp ZF PF CF
7171 > 0 0 0
7172 < 0 0 1
7173 = 1 0 0
7174 un 1 1 1 */
7175
7176 switch (code)
7177 {
7178 case GT: /* GTU - CF=0 & ZF=0 */
7179 case GE: /* GEU - CF=0 */
7180 case ORDERED: /* PF=0 */
7181 case UNORDERED: /* PF=1 */
7182 case UNEQ: /* EQ - ZF=1 */
7183 case UNLT: /* LTU - CF=1 */
7184 case UNLE: /* LEU - CF=1 | ZF=1 */
7185 case LTGT: /* EQ - ZF=0 */
7186 break;
7187 case LT: /* LTU - CF=1 - fails on unordered */
7188 *first_code = UNLT;
7189 *bypass_code = UNORDERED;
7190 break;
7191 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7192 *first_code = UNLE;
7193 *bypass_code = UNORDERED;
7194 break;
7195 case EQ: /* EQ - ZF=1 - fails on unordered */
7196 *first_code = UNEQ;
7197 *bypass_code = UNORDERED;
7198 break;
7199 case NE: /* NE - ZF=0 - fails on unordered */
7200 *first_code = LTGT;
7201 *second_code = UNORDERED;
7202 break;
7203 case UNGE: /* GEU - CF=0 - fails on unordered */
7204 *first_code = GE;
7205 *second_code = UNORDERED;
7206 break;
7207 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7208 *first_code = GT;
7209 *second_code = UNORDERED;
7210 break;
7211 default:
7212 abort ();
7213 }
7214 if (!TARGET_IEEE_FP)
7215 {
7216 *second_code = NIL;
7217 *bypass_code = NIL;
7218 }
7219}
7220
9e7adcb3
JH
7221/* Return cost of comparison done fcom + arithmetics operations on AX.
7222 All following functions do use number of instructions as an cost metrics.
7223 In future this should be tweaked to compute bytes for optimize_size and
7224 take into account performance of various instructions on various CPUs. */
7225static int
7226ix86_fp_comparison_arithmetics_cost (code)
7227 enum rtx_code code;
7228{
7229 if (!TARGET_IEEE_FP)
7230 return 4;
7231 /* The cost of code output by ix86_expand_fp_compare. */
7232 switch (code)
7233 {
7234 case UNLE:
7235 case UNLT:
7236 case LTGT:
7237 case GT:
7238 case GE:
7239 case UNORDERED:
7240 case ORDERED:
7241 case UNEQ:
7242 return 4;
7243 break;
7244 case LT:
7245 case NE:
7246 case EQ:
7247 case UNGE:
7248 return 5;
7249 break;
7250 case LE:
7251 case UNGT:
7252 return 6;
7253 break;
7254 default:
7255 abort ();
7256 }
7257}
7258
7259/* Return cost of comparison done using fcomi operation.
7260 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7261static int
7262ix86_fp_comparison_fcomi_cost (code)
7263 enum rtx_code code;
7264{
7265 enum rtx_code bypass_code, first_code, second_code;
7266 /* Return arbitarily high cost when instruction is not supported - this
7267 prevents gcc from using it. */
7268 if (!TARGET_CMOVE)
7269 return 1024;
7270 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7271 return (bypass_code != NIL || second_code != NIL) + 2;
7272}
7273
7274/* Return cost of comparison done using sahf operation.
7275 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7276static int
7277ix86_fp_comparison_sahf_cost (code)
7278 enum rtx_code code;
7279{
7280 enum rtx_code bypass_code, first_code, second_code;
7281 /* Return arbitarily high cost when instruction is not preferred - this
7282 avoids gcc from using it. */
7283 if (!TARGET_USE_SAHF && !optimize_size)
7284 return 1024;
7285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7286 return (bypass_code != NIL || second_code != NIL) + 3;
7287}
7288
7289/* Compute cost of the comparison done using any method.
7290 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7291static int
7292ix86_fp_comparison_cost (code)
7293 enum rtx_code code;
7294{
7295 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7296 int min;
7297
7298 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7299 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7300
7301 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7302 if (min > sahf_cost)
7303 min = sahf_cost;
7304 if (min > fcomi_cost)
7305 min = fcomi_cost;
7306 return min;
7307}
c0c102a9 7308
3a3677ff
RH
7309/* Generate insn patterns to do a floating point compare of OPERANDS. */
7310
9e7adcb3
JH
7311static rtx
7312ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
7313 enum rtx_code code;
7314 rtx op0, op1, scratch;
9e7adcb3
JH
7315 rtx *second_test;
7316 rtx *bypass_test;
3a3677ff
RH
7317{
7318 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 7319 rtx tmp, tmp2;
9e7adcb3 7320 int cost = ix86_fp_comparison_cost (code);
c0c102a9 7321 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7322
7323 fpcmp_mode = ix86_fp_compare_mode (code);
7324 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7325
9e7adcb3
JH
7326 if (second_test)
7327 *second_test = NULL_RTX;
7328 if (bypass_test)
7329 *bypass_test = NULL_RTX;
7330
c0c102a9
JH
7331 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7332
9e7adcb3
JH
7333 /* Do fcomi/sahf based test when profitable. */
7334 if ((bypass_code == NIL || bypass_test)
7335 && (second_code == NIL || second_test)
7336 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 7337 {
c0c102a9
JH
7338 if (TARGET_CMOVE)
7339 {
7340 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7341 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7342 tmp);
7343 emit_insn (tmp);
7344 }
7345 else
7346 {
7347 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7348 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7349 if (!scratch)
7350 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
7351 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7352 emit_insn (gen_x86_sahf_1 (scratch));
7353 }
e075ae69
RH
7354
7355 /* The FP codes work out to act like unsigned. */
9a915772 7356 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
7357 code = first_code;
7358 if (bypass_code != NIL)
7359 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7360 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7361 const0_rtx);
7362 if (second_code != NIL)
7363 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7364 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7365 const0_rtx);
e075ae69
RH
7366 }
7367 else
7368 {
7369 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
7370 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7371 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7372 if (!scratch)
7373 scratch = gen_reg_rtx (HImode);
3a3677ff 7374 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 7375
9a915772
JH
7376 /* In the unordered case, we have to check C2 for NaN's, which
7377 doesn't happen to work out to anything nice combination-wise.
7378 So do some bit twiddling on the value we've got in AH to come
7379 up with an appropriate set of condition codes. */
e075ae69 7380
9a915772
JH
7381 intcmp_mode = CCNOmode;
7382 switch (code)
32b5b1aa 7383 {
9a915772
JH
7384 case GT:
7385 case UNGT:
7386 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 7387 {
3a3677ff 7388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 7389 code = EQ;
9a915772
JH
7390 }
7391 else
7392 {
7393 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7394 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7395 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7396 intcmp_mode = CCmode;
7397 code = GEU;
7398 }
7399 break;
7400 case LT:
7401 case UNLT:
7402 if (code == LT && TARGET_IEEE_FP)
7403 {
3a3677ff
RH
7404 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7405 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
7406 intcmp_mode = CCmode;
7407 code = EQ;
9a915772
JH
7408 }
7409 else
7410 {
7411 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7412 code = NE;
7413 }
7414 break;
7415 case GE:
7416 case UNGE:
7417 if (code == GE || !TARGET_IEEE_FP)
7418 {
3a3677ff 7419 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 7420 code = EQ;
9a915772
JH
7421 }
7422 else
7423 {
7424 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7425 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7426 GEN_INT (0x01)));
7427 code = NE;
7428 }
7429 break;
7430 case LE:
7431 case UNLE:
7432 if (code == LE && TARGET_IEEE_FP)
7433 {
3a3677ff
RH
7434 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7435 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7436 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7437 intcmp_mode = CCmode;
7438 code = LTU;
9a915772
JH
7439 }
7440 else
7441 {
7442 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7443 code = NE;
7444 }
7445 break;
7446 case EQ:
7447 case UNEQ:
7448 if (code == EQ && TARGET_IEEE_FP)
7449 {
3a3677ff
RH
7450 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7451 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7452 intcmp_mode = CCmode;
7453 code = EQ;
9a915772
JH
7454 }
7455 else
7456 {
3a3677ff
RH
7457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7458 code = NE;
7459 break;
9a915772
JH
7460 }
7461 break;
7462 case NE:
7463 case LTGT:
7464 if (code == NE && TARGET_IEEE_FP)
7465 {
3a3677ff 7466 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
7467 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7468 GEN_INT (0x40)));
3a3677ff 7469 code = NE;
9a915772
JH
7470 }
7471 else
7472 {
3a3677ff
RH
7473 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7474 code = EQ;
32b5b1aa 7475 }
9a915772
JH
7476 break;
7477
7478 case UNORDERED:
7479 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7480 code = NE;
7481 break;
7482 case ORDERED:
7483 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7484 code = EQ;
7485 break;
7486
7487 default:
7488 abort ();
32b5b1aa 7489 }
32b5b1aa 7490 }
e075ae69
RH
7491
7492 /* Return the test that should be put into the flags user, i.e.
7493 the bcc, scc, or cmov instruction. */
7494 return gen_rtx_fmt_ee (code, VOIDmode,
7495 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7496 const0_rtx);
7497}
7498
9e3e266c 7499rtx
a1b8572c 7500ix86_expand_compare (code, second_test, bypass_test)
e075ae69 7501 enum rtx_code code;
a1b8572c 7502 rtx *second_test, *bypass_test;
e075ae69
RH
7503{
7504 rtx op0, op1, ret;
7505 op0 = ix86_compare_op0;
7506 op1 = ix86_compare_op1;
7507
a1b8572c
JH
7508 if (second_test)
7509 *second_test = NULL_RTX;
7510 if (bypass_test)
7511 *bypass_test = NULL_RTX;
7512
e075ae69 7513 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 7514 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 7515 second_test, bypass_test);
32b5b1aa 7516 else
e075ae69
RH
7517 ret = ix86_expand_int_compare (code, op0, op1);
7518
7519 return ret;
7520}
7521
03598dea
JH
7522/* Return true if the CODE will result in nontrivial jump sequence. */
7523bool
7524ix86_fp_jump_nontrivial_p (code)
7525 enum rtx_code code;
7526{
7527 enum rtx_code bypass_code, first_code, second_code;
7528 if (!TARGET_CMOVE)
7529 return true;
7530 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7531 return bypass_code != NIL || second_code != NIL;
7532}
7533
e075ae69 7534void
3a3677ff 7535ix86_expand_branch (code, label)
e075ae69 7536 enum rtx_code code;
e075ae69
RH
7537 rtx label;
7538{
3a3677ff 7539 rtx tmp;
e075ae69 7540
3a3677ff 7541 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 7542 {
3a3677ff
RH
7543 case QImode:
7544 case HImode:
7545 case SImode:
0d7d98ee 7546 simple:
a1b8572c 7547 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
7548 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7549 gen_rtx_LABEL_REF (VOIDmode, label),
7550 pc_rtx);
7551 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 7552 return;
e075ae69 7553
3a3677ff
RH
7554 case SFmode:
7555 case DFmode:
0f290768 7556 case XFmode:
2b589241 7557 case TFmode:
3a3677ff
RH
7558 {
7559 rtvec vec;
7560 int use_fcomi;
03598dea 7561 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7562
7563 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7564 &ix86_compare_op1);
03598dea
JH
7565
7566 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7567
7568 /* Check whether we will use the natural sequence with one jump. If
7569 so, we can expand jump early. Otherwise delay expansion by
7570 creating compound insn to not confuse optimizers. */
7571 if (bypass_code == NIL && second_code == NIL
7572 && TARGET_CMOVE)
7573 {
7574 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7575 gen_rtx_LABEL_REF (VOIDmode, label),
7576 pc_rtx, NULL_RTX);
7577 }
7578 else
7579 {
7580 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7581 ix86_compare_op0, ix86_compare_op1);
7582 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7583 gen_rtx_LABEL_REF (VOIDmode, label),
7584 pc_rtx);
7585 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7586
7587 use_fcomi = ix86_use_fcomi_compare (code);
7588 vec = rtvec_alloc (3 + !use_fcomi);
7589 RTVEC_ELT (vec, 0) = tmp;
7590 RTVEC_ELT (vec, 1)
7591 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7592 RTVEC_ELT (vec, 2)
7593 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7594 if (! use_fcomi)
7595 RTVEC_ELT (vec, 3)
7596 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7597
7598 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7599 }
3a3677ff
RH
7600 return;
7601 }
32b5b1aa 7602
3a3677ff 7603 case DImode:
0d7d98ee
JH
7604 if (TARGET_64BIT)
7605 goto simple;
3a3677ff
RH
7606 /* Expand DImode branch into multiple compare+branch. */
7607 {
7608 rtx lo[2], hi[2], label2;
7609 enum rtx_code code1, code2, code3;
32b5b1aa 7610
3a3677ff
RH
7611 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7612 {
7613 tmp = ix86_compare_op0;
7614 ix86_compare_op0 = ix86_compare_op1;
7615 ix86_compare_op1 = tmp;
7616 code = swap_condition (code);
7617 }
7618 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7619 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 7620
3a3677ff
RH
7621 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7622 avoid two branches. This costs one extra insn, so disable when
7623 optimizing for size. */
32b5b1aa 7624
3a3677ff
RH
7625 if ((code == EQ || code == NE)
7626 && (!optimize_size
7627 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7628 {
7629 rtx xor0, xor1;
32b5b1aa 7630
3a3677ff
RH
7631 xor1 = hi[0];
7632 if (hi[1] != const0_rtx)
7633 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7634 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7635
3a3677ff
RH
7636 xor0 = lo[0];
7637 if (lo[1] != const0_rtx)
7638 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7639 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 7640
3a3677ff
RH
7641 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7642 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7643
3a3677ff
RH
7644 ix86_compare_op0 = tmp;
7645 ix86_compare_op1 = const0_rtx;
7646 ix86_expand_branch (code, label);
7647 return;
7648 }
e075ae69 7649
1f9124e4
JJ
7650 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7651 op1 is a constant and the low word is zero, then we can just
7652 examine the high word. */
32b5b1aa 7653
1f9124e4
JJ
7654 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7655 switch (code)
7656 {
7657 case LT: case LTU: case GE: case GEU:
7658 ix86_compare_op0 = hi[0];
7659 ix86_compare_op1 = hi[1];
7660 ix86_expand_branch (code, label);
7661 return;
7662 default:
7663 break;
7664 }
e075ae69 7665
3a3677ff 7666 /* Otherwise, we need two or three jumps. */
e075ae69 7667
3a3677ff 7668 label2 = gen_label_rtx ();
e075ae69 7669
3a3677ff
RH
7670 code1 = code;
7671 code2 = swap_condition (code);
7672 code3 = unsigned_condition (code);
e075ae69 7673
3a3677ff
RH
7674 switch (code)
7675 {
7676 case LT: case GT: case LTU: case GTU:
7677 break;
e075ae69 7678
3a3677ff
RH
7679 case LE: code1 = LT; code2 = GT; break;
7680 case GE: code1 = GT; code2 = LT; break;
7681 case LEU: code1 = LTU; code2 = GTU; break;
7682 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 7683
3a3677ff
RH
7684 case EQ: code1 = NIL; code2 = NE; break;
7685 case NE: code2 = NIL; break;
e075ae69 7686
3a3677ff
RH
7687 default:
7688 abort ();
7689 }
e075ae69 7690
3a3677ff
RH
7691 /*
7692 * a < b =>
7693 * if (hi(a) < hi(b)) goto true;
7694 * if (hi(a) > hi(b)) goto false;
7695 * if (lo(a) < lo(b)) goto true;
7696 * false:
7697 */
7698
7699 ix86_compare_op0 = hi[0];
7700 ix86_compare_op1 = hi[1];
7701
7702 if (code1 != NIL)
7703 ix86_expand_branch (code1, label);
7704 if (code2 != NIL)
7705 ix86_expand_branch (code2, label2);
7706
7707 ix86_compare_op0 = lo[0];
7708 ix86_compare_op1 = lo[1];
7709 ix86_expand_branch (code3, label);
7710
7711 if (code2 != NIL)
7712 emit_label (label2);
7713 return;
7714 }
e075ae69 7715
3a3677ff
RH
7716 default:
7717 abort ();
7718 }
32b5b1aa 7719}
e075ae69 7720
9e7adcb3
JH
7721/* Split branch based on floating point condition. */
7722void
03598dea
JH
7723ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7724 enum rtx_code code;
7725 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
7726{
7727 rtx second, bypass;
7728 rtx label = NULL_RTX;
03598dea 7729 rtx condition;
6b24c259
JH
7730 int bypass_probability = -1, second_probability = -1, probability = -1;
7731 rtx i;
9e7adcb3
JH
7732
7733 if (target2 != pc_rtx)
7734 {
7735 rtx tmp = target2;
7736 code = reverse_condition_maybe_unordered (code);
7737 target2 = target1;
7738 target1 = tmp;
7739 }
7740
7741 condition = ix86_expand_fp_compare (code, op1, op2,
7742 tmp, &second, &bypass);
6b24c259
JH
7743
7744 if (split_branch_probability >= 0)
7745 {
7746 /* Distribute the probabilities across the jumps.
7747 Assume the BYPASS and SECOND to be always test
7748 for UNORDERED. */
7749 probability = split_branch_probability;
7750
d6a7951f 7751 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
7752 to be updated. Later we may run some experiments and see
7753 if unordered values are more frequent in practice. */
7754 if (bypass)
7755 bypass_probability = 1;
7756 if (second)
7757 second_probability = 1;
7758 }
9e7adcb3
JH
7759 if (bypass != NULL_RTX)
7760 {
7761 label = gen_label_rtx ();
6b24c259
JH
7762 i = emit_jump_insn (gen_rtx_SET
7763 (VOIDmode, pc_rtx,
7764 gen_rtx_IF_THEN_ELSE (VOIDmode,
7765 bypass,
7766 gen_rtx_LABEL_REF (VOIDmode,
7767 label),
7768 pc_rtx)));
7769 if (bypass_probability >= 0)
7770 REG_NOTES (i)
7771 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7772 GEN_INT (bypass_probability),
7773 REG_NOTES (i));
7774 }
7775 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
7776 (VOIDmode, pc_rtx,
7777 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
7778 condition, target1, target2)));
7779 if (probability >= 0)
7780 REG_NOTES (i)
7781 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7782 GEN_INT (probability),
7783 REG_NOTES (i));
7784 if (second != NULL_RTX)
9e7adcb3 7785 {
6b24c259
JH
7786 i = emit_jump_insn (gen_rtx_SET
7787 (VOIDmode, pc_rtx,
7788 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7789 target2)));
7790 if (second_probability >= 0)
7791 REG_NOTES (i)
7792 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7793 GEN_INT (second_probability),
7794 REG_NOTES (i));
9e7adcb3 7795 }
9e7adcb3
JH
7796 if (label != NULL_RTX)
7797 emit_label (label);
7798}
7799
/* Expand a setcc: store into QImode register DEST the result of comparing
   ix86_compare_op0 against ix86_compare_op1 under condition CODE.
   Returns 1 (DONE) when the expansion was emitted, 0 (FAIL) when the
   caller must fall back (DImode compares on 32-bit targets).  */

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  /* 64-bit compares cannot be done with a single setcc on 32-bit
     targets; let the caller handle it.  */
  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));

  /* SECOND_TEST/BYPASS_TEST come back non-null when the comparison needs
     an extra flag test (ix86_expand_compare produced one); combine the two
     setcc results with AND (bypass) or IOR (second).  At most one of the
     two can be set -- both at once is a bug.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  /* The bypass branch is combined with AND, so the condition must
	     be reversed first.  NOTE: this mutates TEST in place.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
e075ae69 7846
/* Expand an integer conditional move: operands[0] = operands[2] if
   operands[1] (a comparison of ix86_compare_op0/ix86_compare_op1) holds,
   else operands[3].  Tries several branchless sequences (sbb, setcc+lea,
   setcc+dec+and+add, logical masking) before falling back to cmov.
   Returns 1 (DONE) when insns were emitted, 0 (FAIL) when the caller
   must expand with branches instead.  */

int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparsion is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      /* x <= 0xffffffff cannot become x < 0xffffffff+1 (would wrap).  */
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  /* Record the compare insns in a sequence so they can be emitted later,
     after we know which expansion strategy is used.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      /* Both arms are constants: try branchless arithmetic sequences.  */
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{
	  /* Unsigned compares set carry, so sbb can materialize 0/-1.  */

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (mode);

	  emit_insn (compare_seq);
	  if (mode == DImode)
	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	  else
	    emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 tmp, 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (cf),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_binop (mode, AND,
					 tmp,
					 GEN_INT (trunc_int_for_mode
						  (cf - ct, mode)),
					 tmp, 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* Normalize so that diff = ct - cf is positive, reversing the
	 condition to compensate.  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      /* diff values of 1,2,3,4,5,8,9 can all be formed by a single lea
	 (scale 1/2/4/8, optionally plus the base).  */
      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
	     done in proper mode to match.  */
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      rtx out1;
	      out1 = out;
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      if (nops == 1)
		{
		  /* A one-op add/shift would clobber flags, so emit it as a
		     PARALLEL with an explicit flags clobber.  */
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  /* out = ((out - 1) & (cf - ct)) + ct  ==  out ? ct : cf.  */
	  out = expand_simple_binop (mode, PLUS,
				     out, constm1_rtx,
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, AND,
				     out,
				     GEN_INT (trunc_int_for_mode
					      (cf - ct, mode)),
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, PLUS,
				     out, GEN_INT (ct),
				     out, 1, OPTAB_DIRECT);
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* The follow-up cmovs for bypass/second tests read operands[0], so the
     arm they copy must not alias the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  /* cmov cannot take two memory operands; force one into a register.  */
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}
e075ae69 8246
/* Expand a floating point conditional move: operands[0] = operands[2] if
   operands[1] (comparing ix86_compare_op0/ix86_compare_op1) holds, else
   operands[3].  Uses SSE min/max or SSE masked moves when profitable,
   otherwise fcmov (converting conditions fcmov cannot encode via setcc).
   Always returns 1.  NOTE: may swap the global ix86_compare_op0/op1 and
   rewrite operands[] in place while canonicalizing.  */

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similary try to manage result to be first operand of conditional
	 move. We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Materialize the condition into a QImode register with setcc, then
	 fcmov on (reg != 0), which fcmov does support.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The extra cmovs below read operands[0]; copy any arm that aliases it.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
8400
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the pieces (word-sized:
   SImode on 32-bit, DImode on 64-bit, except the top 32-bit piece of
   XF/TFmode), MODE is OPERAND's machine mode.  Returns the number of
   parts (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized parts: 32-bit words on !TARGET_64BIT (TFmode
     still only uses 3 of its 4 words), 64-bit words otherwise.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into parts.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts are the same push; the caller emits them in order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Splitting hard registers is only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* 96/128-bit FP on 64-bit: one DImode part plus one SImode part.  */
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = GEN_INT (trunc_int_for_mode
			     ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
			      + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
			      DImode));
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
	    }
	  else
	    abort ();
	}
    }

  return size;
}
8535
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 receive the destination parts and operands 5-7 the source
   parts, in the order the word-sized moves are emitted.
   NOTE(review): the historical comment here claimed a bool return
   ("Return false when normal moves are needed; true when all required
   insns have been emitted") -- the function is void and always emits the
   moves itself; that sentence appears stale.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = src parts */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     The pushes themselves move the stack pointer, so rebase each later
     source part on the address of the part pushed before it.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      /* Pushes go from the highest part down so they land in memory in
	 the right order.  */
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high parts first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low parts first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 8723
/* Split a 64-bit left shift into 32-bit insns: operands[0] = operands[1]
   << operands[2].  For constant counts the two halves are shifted
   directly; for variable counts an shld/shl pair is emitted plus a
   fixup for counts >= 32 (cmov-based when SCRATCH or a fresh pseudo is
   available, branch-based otherwise).  */

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole low word shifts into the high word; low becomes zero.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      /* shld/shl only use the count mod 32; adjust for counts >= 32.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
32b5b1aa 8776
/* Split a 64-bit arithmetic right shift into 32-bit insns:
   operands[0] = operands[1] >> operands[2] (sign-propagating).  Constant
   counts shift the halves directly (high word sign-filled via a 31-bit
   arithmetic shift); variable counts use an shrd/sar pair plus a fixup
   for counts >= 32.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  /* Fill the high word with the sign (low >> 31).  Before reload
	     we may read low[0] directly; after reload a hard-register
	     source/dest pairing requires the copy first.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      /* shrd/sar only use the count mod 32; adjust for counts >= 32,
	 using a sign mask in SCRATCH when cmov is available.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 8835
e075ae69
RH
/* Split a DImode logical right shift into SImode operations on the
   low/high register halves, emitting the resulting insns.

   OPERANDS[0] is the DImode destination, OPERANDS[1] the source, and
   OPERANDS[2] the shift count.  SCRATCH is an optional SImode scratch
   register (may be clobbered); it enables a branchless runtime
   adjustment when the count is not constant.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* DImode shifts only use the low 6 bits of the count.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Shift of 32..63: low half gets the shifted high input,
	     high half becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  /* Shift of 1..31: shrd feeds high bits into the low half,
	     then shift the high half logically.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: emit the 0..31 sequence, then adjust at
	 runtime for counts >= 32 (high half must become zero).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 8889
/* Helper function for the string operations below.  Test whether
   VARIABLE has any of the bits in VALUE set (i.e. is NOT aligned to
   VALUE+1 when VALUE is a mask like 1, 2 or 3).  Emit insns that jump
   to a fresh label when the masked bits are zero, and return that
   label so the caller can place the "unaligned" fix-up code before it.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  /* Compute VARIABLE & VALUE into a temporary so VARIABLE survives.  */
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  /* Jump past the caller's fix-up code when the tested bits are clear.  */
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
8907
8908/* Adjust COUNTER by the VALUE. */
8909static void
8910ix86_adjust_counter (countreg, value)
8911 rtx countreg;
8912 HOST_WIDE_INT value;
8913{
8914 if (GET_MODE (countreg) == DImode)
8915 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8916 else
8917 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8918}
8919
8920/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 8921rtx
0945b39d
JH
8922ix86_zero_extend_to_Pmode (exp)
8923 rtx exp;
8924{
8925 rtx r;
8926 if (GET_MODE (exp) == VOIDmode)
8927 return force_reg (Pmode, exp);
8928 if (GET_MODE (exp) == Pmode)
8929 return copy_to_mode_reg (Pmode, exp);
8930 r = gen_reg_rtx (Pmode);
8931 emit_insn (gen_zero_extendsidi2 (r, exp));
8932 return r;
8933}
8934
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.

   DST and SRC are MEMs for destination and source, COUNT_EXP the byte
   count and ALIGN_EXP the known alignment.  Returns nonzero when code
   was emitted, zero to fall back to the library call.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int)64))
    {
      /* Chunk size: 8 bytes (movsq) on 64-bit unless optimizing for size.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining count % size bytes with single moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* Small unknown counts skip the alignment prologue entirely.  */
      if (count == 0
	  && align < (TARGET_PENTIUMPRO && (count == 0
					    || count >= (unsigned int)260)
		      ? 8 : UNITS_PER_WORD))
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Align the destination one byte/halfword/word at a time, adjusting
	 the remaining count as we go.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4
	  && ((TARGET_PENTIUMPRO && (count == 0
				     || count >= (unsigned int)260))
	      || TARGET_64BIT))
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Main body: rep movs in word-size chunks.  */
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the tail (bytes left over after the word-size rep movs),
	 testing bits of the byte count where it is not known.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}
9158
9159/* Expand string clear operation (bzero). Use i386 string operations when
9160 profitable. expand_movstr contains similar code. */
9161int
9162ix86_expand_clrstr (src, count_exp, align_exp)
9163 rtx src, count_exp, align_exp;
9164{
9165 rtx destreg, zeroreg, countreg;
9166 enum machine_mode counter_mode;
9167 HOST_WIDE_INT align = 0;
9168 unsigned HOST_WIDE_INT count = 0;
9169
9170 if (GET_CODE (align_exp) == CONST_INT)
9171 align = INTVAL (align_exp);
9172
5519a4f9 9173 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
9174 if (!TARGET_ALIGN_STRINGOPS)
9175 align = 32;
9176
9177 if (GET_CODE (count_exp) == CONST_INT)
9178 count = INTVAL (count_exp);
9179 /* Figure out proper mode for counter. For 32bits it is always SImode,
9180 for 64bits use SImode when possible, otherwise DImode.
9181 Set count to number of bytes copied when known at compile time. */
9182 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9183 || x86_64_zero_extended_value (count_exp))
9184 counter_mode = SImode;
9185 else
9186 counter_mode = DImode;
9187
9188 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9189
9190 emit_insn (gen_cld ());
9191
9192 /* When optimizing for size emit simple rep ; movsb instruction for
9193 counts not divisible by 4. */
9194
9195 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9196 {
9197 countreg = ix86_zero_extend_to_Pmode (count_exp);
9198 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9199 if (TARGET_64BIT)
9200 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9201 destreg, countreg));
9202 else
9203 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9204 destreg, countreg));
9205 }
9206 else if (count != 0
9207 && (align >= 8
9208 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9209 || optimize_size || count < (unsigned int)64))
9210 {
9211 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9212 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9213 if (count & ~(size - 1))
9214 {
9215 countreg = copy_to_mode_reg (counter_mode,
9216 GEN_INT ((count >> (size == 4 ? 2 : 3))
9217 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9218 countreg = ix86_zero_extend_to_Pmode (countreg);
9219 if (size == 4)
9220 {
9221 if (TARGET_64BIT)
9222 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9223 destreg, countreg));
9224 else
9225 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9226 destreg, countreg));
9227 }
9228 else
9229 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9230 destreg, countreg));
9231 }
9232 if (size == 8 && (count & 0x04))
9233 emit_insn (gen_strsetsi (destreg,
9234 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9235 if (count & 0x02)
9236 emit_insn (gen_strsethi (destreg,
9237 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9238 if (count & 0x01)
9239 emit_insn (gen_strsetqi (destreg,
9240 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9241 }
9242 else
9243 {
9244 rtx countreg2;
9245 rtx label = NULL;
9246
9247 /* In case we don't know anything about the alignment, default to
9248 library version, since it is usually equally fast and result in
9249 shorter code. */
9250 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9251 return 0;
9252
9253 if (TARGET_SINGLE_STRINGOP)
9254 emit_insn (gen_cld ());
9255
9256 countreg2 = gen_reg_rtx (Pmode);
9257 countreg = copy_to_mode_reg (counter_mode, count_exp);
9258 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9259
9260 if (count == 0
9261 && align < (TARGET_PENTIUMPRO && (count == 0
9262 || count >= (unsigned int)260)
9263 ? 8 : UNITS_PER_WORD))
9264 {
9265 label = gen_label_rtx ();
9266 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
d43e0b7d 9267 LEU, 0, counter_mode, 1, label);
0945b39d
JH
9268 }
9269 if (align <= 1)
9270 {
9271 rtx label = ix86_expand_aligntest (destreg, 1);
9272 emit_insn (gen_strsetqi (destreg,
9273 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9274 ix86_adjust_counter (countreg, 1);
9275 emit_label (label);
9276 LABEL_NUSES (label) = 1;
9277 }
9278 if (align <= 2)
9279 {
9280 rtx label = ix86_expand_aligntest (destreg, 2);
9281 emit_insn (gen_strsethi (destreg,
9282 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9283 ix86_adjust_counter (countreg, 2);
9284 emit_label (label);
9285 LABEL_NUSES (label) = 1;
9286 }
9287 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9288 || count >= (unsigned int)260))
9289 {
9290 rtx label = ix86_expand_aligntest (destreg, 4);
9291 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9292 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9293 : zeroreg)));
9294 ix86_adjust_counter (countreg, 4);
9295 emit_label (label);
9296 LABEL_NUSES (label) = 1;
9297 }
9298
9299 if (!TARGET_SINGLE_STRINGOP)
9300 emit_insn (gen_cld ());
9301 if (TARGET_64BIT)
9302 {
9303 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9304 GEN_INT (3)));
9305 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9306 destreg, countreg2));
9307 }
9308 else
9309 {
9310 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9311 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9312 destreg, countreg2));
9313 }
9314
9315 if (label)
9316 {
9317 emit_label (label);
9318 LABEL_NUSES (label) = 1;
9319 }
9320 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9321 emit_insn (gen_strsetsi (destreg,
9322 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9323 if (TARGET_64BIT && (align <= 4 || count == 0))
9324 {
9325 rtx label = ix86_expand_aligntest (destreg, 2);
9326 emit_insn (gen_strsetsi (destreg,
9327 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9328 emit_label (label);
9329 LABEL_NUSES (label) = 1;
9330 }
9331 if (align > 2 && count != 0 && (count & 2))
9332 emit_insn (gen_strsethi (destreg,
9333 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9334 if (align <= 2 || count == 0)
9335 {
9336 rtx label = ix86_expand_aligntest (destreg, 2);
9337 emit_insn (gen_strsethi (destreg,
9338 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9339 emit_label (label);
9340 LABEL_NUSES (label) = 1;
9341 }
9342 if (align > 1 && count != 0 && (count & 1))
9343 emit_insn (gen_strsetqi (destreg,
9344 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9345 if (align <= 1 || count == 0)
9346 {
9347 rtx label = ix86_expand_aligntest (destreg, 1);
9348 emit_insn (gen_strsetqi (destreg,
9349 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9350 emit_label (label);
9351 LABEL_NUSES (label) = 1;
9352 }
9353 }
9354 return 1;
9355}
/* Expand strlen.

   OUT receives the resulting length; SRC is the string MEM; EOSCHAR is
   the terminator (const0_rtx for real strlen); ALIGN the known
   alignment.  Returns nonzero when code was emitted, zero to fall back
   to the library.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz scasb path: scan for EOSCHAR with maximal count (-1),
	 then OUT = ~remaining_count - 1 = scanned length.  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
9425
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on address mod 4: 0 -> aligned loop, 2 -> two
	     bytes to check, 1 or 3 -> up to three bytes to check.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 GEN_INT (trunc_int_for_mode
				  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  /* A zero byte was found; locate it within the word.  The bit pattern
     in TMPREG has 0x80 set in each byte position that held a zero.  */
  if (TARGET_CMOVE)
    {
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

       /* Conditionally advance OUT by 2, reusing the flags from the
	  test above (the manual lea left them intact).  */
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
       else
	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
9616\f
e075ae69
RH
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  /* Allocate a zeroed machine_function so all remembered stack-local
     slots start out NULL_RTX.  The cast keeps K&R bootstrap compilers
     happy.  */
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}
9628
1526a060
BS
/* Mark machine specific bits of P for GC.  Walks every remembered
   stack-local slot (per mode, per slot index) and marks the rtx so the
   garbage collector keeps it alive.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  /* Nothing to do if the machine part was never allocated.  */
  if (! machine)
    return;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}
9646
9647static void
9648ix86_free_machine_status (p)
9649 struct function *p;
9650{
9651 free (p->machine);
9652 p->machine = NULL;
1526a060
BS
9653}
9654
e075ae69
RH
9655/* Return a MEM corresponding to a stack slot with mode MODE.
9656 Allocate a new slot if necessary.
9657
9658 The RTL for a function can have several slots available: N is
9659 which slot to use. */
9660
9661rtx
9662assign_386_stack_local (mode, n)
9663 enum machine_mode mode;
9664 int n;
9665{
9666 if (n < 0 || n >= MAX_386_STACK_LOCALS)
9667 abort ();
9668
9669 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9670 ix86_stack_locals[(int) mode][n]
9671 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9672
9673 return ix86_stack_locals[(int) mode][n];
9674}
9675\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.

   Returns the number of displacement/SIB bytes beyond the base
   one-byte modrm: 0 for plain register-indirect, 1 for an extra modrm
   or 8-bit displacement byte, 4 for a 32-bit displacement.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-inc/dec addresses (string insns) encode no extra bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit immediates.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 9735
6ef67412
JH
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.

   Returns the number of immediate bytes contributed by INSN's single
   constant operand (0 when there is none); aborts if more than one
   operand is constant.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* Only one immediate per insn is supported here.  */
	if (len)
	  abort ();
	/* 'K' accepts signed 8-bit immediates.  */
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
		case MODE_QI:
		  len+=1;
		  break;
		case MODE_HI:
		  len+=2;
		  break;
		case MODE_SI:
		  len+=4;
		  break;
		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
		case MODE_DI:
		  len+=4;
		  break;
		default:
		  fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
9779/* Compute default value for "length_address" attribute. */
9780int
9781ix86_attr_length_address_default (insn)
9782 rtx insn;
9783{
9784 int i;
6c698a6d 9785 extract_insn_cached (insn);
1ccbefce
RH
9786 for (i = recog_data.n_operands - 1; i >= 0; --i)
9787 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 9788 {
6ef67412 9789 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
9790 break;
9791 }
6ef67412 9792 return 0;
3f803cd9 9793}
e075ae69
RH
9794\f
9795/* Return the maximum number of instructions a cpu can issue. */
b657fc39 9796
c237e94a 9797static int
e075ae69 9798ix86_issue_rate ()
b657fc39 9799{
e075ae69 9800 switch (ix86_cpu)
b657fc39 9801 {
e075ae69
RH
9802 case PROCESSOR_PENTIUM:
9803 case PROCESSOR_K6:
9804 return 2;
79325812 9805
e075ae69 9806 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
9807 case PROCESSOR_PENTIUM4:
9808 case PROCESSOR_ATHLON:
e075ae69 9809 return 3;
b657fc39 9810
b657fc39 9811 default:
e075ae69 9812 return 1;
b657fc39 9813 }
b657fc39
L
9814}
9815
e075ae69
RH
9816/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9817 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 9818
e075ae69
RH
9819static int
9820ix86_flags_dependant (insn, dep_insn, insn_type)
9821 rtx insn, dep_insn;
9822 enum attr_type insn_type;
9823{
9824 rtx set, set2;
b657fc39 9825
e075ae69
RH
9826 /* Simplify the test for uninteresting insns. */
9827 if (insn_type != TYPE_SETCC
9828 && insn_type != TYPE_ICMOV
9829 && insn_type != TYPE_FCMOV
9830 && insn_type != TYPE_IBR)
9831 return 0;
b657fc39 9832
e075ae69
RH
9833 if ((set = single_set (dep_insn)) != 0)
9834 {
9835 set = SET_DEST (set);
9836 set2 = NULL_RTX;
9837 }
9838 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9839 && XVECLEN (PATTERN (dep_insn), 0) == 2
9840 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9841 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9842 {
9843 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9844 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9845 }
78a0d70c
ZW
9846 else
9847 return 0;
b657fc39 9848
78a0d70c
ZW
9849 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9850 return 0;
b657fc39 9851
f5143c46 9852 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
9853 not any other potentially set register. */
9854 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9855 return 0;
9856
9857 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9858 return 0;
9859
9860 return 1;
e075ae69 9861}
b657fc39 9862
e075ae69
RH
9863/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9864 address with operands set by DEP_INSN. */
9865
9866static int
9867ix86_agi_dependant (insn, dep_insn, insn_type)
9868 rtx insn, dep_insn;
9869 enum attr_type insn_type;
9870{
9871 rtx addr;
9872
6ad48e84
JH
9873 if (insn_type == TYPE_LEA
9874 && TARGET_PENTIUM)
5fbdde42
RH
9875 {
9876 addr = PATTERN (insn);
9877 if (GET_CODE (addr) == SET)
9878 ;
9879 else if (GET_CODE (addr) == PARALLEL
9880 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9881 addr = XVECEXP (addr, 0, 0);
9882 else
9883 abort ();
9884 addr = SET_SRC (addr);
9885 }
e075ae69
RH
9886 else
9887 {
9888 int i;
6c698a6d 9889 extract_insn_cached (insn);
1ccbefce
RH
9890 for (i = recog_data.n_operands - 1; i >= 0; --i)
9891 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 9892 {
1ccbefce 9893 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
9894 goto found;
9895 }
9896 return 0;
9897 found:;
b657fc39
L
9898 }
9899
e075ae69 9900 return modified_in_p (addr, dep_insn);
b657fc39 9901}
a269a03c 9902
c237e94a 9903static int
e075ae69 9904ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
9905 rtx insn, link, dep_insn;
9906 int cost;
9907{
e075ae69 9908 enum attr_type insn_type, dep_insn_type;
6ad48e84 9909 enum attr_memory memory, dep_memory;
e075ae69 9910 rtx set, set2;
9b00189f 9911 int dep_insn_code_number;
a269a03c 9912
309ada50 9913 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 9914 if (REG_NOTE_KIND (link) != 0)
309ada50 9915 return 0;
a269a03c 9916
9b00189f
JH
9917 dep_insn_code_number = recog_memoized (dep_insn);
9918
e075ae69 9919 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 9920 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 9921 return cost;
a269a03c 9922
1c71e60e
JH
9923 insn_type = get_attr_type (insn);
9924 dep_insn_type = get_attr_type (dep_insn);
9b00189f 9925
a269a03c
JC
9926 switch (ix86_cpu)
9927 {
9928 case PROCESSOR_PENTIUM:
e075ae69
RH
9929 /* Address Generation Interlock adds a cycle of latency. */
9930 if (ix86_agi_dependant (insn, dep_insn, insn_type))
9931 cost += 1;
9932
9933 /* ??? Compares pair with jump/setcc. */
9934 if (ix86_flags_dependant (insn, dep_insn, insn_type))
9935 cost = 0;
9936
9937 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 9938 if (insn_type == TYPE_FMOV
e075ae69
RH
9939 && get_attr_memory (insn) == MEMORY_STORE
9940 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9941 cost += 1;
9942 break;
a269a03c 9943
e075ae69 9944 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
9945 memory = get_attr_memory (insn);
9946 dep_memory = get_attr_memory (dep_insn);
9947
0f290768 9948 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
9949 increase the cost here for non-imov insns. */
9950 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
9951 && dep_insn_type != TYPE_FMOV
9952 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
9953 cost += 1;
9954
9955 /* INT->FP conversion is expensive. */
9956 if (get_attr_fp_int_src (dep_insn))
9957 cost += 5;
9958
9959 /* There is one cycle extra latency between an FP op and a store. */
9960 if (insn_type == TYPE_FMOV
9961 && (set = single_set (dep_insn)) != NULL_RTX
9962 && (set2 = single_set (insn)) != NULL_RTX
9963 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9964 && GET_CODE (SET_DEST (set2)) == MEM)
9965 cost += 1;
6ad48e84
JH
9966
9967 /* Show ability of reorder buffer to hide latency of load by executing
9968 in parallel with previous instruction in case
9969 previous instruction is not needed to compute the address. */
9970 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9971 && !ix86_agi_dependant (insn, dep_insn, insn_type))
9972 {
9973 /* Claim moves to take one cycle, as core can issue one load
9974 at time and the next load can start cycle later. */
9975 if (dep_insn_type == TYPE_IMOV
9976 || dep_insn_type == TYPE_FMOV)
9977 cost = 1;
9978 else if (cost > 1)
9979 cost--;
9980 }
e075ae69 9981 break;
a269a03c 9982
e075ae69 9983 case PROCESSOR_K6:
6ad48e84
JH
9984 memory = get_attr_memory (insn);
9985 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
9986 /* The esp dependency is resolved before the instruction is really
9987 finished. */
9988 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
9989 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
9990 return 1;
a269a03c 9991
0f290768 9992 /* Since we can't represent delayed latencies of load+operation,
e075ae69 9993 increase the cost here for non-imov insns. */
6ad48e84 9994 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
9995 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
9996
9997 /* INT->FP conversion is expensive. */
9998 if (get_attr_fp_int_src (dep_insn))
9999 cost += 5;
6ad48e84
JH
10000
10001 /* Show ability of reorder buffer to hide latency of load by executing
10002 in parallel with previous instruction in case
10003 previous instruction is not needed to compute the address. */
10004 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10005 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10006 {
10007 /* Claim moves to take one cycle, as core can issue one load
10008 at time and the next load can start cycle later. */
10009 if (dep_insn_type == TYPE_IMOV
10010 || dep_insn_type == TYPE_FMOV)
10011 cost = 1;
10012 else if (cost > 2)
10013 cost -= 2;
10014 else
10015 cost = 1;
10016 }
a14003ee 10017 break;
e075ae69 10018
309ada50 10019 case PROCESSOR_ATHLON:
6ad48e84
JH
10020 memory = get_attr_memory (insn);
10021 dep_memory = get_attr_memory (dep_insn);
10022
10023 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
10024 {
10025 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10026 cost += 2;
10027 else
10028 cost += 3;
10029 }
6ad48e84
JH
10030 /* Show ability of reorder buffer to hide latency of load by executing
10031 in parallel with previous instruction in case
10032 previous instruction is not needed to compute the address. */
10033 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10034 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10035 {
10036 /* Claim moves to take one cycle, as core can issue one load
10037 at time and the next load can start cycle later. */
10038 if (dep_insn_type == TYPE_IMOV
10039 || dep_insn_type == TYPE_FMOV)
10040 cost = 0;
10041 else if (cost >= 3)
10042 cost -= 3;
10043 else
10044 cost = 0;
10045 }
309ada50 10046
a269a03c 10047 default:
a269a03c
JC
10048 break;
10049 }
10050
10051 return cost;
10052}
0a726ef1 10053
e075ae69
RH
10054static union
10055{
10056 struct ppro_sched_data
10057 {
10058 rtx decode[3];
10059 int issued_this_cycle;
10060 } ppro;
10061} ix86_sched_data;
0a726ef1 10062
e075ae69
RH
10063static int
10064ix86_safe_length (insn)
10065 rtx insn;
10066{
10067 if (recog_memoized (insn) >= 0)
10068 return get_attr_length(insn);
10069 else
10070 return 128;
10071}
0a726ef1 10072
e075ae69
RH
10073static int
10074ix86_safe_length_prefix (insn)
10075 rtx insn;
10076{
10077 if (recog_memoized (insn) >= 0)
10078 return get_attr_length(insn);
10079 else
10080 return 0;
10081}
10082
10083static enum attr_memory
10084ix86_safe_memory (insn)
10085 rtx insn;
10086{
10087 if (recog_memoized (insn) >= 0)
10088 return get_attr_memory(insn);
10089 else
10090 return MEMORY_UNKNOWN;
10091}
0a726ef1 10092
e075ae69
RH
10093static enum attr_pent_pair
10094ix86_safe_pent_pair (insn)
10095 rtx insn;
10096{
10097 if (recog_memoized (insn) >= 0)
10098 return get_attr_pent_pair(insn);
10099 else
10100 return PENT_PAIR_NP;
10101}
0a726ef1 10102
e075ae69
RH
10103static enum attr_ppro_uops
10104ix86_safe_ppro_uops (insn)
10105 rtx insn;
10106{
10107 if (recog_memoized (insn) >= 0)
10108 return get_attr_ppro_uops (insn);
10109 else
10110 return PPRO_UOPS_MANY;
10111}
0a726ef1 10112
e075ae69
RH
10113static void
10114ix86_dump_ppro_packet (dump)
10115 FILE *dump;
0a726ef1 10116{
e075ae69 10117 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10118 {
e075ae69
RH
10119 fprintf (dump, "PPRO packet: %d",
10120 INSN_UID (ix86_sched_data.ppro.decode[0]));
10121 if (ix86_sched_data.ppro.decode[1])
10122 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10123 if (ix86_sched_data.ppro.decode[2])
10124 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10125 fputc ('\n', dump);
10126 }
10127}
0a726ef1 10128
e075ae69 10129/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 10130
c237e94a
ZW
10131static void
10132ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
10133 FILE *dump ATTRIBUTE_UNUSED;
10134 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 10135 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
10136{
10137 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10138}
10139
10140/* Shift INSN to SLOT, and shift everything else down. */
10141
10142static void
10143ix86_reorder_insn (insnp, slot)
10144 rtx *insnp, *slot;
10145{
10146 if (insnp != slot)
10147 {
10148 rtx insn = *insnp;
0f290768 10149 do
e075ae69
RH
10150 insnp[0] = insnp[1];
10151 while (++insnp != slot);
10152 *insnp = insn;
0a726ef1 10153 }
e075ae69
RH
10154}
10155
10156/* Find an instruction with given pairability and minimal amount of cycles
10157 lost by the fact that the CPU waits for both pipelines to finish before
10158 reading next instructions. Also take care that both instructions together
10159 can not exceed 7 bytes. */
10160
10161static rtx *
10162ix86_pent_find_pair (e_ready, ready, type, first)
10163 rtx *e_ready;
10164 rtx *ready;
10165 enum attr_pent_pair type;
10166 rtx first;
10167{
10168 int mincycles, cycles;
10169 enum attr_pent_pair tmp;
10170 enum attr_memory memory;
10171 rtx *insnp, *bestinsnp = NULL;
0a726ef1 10172
e075ae69
RH
10173 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10174 return NULL;
0a726ef1 10175
e075ae69
RH
10176 memory = ix86_safe_memory (first);
10177 cycles = result_ready_cost (first);
10178 mincycles = INT_MAX;
10179
10180 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10181 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10182 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6ec6d558 10183 {
e075ae69
RH
10184 enum attr_memory second_memory;
10185 int secondcycles, currentcycles;
10186
10187 second_memory = ix86_safe_memory (*insnp);
10188 secondcycles = result_ready_cost (*insnp);
10189 currentcycles = abs (cycles - secondcycles);
10190
10191 if (secondcycles >= 1 && cycles >= 1)
6ec6d558 10192 {
e075ae69
RH
10193 /* Two read/modify/write instructions together takes two
10194 cycles longer. */
10195 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10196 currentcycles += 2;
0f290768 10197
e075ae69
RH
10198 /* Read modify/write instruction followed by read/modify
10199 takes one cycle longer. */
10200 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10201 && tmp != PENT_PAIR_UV
10202 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10203 currentcycles += 1;
6ec6d558 10204 }
e075ae69
RH
10205 if (currentcycles < mincycles)
10206 bestinsnp = insnp, mincycles = currentcycles;
6ec6d558 10207 }
0a726ef1 10208
e075ae69
RH
10209 return bestinsnp;
10210}
10211
78a0d70c 10212/* Subroutines of ix86_sched_reorder. */
e075ae69 10213
c6991660 10214static void
78a0d70c 10215ix86_sched_reorder_pentium (ready, e_ready)
e075ae69 10216 rtx *ready;
78a0d70c 10217 rtx *e_ready;
e075ae69 10218{
78a0d70c 10219 enum attr_pent_pair pair1, pair2;
e075ae69 10220 rtx *insnp;
e075ae69 10221
78a0d70c
ZW
10222 /* This wouldn't be necessary if Haifa knew that static insn ordering
10223 is important to which pipe an insn is issued to. So we have to make
10224 some minor rearrangements. */
e075ae69 10225
78a0d70c
ZW
10226 pair1 = ix86_safe_pent_pair (*e_ready);
10227
10228 /* If the first insn is non-pairable, let it be. */
10229 if (pair1 == PENT_PAIR_NP)
10230 return;
10231
10232 pair2 = PENT_PAIR_NP;
10233 insnp = 0;
10234
10235 /* If the first insn is UV or PV pairable, search for a PU
10236 insn to go with. */
10237 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
e075ae69 10238 {
78a0d70c
ZW
10239 insnp = ix86_pent_find_pair (e_ready-1, ready,
10240 PENT_PAIR_PU, *e_ready);
10241 if (insnp)
10242 pair2 = PENT_PAIR_PU;
10243 }
e075ae69 10244
78a0d70c
ZW
10245 /* If the first insn is PU or UV pairable, search for a PV
10246 insn to go with. */
10247 if (pair2 == PENT_PAIR_NP
10248 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10249 {
10250 insnp = ix86_pent_find_pair (e_ready-1, ready,
10251 PENT_PAIR_PV, *e_ready);
10252 if (insnp)
10253 pair2 = PENT_PAIR_PV;
10254 }
e075ae69 10255
78a0d70c
ZW
10256 /* If the first insn is pairable, search for a UV
10257 insn to go with. */
10258 if (pair2 == PENT_PAIR_NP)
10259 {
10260 insnp = ix86_pent_find_pair (e_ready-1, ready,
10261 PENT_PAIR_UV, *e_ready);
10262 if (insnp)
10263 pair2 = PENT_PAIR_UV;
10264 }
e075ae69 10265
78a0d70c
ZW
10266 if (pair2 == PENT_PAIR_NP)
10267 return;
e075ae69 10268
78a0d70c
ZW
10269 /* Found something! Decide if we need to swap the order. */
10270 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10271 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10272 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10273 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10274 ix86_reorder_insn (insnp, e_ready);
10275 else
10276 ix86_reorder_insn (insnp, e_ready - 1);
10277}
e075ae69 10278
c6991660 10279static void
78a0d70c
ZW
10280ix86_sched_reorder_ppro (ready, e_ready)
10281 rtx *ready;
10282 rtx *e_ready;
10283{
10284 rtx decode[3];
10285 enum attr_ppro_uops cur_uops;
10286 int issued_this_cycle;
10287 rtx *insnp;
10288 int i;
e075ae69 10289
0f290768 10290 /* At this point .ppro.decode contains the state of the three
78a0d70c 10291 decoders from last "cycle". That is, those insns that were
0f290768 10292 actually independent. But here we're scheduling for the
78a0d70c
ZW
10293 decoder, and we may find things that are decodable in the
10294 same cycle. */
e075ae69 10295
0f290768 10296 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 10297 issued_this_cycle = 0;
e075ae69 10298
78a0d70c
ZW
10299 insnp = e_ready;
10300 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 10301
78a0d70c
ZW
10302 /* If the decoders are empty, and we've a complex insn at the
10303 head of the priority queue, let it issue without complaint. */
10304 if (decode[0] == NULL)
10305 {
10306 if (cur_uops == PPRO_UOPS_MANY)
10307 {
10308 decode[0] = *insnp;
10309 goto ppro_done;
10310 }
10311
10312 /* Otherwise, search for a 2-4 uop unsn to issue. */
10313 while (cur_uops != PPRO_UOPS_FEW)
10314 {
10315 if (insnp == ready)
10316 break;
10317 cur_uops = ix86_safe_ppro_uops (*--insnp);
10318 }
10319
10320 /* If so, move it to the head of the line. */
10321 if (cur_uops == PPRO_UOPS_FEW)
10322 ix86_reorder_insn (insnp, e_ready);
0a726ef1 10323
78a0d70c
ZW
10324 /* Issue the head of the queue. */
10325 issued_this_cycle = 1;
10326 decode[0] = *e_ready--;
10327 }
fb693d44 10328
78a0d70c
ZW
10329 /* Look for simple insns to fill in the other two slots. */
10330 for (i = 1; i < 3; ++i)
10331 if (decode[i] == NULL)
10332 {
10333 if (ready >= e_ready)
10334 goto ppro_done;
fb693d44 10335
e075ae69
RH
10336 insnp = e_ready;
10337 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
10338 while (cur_uops != PPRO_UOPS_ONE)
10339 {
10340 if (insnp == ready)
10341 break;
10342 cur_uops = ix86_safe_ppro_uops (*--insnp);
10343 }
fb693d44 10344
78a0d70c
ZW
10345 /* Found one. Move it to the head of the queue and issue it. */
10346 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 10347 {
78a0d70c
ZW
10348 ix86_reorder_insn (insnp, e_ready);
10349 decode[i] = *e_ready--;
10350 issued_this_cycle++;
10351 continue;
10352 }
fb693d44 10353
78a0d70c
ZW
10354 /* ??? Didn't find one. Ideally, here we would do a lazy split
10355 of 2-uop insns, issue one and queue the other. */
10356 }
fb693d44 10357
78a0d70c
ZW
10358 ppro_done:
10359 if (issued_this_cycle == 0)
10360 issued_this_cycle = 1;
10361 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10362}
fb693d44 10363
0f290768 10364/* We are about to being issuing insns for this clock cycle.
78a0d70c 10365 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
10366static int
10367ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
10368 FILE *dump ATTRIBUTE_UNUSED;
10369 int sched_verbose ATTRIBUTE_UNUSED;
10370 rtx *ready;
c237e94a 10371 int *n_readyp;
78a0d70c
ZW
10372 int clock_var ATTRIBUTE_UNUSED;
10373{
c237e94a 10374 int n_ready = *n_readyp;
78a0d70c 10375 rtx *e_ready = ready + n_ready - 1;
fb693d44 10376
78a0d70c
ZW
10377 if (n_ready < 2)
10378 goto out;
e075ae69 10379
78a0d70c
ZW
10380 switch (ix86_cpu)
10381 {
10382 default:
10383 break;
e075ae69 10384
78a0d70c
ZW
10385 case PROCESSOR_PENTIUM:
10386 ix86_sched_reorder_pentium (ready, e_ready);
10387 break;
e075ae69 10388
78a0d70c
ZW
10389 case PROCESSOR_PENTIUMPRO:
10390 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 10391 break;
fb693d44
RH
10392 }
10393
e075ae69
RH
10394out:
10395 return ix86_issue_rate ();
10396}
fb693d44 10397
e075ae69
RH
10398/* We are about to issue INSN. Return the number of insns left on the
10399 ready queue that can be issued this cycle. */
b222082e 10400
c237e94a 10401static int
e075ae69
RH
10402ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10403 FILE *dump;
10404 int sched_verbose;
10405 rtx insn;
10406 int can_issue_more;
10407{
10408 int i;
10409 switch (ix86_cpu)
fb693d44 10410 {
e075ae69
RH
10411 default:
10412 return can_issue_more - 1;
fb693d44 10413
e075ae69
RH
10414 case PROCESSOR_PENTIUMPRO:
10415 {
10416 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 10417
e075ae69
RH
10418 if (uops == PPRO_UOPS_MANY)
10419 {
10420 if (sched_verbose)
10421 ix86_dump_ppro_packet (dump);
10422 ix86_sched_data.ppro.decode[0] = insn;
10423 ix86_sched_data.ppro.decode[1] = NULL;
10424 ix86_sched_data.ppro.decode[2] = NULL;
10425 if (sched_verbose)
10426 ix86_dump_ppro_packet (dump);
10427 ix86_sched_data.ppro.decode[0] = NULL;
10428 }
10429 else if (uops == PPRO_UOPS_FEW)
10430 {
10431 if (sched_verbose)
10432 ix86_dump_ppro_packet (dump);
10433 ix86_sched_data.ppro.decode[0] = insn;
10434 ix86_sched_data.ppro.decode[1] = NULL;
10435 ix86_sched_data.ppro.decode[2] = NULL;
10436 }
10437 else
10438 {
10439 for (i = 0; i < 3; ++i)
10440 if (ix86_sched_data.ppro.decode[i] == NULL)
10441 {
10442 ix86_sched_data.ppro.decode[i] = insn;
10443 break;
10444 }
10445 if (i == 3)
10446 abort ();
10447 if (i == 2)
10448 {
10449 if (sched_verbose)
10450 ix86_dump_ppro_packet (dump);
10451 ix86_sched_data.ppro.decode[0] = NULL;
10452 ix86_sched_data.ppro.decode[1] = NULL;
10453 ix86_sched_data.ppro.decode[2] = NULL;
10454 }
10455 }
10456 }
10457 return --ix86_sched_data.ppro.issued_this_cycle;
10458 }
fb693d44 10459}
a7180f70 10460\f
0e4970d7
RK
10461/* Walk through INSNS and look for MEM references whose address is DSTREG or
10462 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10463 appropriate. */
10464
10465void
10466ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10467 rtx insns;
10468 rtx dstref, srcref, dstreg, srcreg;
10469{
10470 rtx insn;
10471
10472 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10473 if (INSN_P (insn))
10474 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10475 dstreg, srcreg);
10476}
10477
10478/* Subroutine of above to actually do the updating by recursively walking
10479 the rtx. */
10480
10481static void
10482ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10483 rtx x;
10484 rtx dstref, srcref, dstreg, srcreg;
10485{
10486 enum rtx_code code = GET_CODE (x);
10487 const char *format_ptr = GET_RTX_FORMAT (code);
10488 int i, j;
10489
10490 if (code == MEM && XEXP (x, 0) == dstreg)
10491 MEM_COPY_ATTRIBUTES (x, dstref);
10492 else if (code == MEM && XEXP (x, 0) == srcreg)
10493 MEM_COPY_ATTRIBUTES (x, srcref);
10494
10495 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10496 {
10497 if (*format_ptr == 'e')
10498 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10499 dstreg, srcreg);
10500 else if (*format_ptr == 'E')
10501 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 10502 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
10503 dstreg, srcreg);
10504 }
10505}
10506\f
a7180f70
BS
10507/* Compute the alignment given to a constant that is being placed in memory.
10508 EXP is the constant and ALIGN is the alignment that the object would
10509 ordinarily have.
10510 The value of this function is used instead of that alignment to align
10511 the object. */
10512
10513int
10514ix86_constant_alignment (exp, align)
10515 tree exp;
10516 int align;
10517{
10518 if (TREE_CODE (exp) == REAL_CST)
10519 {
10520 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10521 return 64;
10522 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10523 return 128;
10524 }
10525 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10526 && align < 256)
10527 return 256;
10528
10529 return align;
10530}
10531
10532/* Compute the alignment for a static variable.
10533 TYPE is the data type, and ALIGN is the alignment that
10534 the object would ordinarily have. The value of this function is used
10535 instead of that alignment to align the object. */
10536
10537int
10538ix86_data_alignment (type, align)
10539 tree type;
10540 int align;
10541{
10542 if (AGGREGATE_TYPE_P (type)
10543 && TYPE_SIZE (type)
10544 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10545 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10546 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10547 return 256;
10548
0d7d98ee
JH
10549 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10550 to 16byte boundary. */
10551 if (TARGET_64BIT)
10552 {
10553 if (AGGREGATE_TYPE_P (type)
10554 && TYPE_SIZE (type)
10555 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10556 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10557 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10558 return 128;
10559 }
10560
a7180f70
BS
10561 if (TREE_CODE (type) == ARRAY_TYPE)
10562 {
10563 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10564 return 64;
10565 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10566 return 128;
10567 }
10568 else if (TREE_CODE (type) == COMPLEX_TYPE)
10569 {
0f290768 10570
a7180f70
BS
10571 if (TYPE_MODE (type) == DCmode && align < 64)
10572 return 64;
10573 if (TYPE_MODE (type) == XCmode && align < 128)
10574 return 128;
10575 }
10576 else if ((TREE_CODE (type) == RECORD_TYPE
10577 || TREE_CODE (type) == UNION_TYPE
10578 || TREE_CODE (type) == QUAL_UNION_TYPE)
10579 && TYPE_FIELDS (type))
10580 {
10581 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10582 return 64;
10583 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10584 return 128;
10585 }
10586 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10587 || TREE_CODE (type) == INTEGER_TYPE)
10588 {
10589 if (TYPE_MODE (type) == DFmode && align < 64)
10590 return 64;
10591 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10592 return 128;
10593 }
10594
10595 return align;
10596}
10597
10598/* Compute the alignment for a local variable.
10599 TYPE is the data type, and ALIGN is the alignment that
10600 the object would ordinarily have. The value of this macro is used
10601 instead of that alignment to align the object. */
10602
10603int
10604ix86_local_alignment (type, align)
10605 tree type;
10606 int align;
10607{
0d7d98ee
JH
10608 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10609 to 16byte boundary. */
10610 if (TARGET_64BIT)
10611 {
10612 if (AGGREGATE_TYPE_P (type)
10613 && TYPE_SIZE (type)
10614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10617 return 128;
10618 }
a7180f70
BS
10619 if (TREE_CODE (type) == ARRAY_TYPE)
10620 {
10621 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10622 return 64;
10623 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10624 return 128;
10625 }
10626 else if (TREE_CODE (type) == COMPLEX_TYPE)
10627 {
10628 if (TYPE_MODE (type) == DCmode && align < 64)
10629 return 64;
10630 if (TYPE_MODE (type) == XCmode && align < 128)
10631 return 128;
10632 }
10633 else if ((TREE_CODE (type) == RECORD_TYPE
10634 || TREE_CODE (type) == UNION_TYPE
10635 || TREE_CODE (type) == QUAL_UNION_TYPE)
10636 && TYPE_FIELDS (type))
10637 {
10638 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10639 return 64;
10640 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10641 return 128;
10642 }
10643 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10644 || TREE_CODE (type) == INTEGER_TYPE)
10645 {
0f290768 10646
a7180f70
BS
10647 if (TYPE_MODE (type) == DFmode && align < 64)
10648 return 64;
10649 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10650 return 128;
10651 }
10652 return align;
10653}
0ed08620
JH
10654\f
10655/* Emit RTL insns to initialize the variable parts of a trampoline.
10656 FNADDR is an RTX for the address of the function's pure code.
10657 CXT is an RTX for the static chain value for the function. */
10658void
10659x86_initialize_trampoline (tramp, fnaddr, cxt)
10660 rtx tramp, fnaddr, cxt;
10661{
10662 if (!TARGET_64BIT)
10663 {
10664 /* Compute offset from the end of the jmp to the target function. */
10665 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10666 plus_constant (tramp, 10),
10667 NULL_RTX, 1, OPTAB_DIRECT);
10668 emit_move_insn (gen_rtx_MEM (QImode, tramp),
10669 GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10670 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10671 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10672 GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10673 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10674 }
10675 else
10676 {
10677 int offset = 0;
10678 /* Try to load address using shorter movl instead of movabs.
10679 We may want to support movq for kernel mode, but kernel does not use
10680 trampolines at the moment. */
10681 if (x86_64_zero_extended_value (fnaddr))
10682 {
10683 fnaddr = copy_to_mode_reg (DImode, fnaddr);
10684 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10685 GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10686 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10687 gen_lowpart (SImode, fnaddr));
10688 offset += 6;
10689 }
10690 else
10691 {
10692 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10693 GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10694 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10695 fnaddr);
10696 offset += 10;
10697 }
10698 /* Load static chain using movabs to r10. */
10699 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10700 GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10701 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10702 cxt);
10703 offset += 10;
10704 /* Jump to the r11 */
10705 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10706 GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10707 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
44cf5b6a 10708 GEN_INT (trunc_int_for_mode (0xe3, QImode)));
0ed08620
JH
10709 offset += 3;
10710 if (offset > TRAMPOLINE_SIZE)
10711 abort();
10712 }
10713}
eeb06b1b
BS
10714\f
/* Register builtin NAME with type TYPE and code CODE, but only when the
   ISA bits in MASK are enabled in target_flags.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
bd793c65 10720
bd793c65
BS
10721struct builtin_description
10722{
8b60264b
KG
10723 const unsigned int mask;
10724 const enum insn_code icode;
10725 const char *const name;
10726 const enum ix86_builtins code;
10727 const enum rtx_code comparison;
10728 const unsigned int flag;
bd793c65
BS
10729};
10730
8b60264b 10731static const struct builtin_description bdesc_comi[] =
bd793c65 10732{
eeb06b1b
BS
10733 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10734 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10735 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10736 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10737 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10738 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10739 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10740 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10741 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10742 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10743 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10744 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
bd793c65
BS
10745};
10746
8b60264b 10747static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
10748{
10749 /* SSE */
eeb06b1b
BS
10750 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10751 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10752 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10753 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10754 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10755 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10756 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10757 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10758
10759 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10760 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10761 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10762 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10763 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10764 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10765 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10766 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10767 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10768 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10769 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10770 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10771 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10772 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10773 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10774 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10775 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10776 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10777 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10778 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10779 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10780 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10781 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10782 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10783
10784 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10785 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10786 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10787 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10788
eeb06b1b
BS
10789 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10790 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10791 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10792 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10793 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
10794
10795 /* MMX */
eeb06b1b
BS
10796 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10797 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10798 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10799 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10800 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10801 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10802
10803 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10804 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10805 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10806 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10807 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10808 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10809 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10810 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10811
10812 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10813 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
47f339cf 10814 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
10815
10816 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10817 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10818 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10819 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10820
47f339cf
BS
10821 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
10823
10824 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10825 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10826 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10827 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10828 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10829 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10830
47f339cf
BS
10831 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10832 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10833 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10834 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
10835
10836 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10837 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10838 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10839 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10840 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10841 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
10842
10843 /* Special. */
eeb06b1b
BS
10844 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10845 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10846 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10847
10848 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10849 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10850
10851 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10852 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10853 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10854 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10855 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10856 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10857
10858 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10859 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10860 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10861 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10862 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10863 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10864
10865 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10866 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10867 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10868 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10869
38b29e64 10870 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
eeb06b1b 10871 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
bd793c65
BS
10872
10873};
10874
8b60264b 10875static const struct builtin_description bdesc_1arg[] =
bd793c65 10876{
47f339cf 10877 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
eeb06b1b 10878 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
bd793c65 10879
eeb06b1b
BS
10880 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10881 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10882 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
bd793c65 10883
eeb06b1b
BS
10884 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10885 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10886 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10887 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
bd793c65
BS
10888
10889};
10890
f6155fda
SS
10891void
10892ix86_init_builtins ()
10893{
10894 if (TARGET_MMX)
10895 ix86_init_mmx_sse_builtins ();
10896}
10897
10898/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
10899 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
10900 builtins. */
e37af218 10901static void
f6155fda 10902ix86_init_mmx_sse_builtins ()
bd793c65 10903{
8b60264b 10904 const struct builtin_description * d;
77ebd435 10905 size_t i;
cbd5937a 10906 tree endlink = void_list_node;
bd793c65
BS
10907
10908 tree pchar_type_node = build_pointer_type (char_type_node);
10909 tree pfloat_type_node = build_pointer_type (float_type_node);
10910 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10911 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10912
10913 /* Comparisons. */
10914 tree int_ftype_v4sf_v4sf
10915 = build_function_type (integer_type_node,
10916 tree_cons (NULL_TREE, V4SF_type_node,
10917 tree_cons (NULL_TREE,
10918 V4SF_type_node,
10919 endlink)));
10920 tree v4si_ftype_v4sf_v4sf
10921 = build_function_type (V4SI_type_node,
10922 tree_cons (NULL_TREE, V4SF_type_node,
10923 tree_cons (NULL_TREE,
10924 V4SF_type_node,
10925 endlink)));
10926 /* MMX/SSE/integer conversions. */
bd793c65
BS
10927 tree int_ftype_v4sf
10928 = build_function_type (integer_type_node,
10929 tree_cons (NULL_TREE, V4SF_type_node,
10930 endlink));
10931 tree int_ftype_v8qi
10932 = build_function_type (integer_type_node,
10933 tree_cons (NULL_TREE, V8QI_type_node,
10934 endlink));
bd793c65 10935 tree v4sf_ftype_v4sf_int
21e1b5f1 10936 = build_function_type (V4SF_type_node,
bd793c65
BS
10937 tree_cons (NULL_TREE, V4SF_type_node,
10938 tree_cons (NULL_TREE, integer_type_node,
10939 endlink)));
10940 tree v4sf_ftype_v4sf_v2si
10941 = build_function_type (V4SF_type_node,
10942 tree_cons (NULL_TREE, V4SF_type_node,
10943 tree_cons (NULL_TREE, V2SI_type_node,
10944 endlink)));
10945 tree int_ftype_v4hi_int
10946 = build_function_type (integer_type_node,
10947 tree_cons (NULL_TREE, V4HI_type_node,
10948 tree_cons (NULL_TREE, integer_type_node,
10949 endlink)));
10950 tree v4hi_ftype_v4hi_int_int
332316cd 10951 = build_function_type (V4HI_type_node,
bd793c65
BS
10952 tree_cons (NULL_TREE, V4HI_type_node,
10953 tree_cons (NULL_TREE, integer_type_node,
10954 tree_cons (NULL_TREE,
10955 integer_type_node,
10956 endlink))));
10957 /* Miscellaneous. */
10958 tree v8qi_ftype_v4hi_v4hi
10959 = build_function_type (V8QI_type_node,
10960 tree_cons (NULL_TREE, V4HI_type_node,
10961 tree_cons (NULL_TREE, V4HI_type_node,
10962 endlink)));
10963 tree v4hi_ftype_v2si_v2si
10964 = build_function_type (V4HI_type_node,
10965 tree_cons (NULL_TREE, V2SI_type_node,
10966 tree_cons (NULL_TREE, V2SI_type_node,
10967 endlink)));
10968 tree v4sf_ftype_v4sf_v4sf_int
10969 = build_function_type (V4SF_type_node,
10970 tree_cons (NULL_TREE, V4SF_type_node,
10971 tree_cons (NULL_TREE, V4SF_type_node,
10972 tree_cons (NULL_TREE,
10973 integer_type_node,
10974 endlink))));
10975 tree v4hi_ftype_v8qi_v8qi
10976 = build_function_type (V4HI_type_node,
10977 tree_cons (NULL_TREE, V8QI_type_node,
10978 tree_cons (NULL_TREE, V8QI_type_node,
10979 endlink)));
10980 tree v2si_ftype_v4hi_v4hi
10981 = build_function_type (V2SI_type_node,
10982 tree_cons (NULL_TREE, V4HI_type_node,
10983 tree_cons (NULL_TREE, V4HI_type_node,
10984 endlink)));
10985 tree v4hi_ftype_v4hi_int
10986 = build_function_type (V4HI_type_node,
10987 tree_cons (NULL_TREE, V4HI_type_node,
10988 tree_cons (NULL_TREE, integer_type_node,
10989 endlink)));
bd793c65
BS
10990 tree v4hi_ftype_v4hi_di
10991 = build_function_type (V4HI_type_node,
10992 tree_cons (NULL_TREE, V4HI_type_node,
10993 tree_cons (NULL_TREE,
10994 long_long_integer_type_node,
10995 endlink)));
10996 tree v2si_ftype_v2si_di
10997 = build_function_type (V2SI_type_node,
10998 tree_cons (NULL_TREE, V2SI_type_node,
10999 tree_cons (NULL_TREE,
11000 long_long_integer_type_node,
11001 endlink)));
11002 tree void_ftype_void
11003 = build_function_type (void_type_node, endlink);
bd793c65
BS
11004 tree void_ftype_unsigned
11005 = build_function_type (void_type_node,
11006 tree_cons (NULL_TREE, unsigned_type_node,
11007 endlink));
11008 tree unsigned_ftype_void
11009 = build_function_type (unsigned_type_node, endlink);
11010 tree di_ftype_void
11011 = build_function_type (long_long_unsigned_type_node, endlink);
e37af218
RH
11012 tree v4sf_ftype_void
11013 = build_function_type (V4SF_type_node, endlink);
bd793c65
BS
11014 tree v2si_ftype_v4sf
11015 = build_function_type (V2SI_type_node,
11016 tree_cons (NULL_TREE, V4SF_type_node,
11017 endlink));
11018 /* Loads/stores. */
11019 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11020 tree_cons (NULL_TREE, V8QI_type_node,
11021 tree_cons (NULL_TREE,
11022 pchar_type_node,
11023 endlink)));
11024 tree void_ftype_v8qi_v8qi_pchar
11025 = build_function_type (void_type_node, maskmovq_args);
11026 tree v4sf_ftype_pfloat
11027 = build_function_type (V4SF_type_node,
11028 tree_cons (NULL_TREE, pfloat_type_node,
11029 endlink));
bd793c65
BS
11030 /* @@@ the type is bogus */
11031 tree v4sf_ftype_v4sf_pv2si
11032 = build_function_type (V4SF_type_node,
11033 tree_cons (NULL_TREE, V4SF_type_node,
11034 tree_cons (NULL_TREE, pv2si_type_node,
11035 endlink)));
1255c85c
BS
11036 tree void_ftype_pv2si_v4sf
11037 = build_function_type (void_type_node,
11038 tree_cons (NULL_TREE, pv2si_type_node,
11039 tree_cons (NULL_TREE, V4SF_type_node,
bd793c65
BS
11040 endlink)));
11041 tree void_ftype_pfloat_v4sf
11042 = build_function_type (void_type_node,
11043 tree_cons (NULL_TREE, pfloat_type_node,
11044 tree_cons (NULL_TREE, V4SF_type_node,
11045 endlink)));
11046 tree void_ftype_pdi_di
11047 = build_function_type (void_type_node,
11048 tree_cons (NULL_TREE, pdi_type_node,
11049 tree_cons (NULL_TREE,
11050 long_long_unsigned_type_node,
11051 endlink)));
11052 /* Normal vector unops. */
11053 tree v4sf_ftype_v4sf
11054 = build_function_type (V4SF_type_node,
11055 tree_cons (NULL_TREE, V4SF_type_node,
11056 endlink));
0f290768 11057
bd793c65
BS
11058 /* Normal vector binops. */
11059 tree v4sf_ftype_v4sf_v4sf
11060 = build_function_type (V4SF_type_node,
11061 tree_cons (NULL_TREE, V4SF_type_node,
11062 tree_cons (NULL_TREE, V4SF_type_node,
11063 endlink)));
11064 tree v8qi_ftype_v8qi_v8qi
11065 = build_function_type (V8QI_type_node,
11066 tree_cons (NULL_TREE, V8QI_type_node,
11067 tree_cons (NULL_TREE, V8QI_type_node,
11068 endlink)));
11069 tree v4hi_ftype_v4hi_v4hi
11070 = build_function_type (V4HI_type_node,
11071 tree_cons (NULL_TREE, V4HI_type_node,
11072 tree_cons (NULL_TREE, V4HI_type_node,
11073 endlink)));
11074 tree v2si_ftype_v2si_v2si
11075 = build_function_type (V2SI_type_node,
11076 tree_cons (NULL_TREE, V2SI_type_node,
11077 tree_cons (NULL_TREE, V2SI_type_node,
11078 endlink)));
bd793c65
BS
11079 tree di_ftype_di_di
11080 = build_function_type (long_long_unsigned_type_node,
11081 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11082 tree_cons (NULL_TREE,
11083 long_long_unsigned_type_node,
11084 endlink)));
11085
47f339cf
BS
11086 tree v2si_ftype_v2sf
11087 = build_function_type (V2SI_type_node,
11088 tree_cons (NULL_TREE, V2SF_type_node,
11089 endlink));
11090 tree v2sf_ftype_v2si
11091 = build_function_type (V2SF_type_node,
11092 tree_cons (NULL_TREE, V2SI_type_node,
11093 endlink));
11094 tree v2si_ftype_v2si
11095 = build_function_type (V2SI_type_node,
11096 tree_cons (NULL_TREE, V2SI_type_node,
11097 endlink));
11098 tree v2sf_ftype_v2sf
11099 = build_function_type (V2SF_type_node,
11100 tree_cons (NULL_TREE, V2SF_type_node,
11101 endlink));
11102 tree v2sf_ftype_v2sf_v2sf
11103 = build_function_type (V2SF_type_node,
11104 tree_cons (NULL_TREE, V2SF_type_node,
11105 tree_cons (NULL_TREE,
11106 V2SF_type_node,
11107 endlink)));
11108 tree v2si_ftype_v2sf_v2sf
11109 = build_function_type (V2SI_type_node,
11110 tree_cons (NULL_TREE, V2SF_type_node,
11111 tree_cons (NULL_TREE,
11112 V2SF_type_node,
11113 endlink)));
11114
bd793c65
BS
11115 /* Add all builtins that are more or less simple operations on two
11116 operands. */
11117 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11118 {
11119 /* Use one of the operands; the target can have a different mode for
11120 mask-generating compares. */
11121 enum machine_mode mode;
11122 tree type;
11123
11124 if (d->name == 0)
11125 continue;
11126 mode = insn_data[d->icode].operand[1].mode;
11127
bd793c65
BS
11128 switch (mode)
11129 {
11130 case V4SFmode:
11131 type = v4sf_ftype_v4sf_v4sf;
11132 break;
11133 case V8QImode:
11134 type = v8qi_ftype_v8qi_v8qi;
11135 break;
11136 case V4HImode:
11137 type = v4hi_ftype_v4hi_v4hi;
11138 break;
11139 case V2SImode:
11140 type = v2si_ftype_v2si_v2si;
11141 break;
bd793c65
BS
11142 case DImode:
11143 type = di_ftype_di_di;
11144 break;
11145
11146 default:
11147 abort ();
11148 }
0f290768 11149
bd793c65
BS
11150 /* Override for comparisons. */
11151 if (d->icode == CODE_FOR_maskcmpv4sf3
11152 || d->icode == CODE_FOR_maskncmpv4sf3
11153 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11154 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11155 type = v4si_ftype_v4sf_v4sf;
11156
eeb06b1b 11157 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
11158 }
11159
11160 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
11161 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11162 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11163 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11164 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11165 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11166 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11167 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11168
11169 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11170 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11171 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11172
11173 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11174 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11175
11176 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11177 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 11178
bd793c65
BS
11179 /* comi/ucomi insns. */
11180 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
eeb06b1b 11181 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 11182
1255c85c
BS
11183 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11184 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11185 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 11186
eeb06b1b
BS
11187 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11188 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11189 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11190 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11191 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11192 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 11193
e37af218
RH
11194 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11195 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11196 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11197 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11198
47f339cf
BS
11199 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11200 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 11201
47f339cf 11202 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 11203
eeb06b1b
BS
11204 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11205 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11206 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11207 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11208 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11209 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 11210
eeb06b1b
BS
11211 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11212 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
1255c85c
BS
11213 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11214 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 11215
eeb06b1b 11216 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
47f339cf 11217 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
eeb06b1b 11218 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
47f339cf 11219 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 11220
47f339cf 11221 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 11222
47f339cf 11223 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 11224
eeb06b1b
BS
11225 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11226 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11227 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11228 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11229 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11230 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 11231
eeb06b1b 11232 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 11233
47f339cf
BS
11234 /* Original 3DNow! */
11235 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11236 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11237 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11238 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11239 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11240 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11241 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11242 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11243 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11253 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11254 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
11255
11256 /* 3DNow! extension as used in the Athlon CPU. */
11257 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11258 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11259 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11260 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11261 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11262 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11263
e37af218 11264 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
bd793c65
BS
11265}
11266
11267/* Errors in the source file can cause expand_expr to return const0_rtx
11268 where we expect a vector. To avoid crashing, use one of the vector
11269 clear instructions. */
11270static rtx
11271safe_vector_operand (x, mode)
11272 rtx x;
11273 enum machine_mode mode;
11274{
11275 if (x != const0_rtx)
11276 return x;
11277 x = gen_reg_rtx (mode);
11278
47f339cf 11279 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
11280 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11281 : gen_rtx_SUBREG (DImode, x, 0)));
11282 else
e37af218
RH
11283 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11284 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
11285 return x;
11286}
11287
11288/* Subroutine of ix86_expand_builtin to take care of binop insns. */
11289
11290static rtx
11291ix86_expand_binop_builtin (icode, arglist, target)
11292 enum insn_code icode;
11293 tree arglist;
11294 rtx target;
11295{
11296 rtx pat;
11297 tree arg0 = TREE_VALUE (arglist);
11298 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11299 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11300 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11301 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11302 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11303 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11304
11305 if (VECTOR_MODE_P (mode0))
11306 op0 = safe_vector_operand (op0, mode0);
11307 if (VECTOR_MODE_P (mode1))
11308 op1 = safe_vector_operand (op1, mode1);
11309
11310 if (! target
11311 || GET_MODE (target) != tmode
11312 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11313 target = gen_reg_rtx (tmode);
11314
11315 /* In case the insn wants input operands in modes different from
11316 the result, abort. */
11317 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11318 abort ();
11319
11320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11321 op0 = copy_to_mode_reg (mode0, op0);
11322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11323 op1 = copy_to_mode_reg (mode1, op1);
11324
11325 pat = GEN_FCN (icode) (target, op0, op1);
11326 if (! pat)
11327 return 0;
11328 emit_insn (pat);
11329 return target;
11330}
11331
e37af218
RH
11332/* In type_for_mode we restrict the ability to create TImode types
11333 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11334 to have a V4SFmode signature. Convert them in-place to TImode. */
11335
11336static rtx
11337ix86_expand_timode_binop_builtin (icode, arglist, target)
11338 enum insn_code icode;
11339 tree arglist;
11340 rtx target;
11341{
11342 rtx pat;
11343 tree arg0 = TREE_VALUE (arglist);
11344 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11345 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11346 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11347
11348 op0 = gen_lowpart (TImode, op0);
11349 op1 = gen_lowpart (TImode, op1);
11350 target = gen_reg_rtx (TImode);
11351
11352 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11353 op0 = copy_to_mode_reg (TImode, op0);
11354 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11355 op1 = copy_to_mode_reg (TImode, op1);
11356
11357 pat = GEN_FCN (icode) (target, op0, op1);
11358 if (! pat)
11359 return 0;
11360 emit_insn (pat);
11361
11362 return gen_lowpart (V4SFmode, target);
11363}
11364
bd793c65
BS
11365/* Subroutine of ix86_expand_builtin to take care of stores. */
11366
11367static rtx
e37af218 11368ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
11369 enum insn_code icode;
11370 tree arglist;
bd793c65
BS
11371{
11372 rtx pat;
11373 tree arg0 = TREE_VALUE (arglist);
11374 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11375 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11376 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11377 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11378 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11379
11380 if (VECTOR_MODE_P (mode1))
11381 op1 = safe_vector_operand (op1, mode1);
11382
11383 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
bd793c65
BS
11384 pat = GEN_FCN (icode) (op0, op1);
11385 if (pat)
11386 emit_insn (pat);
11387 return 0;
11388}
11389
11390/* Subroutine of ix86_expand_builtin to take care of unop insns. */
11391
11392static rtx
11393ix86_expand_unop_builtin (icode, arglist, target, do_load)
11394 enum insn_code icode;
11395 tree arglist;
11396 rtx target;
11397 int do_load;
11398{
11399 rtx pat;
11400 tree arg0 = TREE_VALUE (arglist);
11401 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11402 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11403 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11404
11405 if (! target
11406 || GET_MODE (target) != tmode
11407 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11408 target = gen_reg_rtx (tmode);
11409 if (do_load)
11410 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11411 else
11412 {
11413 if (VECTOR_MODE_P (mode0))
11414 op0 = safe_vector_operand (op0, mode0);
11415
11416 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11417 op0 = copy_to_mode_reg (mode0, op0);
11418 }
11419
11420 pat = GEN_FCN (icode) (target, op0);
11421 if (! pat)
11422 return 0;
11423 emit_insn (pat);
11424 return target;
11425}
11426
11427/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11428 sqrtss, rsqrtss, rcpss. */
11429
11430static rtx
11431ix86_expand_unop1_builtin (icode, arglist, target)
11432 enum insn_code icode;
11433 tree arglist;
11434 rtx target;
11435{
11436 rtx pat;
11437 tree arg0 = TREE_VALUE (arglist);
11438 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11439 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11440 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11441
11442 if (! target
11443 || GET_MODE (target) != tmode
11444 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11445 target = gen_reg_rtx (tmode);
11446
11447 if (VECTOR_MODE_P (mode0))
11448 op0 = safe_vector_operand (op0, mode0);
11449
11450 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11451 op0 = copy_to_mode_reg (mode0, op0);
11452
11453 pat = GEN_FCN (icode) (target, op0, op0);
11454 if (! pat)
11455 return 0;
11456 emit_insn (pat);
11457 return target;
11458}
11459
11460/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
11461
11462static rtx
11463ix86_expand_sse_compare (d, arglist, target)
8b60264b 11464 const struct builtin_description *d;
bd793c65
BS
11465 tree arglist;
11466 rtx target;
11467{
11468 rtx pat;
11469 tree arg0 = TREE_VALUE (arglist);
11470 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11471 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11472 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11473 rtx op2;
11474 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11475 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11476 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11477 enum rtx_code comparison = d->comparison;
11478
11479 if (VECTOR_MODE_P (mode0))
11480 op0 = safe_vector_operand (op0, mode0);
11481 if (VECTOR_MODE_P (mode1))
11482 op1 = safe_vector_operand (op1, mode1);
11483
11484 /* Swap operands if we have a comparison that isn't available in
11485 hardware. */
11486 if (d->flag)
11487 {
21e1b5f1
BS
11488 rtx tmp = gen_reg_rtx (mode1);
11489 emit_move_insn (tmp, op1);
bd793c65 11490 op1 = op0;
21e1b5f1 11491 op0 = tmp;
bd793c65 11492 }
21e1b5f1
BS
11493
11494 if (! target
11495 || GET_MODE (target) != tmode
11496 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
11497 target = gen_reg_rtx (tmode);
11498
11499 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11500 op0 = copy_to_mode_reg (mode0, op0);
11501 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11502 op1 = copy_to_mode_reg (mode1, op1);
11503
11504 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11505 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11506 if (! pat)
11507 return 0;
11508 emit_insn (pat);
11509 return target;
11510}
11511
11512/* Subroutine of ix86_expand_builtin to take care of comi insns. */
11513
11514static rtx
11515ix86_expand_sse_comi (d, arglist, target)
8b60264b 11516 const struct builtin_description *d;
bd793c65
BS
11517 tree arglist;
11518 rtx target;
11519{
11520 rtx pat;
11521 tree arg0 = TREE_VALUE (arglist);
11522 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11523 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11524 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11525 rtx op2;
11526 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11527 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11528 enum rtx_code comparison = d->comparison;
11529
11530 if (VECTOR_MODE_P (mode0))
11531 op0 = safe_vector_operand (op0, mode0);
11532 if (VECTOR_MODE_P (mode1))
11533 op1 = safe_vector_operand (op1, mode1);
11534
11535 /* Swap operands if we have a comparison that isn't available in
11536 hardware. */
11537 if (d->flag)
11538 {
11539 rtx tmp = op1;
11540 op1 = op0;
11541 op0 = tmp;
bd793c65
BS
11542 }
11543
11544 target = gen_reg_rtx (SImode);
11545 emit_move_insn (target, const0_rtx);
11546 target = gen_rtx_SUBREG (QImode, target, 0);
11547
11548 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11549 op0 = copy_to_mode_reg (mode0, op0);
11550 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11551 op1 = copy_to_mode_reg (mode1, op1);
11552
11553 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11554 pat = GEN_FCN (d->icode) (op0, op1, op2);
11555 if (! pat)
11556 return 0;
11557 emit_insn (pat);
29628f27
BS
11558 emit_insn (gen_rtx_SET (VOIDmode,
11559 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11560 gen_rtx_fmt_ee (comparison, QImode,
11561 gen_rtx_REG (CCmode, FLAGS_REG),
11562 const0_rtx)));
bd793c65 11563
6f1a6c5b 11564 return SUBREG_REG (target);
bd793c65
BS
11565}
11566
11567/* Expand an expression EXP that calls a built-in function,
11568 with result going to TARGET if that's convenient
11569 (and in mode MODE if that's convenient).
11570 SUBTARGET may be used as the target for computing one of EXP's operands.
11571 IGNORE is nonzero if the value is to be ignored. */
11572
11573rtx
11574ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11575 tree exp;
11576 rtx target;
11577 rtx subtarget ATTRIBUTE_UNUSED;
11578 enum machine_mode mode ATTRIBUTE_UNUSED;
11579 int ignore ATTRIBUTE_UNUSED;
11580{
8b60264b 11581 const struct builtin_description *d;
77ebd435 11582 size_t i;
bd793c65
BS
11583 enum insn_code icode;
11584 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11585 tree arglist = TREE_OPERAND (exp, 1);
e37af218 11586 tree arg0, arg1, arg2;
bd793c65
BS
11587 rtx op0, op1, op2, pat;
11588 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 11589 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
11590
11591 switch (fcode)
11592 {
11593 case IX86_BUILTIN_EMMS:
11594 emit_insn (gen_emms ());
11595 return 0;
11596
11597 case IX86_BUILTIN_SFENCE:
11598 emit_insn (gen_sfence ());
11599 return 0;
11600
bd793c65
BS
11601 case IX86_BUILTIN_PEXTRW:
11602 icode = CODE_FOR_mmx_pextrw;
11603 arg0 = TREE_VALUE (arglist);
11604 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11605 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11606 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11607 tmode = insn_data[icode].operand[0].mode;
11608 mode0 = insn_data[icode].operand[1].mode;
11609 mode1 = insn_data[icode].operand[2].mode;
11610
11611 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11612 op0 = copy_to_mode_reg (mode0, op0);
11613 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11614 {
11615 /* @@@ better error message */
11616 error ("selector must be an immediate");
6f1a6c5b 11617 return gen_reg_rtx (tmode);
bd793c65
BS
11618 }
11619 if (target == 0
11620 || GET_MODE (target) != tmode
11621 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11622 target = gen_reg_rtx (tmode);
11623 pat = GEN_FCN (icode) (target, op0, op1);
11624 if (! pat)
11625 return 0;
11626 emit_insn (pat);
11627 return target;
11628
11629 case IX86_BUILTIN_PINSRW:
11630 icode = CODE_FOR_mmx_pinsrw;
11631 arg0 = TREE_VALUE (arglist);
11632 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11633 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11634 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11635 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11636 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11637 tmode = insn_data[icode].operand[0].mode;
11638 mode0 = insn_data[icode].operand[1].mode;
11639 mode1 = insn_data[icode].operand[2].mode;
11640 mode2 = insn_data[icode].operand[3].mode;
11641
11642 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11643 op0 = copy_to_mode_reg (mode0, op0);
11644 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11645 op1 = copy_to_mode_reg (mode1, op1);
11646 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11647 {
11648 /* @@@ better error message */
11649 error ("selector must be an immediate");
11650 return const0_rtx;
11651 }
11652 if (target == 0
11653 || GET_MODE (target) != tmode
11654 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11655 target = gen_reg_rtx (tmode);
11656 pat = GEN_FCN (icode) (target, op0, op1, op2);
11657 if (! pat)
11658 return 0;
11659 emit_insn (pat);
11660 return target;
11661
11662 case IX86_BUILTIN_MASKMOVQ:
11663 icode = CODE_FOR_mmx_maskmovq;
11664 /* Note the arg order is different from the operand order. */
11665 arg1 = TREE_VALUE (arglist);
11666 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11667 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11668 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11669 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11670 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11671 mode0 = insn_data[icode].operand[0].mode;
11672 mode1 = insn_data[icode].operand[1].mode;
11673 mode2 = insn_data[icode].operand[2].mode;
11674
11675 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11676 op0 = copy_to_mode_reg (mode0, op0);
11677 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11678 op1 = copy_to_mode_reg (mode1, op1);
11679 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11680 op2 = copy_to_mode_reg (mode2, op2);
11681 pat = GEN_FCN (icode) (op0, op1, op2);
11682 if (! pat)
11683 return 0;
11684 emit_insn (pat);
11685 return 0;
11686
11687 case IX86_BUILTIN_SQRTSS:
11688 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11689 case IX86_BUILTIN_RSQRTSS:
11690 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11691 case IX86_BUILTIN_RCPSS:
11692 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11693
e37af218
RH
11694 case IX86_BUILTIN_ANDPS:
11695 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11696 arglist, target);
11697 case IX86_BUILTIN_ANDNPS:
11698 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11699 arglist, target);
11700 case IX86_BUILTIN_ORPS:
11701 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11702 arglist, target);
11703 case IX86_BUILTIN_XORPS:
11704 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11705 arglist, target);
11706
bd793c65
BS
11707 case IX86_BUILTIN_LOADAPS:
11708 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11709
11710 case IX86_BUILTIN_LOADUPS:
11711 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11712
11713 case IX86_BUILTIN_STOREAPS:
e37af218 11714 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 11715 case IX86_BUILTIN_STOREUPS:
e37af218 11716 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
11717
11718 case IX86_BUILTIN_LOADSS:
11719 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11720
11721 case IX86_BUILTIN_STORESS:
e37af218 11722 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 11723
0f290768 11724 case IX86_BUILTIN_LOADHPS:
bd793c65
BS
11725 case IX86_BUILTIN_LOADLPS:
11726 icode = (fcode == IX86_BUILTIN_LOADHPS
11727 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11728 arg0 = TREE_VALUE (arglist);
11729 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11730 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11731 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11732 tmode = insn_data[icode].operand[0].mode;
11733 mode0 = insn_data[icode].operand[1].mode;
11734 mode1 = insn_data[icode].operand[2].mode;
11735
11736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11737 op0 = copy_to_mode_reg (mode0, op0);
11738 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11739 if (target == 0
11740 || GET_MODE (target) != tmode
11741 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11742 target = gen_reg_rtx (tmode);
11743 pat = GEN_FCN (icode) (target, op0, op1);
11744 if (! pat)
11745 return 0;
11746 emit_insn (pat);
11747 return target;
0f290768 11748
bd793c65
BS
11749 case IX86_BUILTIN_STOREHPS:
11750 case IX86_BUILTIN_STORELPS:
11751 icode = (fcode == IX86_BUILTIN_STOREHPS
11752 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11753 arg0 = TREE_VALUE (arglist);
11754 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11755 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11756 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11757 mode0 = insn_data[icode].operand[1].mode;
11758 mode1 = insn_data[icode].operand[2].mode;
11759
11760 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11761 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11762 op1 = copy_to_mode_reg (mode1, op1);
11763
11764 pat = GEN_FCN (icode) (op0, op0, op1);
11765 if (! pat)
11766 return 0;
11767 emit_insn (pat);
11768 return 0;
11769
11770 case IX86_BUILTIN_MOVNTPS:
e37af218 11771 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 11772 case IX86_BUILTIN_MOVNTQ:
e37af218 11773 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
11774
11775 case IX86_BUILTIN_LDMXCSR:
11776 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11777 target = assign_386_stack_local (SImode, 0);
11778 emit_move_insn (target, op0);
11779 emit_insn (gen_ldmxcsr (target));
11780 return 0;
11781
11782 case IX86_BUILTIN_STMXCSR:
11783 target = assign_386_stack_local (SImode, 0);
11784 emit_insn (gen_stmxcsr (target));
11785 return copy_to_mode_reg (SImode, target);
11786
bd793c65
BS
11787 case IX86_BUILTIN_SHUFPS:
11788 icode = CODE_FOR_sse_shufps;
11789 arg0 = TREE_VALUE (arglist);
11790 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11791 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11792 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11793 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11794 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11795 tmode = insn_data[icode].operand[0].mode;
11796 mode0 = insn_data[icode].operand[1].mode;
11797 mode1 = insn_data[icode].operand[2].mode;
11798 mode2 = insn_data[icode].operand[3].mode;
11799
11800 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11801 op0 = copy_to_mode_reg (mode0, op0);
11802 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11803 op1 = copy_to_mode_reg (mode1, op1);
11804 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11805 {
11806 /* @@@ better error message */
11807 error ("mask must be an immediate");
6f1a6c5b 11808 return gen_reg_rtx (tmode);
bd793c65
BS
11809 }
11810 if (target == 0
11811 || GET_MODE (target) != tmode
11812 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11813 target = gen_reg_rtx (tmode);
11814 pat = GEN_FCN (icode) (target, op0, op1, op2);
11815 if (! pat)
11816 return 0;
11817 emit_insn (pat);
11818 return target;
11819
11820 case IX86_BUILTIN_PSHUFW:
11821 icode = CODE_FOR_mmx_pshufw;
11822 arg0 = TREE_VALUE (arglist);
11823 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11824 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11825 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11826 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
11827 mode1 = insn_data[icode].operand[1].mode;
11828 mode2 = insn_data[icode].operand[2].mode;
bd793c65 11829
29628f27
BS
11830 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11831 op0 = copy_to_mode_reg (mode1, op0);
11832 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
11833 {
11834 /* @@@ better error message */
11835 error ("mask must be an immediate");
11836 return const0_rtx;
11837 }
11838 if (target == 0
11839 || GET_MODE (target) != tmode
11840 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11841 target = gen_reg_rtx (tmode);
29628f27 11842 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
11843 if (! pat)
11844 return 0;
11845 emit_insn (pat);
11846 return target;
11847
47f339cf
BS
11848 case IX86_BUILTIN_FEMMS:
11849 emit_insn (gen_femms ());
11850 return NULL_RTX;
11851
11852 case IX86_BUILTIN_PAVGUSB:
11853 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11854
11855 case IX86_BUILTIN_PF2ID:
11856 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11857
11858 case IX86_BUILTIN_PFACC:
11859 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11860
11861 case IX86_BUILTIN_PFADD:
11862 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11863
11864 case IX86_BUILTIN_PFCMPEQ:
11865 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11866
11867 case IX86_BUILTIN_PFCMPGE:
11868 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11869
11870 case IX86_BUILTIN_PFCMPGT:
11871 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11872
11873 case IX86_BUILTIN_PFMAX:
11874 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11875
11876 case IX86_BUILTIN_PFMIN:
11877 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11878
11879 case IX86_BUILTIN_PFMUL:
11880 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11881
11882 case IX86_BUILTIN_PFRCP:
11883 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11884
11885 case IX86_BUILTIN_PFRCPIT1:
11886 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11887
11888 case IX86_BUILTIN_PFRCPIT2:
11889 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11890
11891 case IX86_BUILTIN_PFRSQIT1:
11892 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11893
11894 case IX86_BUILTIN_PFRSQRT:
11895 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11896
11897 case IX86_BUILTIN_PFSUB:
11898 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11899
11900 case IX86_BUILTIN_PFSUBR:
11901 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11902
11903 case IX86_BUILTIN_PI2FD:
11904 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11905
11906 case IX86_BUILTIN_PMULHRW:
11907 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11908
47f339cf
BS
11909 case IX86_BUILTIN_PF2IW:
11910 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11911
11912 case IX86_BUILTIN_PFNACC:
11913 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11914
11915 case IX86_BUILTIN_PFPNACC:
11916 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11917
11918 case IX86_BUILTIN_PI2FW:
11919 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11920
11921 case IX86_BUILTIN_PSWAPDSI:
11922 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11923
11924 case IX86_BUILTIN_PSWAPDSF:
11925 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11926
e37af218
RH
11927 case IX86_BUILTIN_SSE_ZERO:
11928 target = gen_reg_rtx (V4SFmode);
11929 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
11930 return target;
11931
bd793c65
BS
11932 case IX86_BUILTIN_MMX_ZERO:
11933 target = gen_reg_rtx (DImode);
11934 emit_insn (gen_mmx_clrdi (target));
11935 return target;
11936
11937 default:
11938 break;
11939 }
11940
11941 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11942 if (d->code == fcode)
11943 {
11944 /* Compares are treated specially. */
11945 if (d->icode == CODE_FOR_maskcmpv4sf3
11946 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11947 || d->icode == CODE_FOR_maskncmpv4sf3
11948 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11949 return ix86_expand_sse_compare (d, arglist, target);
11950
11951 return ix86_expand_binop_builtin (d->icode, arglist, target);
11952 }
11953
11954 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
11955 if (d->code == fcode)
11956 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 11957
bd793c65
BS
11958 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11959 if (d->code == fcode)
11960 return ix86_expand_sse_comi (d, arglist, target);
0f290768 11961
bd793c65
BS
11962 /* @@@ Should really do something sensible here. */
11963 return 0;
bd793c65 11964}
4211a8fb
JH
11965
11966/* Store OPERAND to the memory after reload is completed. This means
f710504c 11967 that we can't easily use assign_stack_local. */
4211a8fb
JH
11968rtx
11969ix86_force_to_memory (mode, operand)
11970 enum machine_mode mode;
11971 rtx operand;
11972{
898d374d 11973 rtx result;
4211a8fb
JH
11974 if (!reload_completed)
11975 abort ();
898d374d
JH
11976 if (TARGET_64BIT && TARGET_RED_ZONE)
11977 {
11978 result = gen_rtx_MEM (mode,
11979 gen_rtx_PLUS (Pmode,
11980 stack_pointer_rtx,
11981 GEN_INT (-RED_ZONE_SIZE)));
11982 emit_move_insn (result, operand);
11983 }
11984 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 11985 {
898d374d 11986 switch (mode)
4211a8fb 11987 {
898d374d
JH
11988 case HImode:
11989 case SImode:
11990 operand = gen_lowpart (DImode, operand);
11991 /* FALLTHRU */
11992 case DImode:
4211a8fb 11993 emit_insn (
898d374d
JH
11994 gen_rtx_SET (VOIDmode,
11995 gen_rtx_MEM (DImode,
11996 gen_rtx_PRE_DEC (DImode,
11997 stack_pointer_rtx)),
11998 operand));
11999 break;
12000 default:
12001 abort ();
12002 }
12003 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12004 }
12005 else
12006 {
12007 switch (mode)
12008 {
12009 case DImode:
12010 {
12011 rtx operands[2];
12012 split_di (&operand, 1, operands, operands + 1);
12013 emit_insn (
12014 gen_rtx_SET (VOIDmode,
12015 gen_rtx_MEM (SImode,
12016 gen_rtx_PRE_DEC (Pmode,
12017 stack_pointer_rtx)),
12018 operands[1]));
12019 emit_insn (
12020 gen_rtx_SET (VOIDmode,
12021 gen_rtx_MEM (SImode,
12022 gen_rtx_PRE_DEC (Pmode,
12023 stack_pointer_rtx)),
12024 operands[0]));
12025 }
12026 break;
12027 case HImode:
12028 /* It is better to store HImodes as SImodes. */
12029 if (!TARGET_PARTIAL_REG_STALL)
12030 operand = gen_lowpart (SImode, operand);
12031 /* FALLTHRU */
12032 case SImode:
4211a8fb 12033 emit_insn (
898d374d
JH
12034 gen_rtx_SET (VOIDmode,
12035 gen_rtx_MEM (GET_MODE (operand),
12036 gen_rtx_PRE_DEC (SImode,
12037 stack_pointer_rtx)),
12038 operand));
12039 break;
12040 default:
12041 abort ();
4211a8fb 12042 }
898d374d 12043 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 12044 }
898d374d 12045 return result;
4211a8fb
JH
12046}
12047
12048/* Free operand from the memory. */
12049void
12050ix86_free_from_memory (mode)
12051 enum machine_mode mode;
12052{
898d374d
JH
12053 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12054 {
12055 int size;
12056
12057 if (mode == DImode || TARGET_64BIT)
12058 size = 8;
12059 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12060 size = 2;
12061 else
12062 size = 4;
12063 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12064 to pop or add instruction if registers are available. */
12065 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12066 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12067 GEN_INT (size))));
12068 }
4211a8fb 12069}
a946dd00 12070
f84aa48a
JH
12071/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12072 QImode must go into class Q_REGS.
12073 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 12074 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
12075enum reg_class
12076ix86_preferred_reload_class (x, class)
12077 rtx x;
12078 enum reg_class class;
12079{
12080 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12081 {
12082 /* SSE can't load any constant directly yet. */
12083 if (SSE_CLASS_P (class))
12084 return NO_REGS;
12085 /* Floats can load 0 and 1. */
12086 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12087 {
12088 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12089 if (MAYBE_SSE_CLASS_P (class))
12090 return (reg_class_subset_p (class, GENERAL_REGS)
12091 ? GENERAL_REGS : FLOAT_REGS);
12092 else
12093 return class;
12094 }
12095 /* General regs can load everything. */
12096 if (reg_class_subset_p (class, GENERAL_REGS))
12097 return GENERAL_REGS;
12098 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12099 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12100 return NO_REGS;
12101 }
12102 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12103 return NO_REGS;
12104 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12105 return Q_REGS;
12106 return class;
12107}
12108
12109/* If we are copying between general and FP registers, we need a memory
12110 location. The same is true for SSE and MMX registers.
12111
12112 The macro can't work reliably when one of the CLASSES is class containing
12113 registers from multiple units (SSE, MMX, integer). We avoid this by never
12114 combining those units in single alternative in the machine description.
12115 Ensure that this constraint holds to avoid unexpected surprises.
12116
12117 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12118 enforce these sanity checks. */
12119int
12120ix86_secondary_memory_needed (class1, class2, mode, strict)
12121 enum reg_class class1, class2;
12122 enum machine_mode mode;
12123 int strict;
12124{
12125 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12126 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12127 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12128 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12129 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12130 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12131 {
12132 if (strict)
12133 abort ();
12134 else
12135 return 1;
12136 }
12137 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12138 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12139 && (mode) != SImode)
12140 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12141 && (mode) != SImode));
12142}
12143/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 12144 one in class CLASS2.
f84aa48a
JH
12145
12146 It is not required that the cost always equal 2 when FROM is the same as TO;
12147 on some machines it is expensive to move between registers if they are not
12148 general registers. */
12149int
12150ix86_register_move_cost (mode, class1, class2)
12151 enum machine_mode mode;
12152 enum reg_class class1, class2;
12153{
12154 /* In case we require secondary memory, compute cost of the store followed
12155 by load. In case of copying from general_purpose_register we may emit
12156 multiple stores followed by single load causing memory size mismatch
12157 stall. Count this as arbitarily high cost of 20. */
12158 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12159 {
92d0fb09 12160 int add_cost = 0;
62415523 12161 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 12162 add_cost = 20;
62415523 12163 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 12164 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 12165 }
92d0fb09 12166 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
12167 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12168 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
12169 return ix86_cost->mmxsse_to_integer;
12170 if (MAYBE_FLOAT_CLASS_P (class1))
12171 return ix86_cost->fp_move;
12172 if (MAYBE_SSE_CLASS_P (class1))
12173 return ix86_cost->sse_move;
12174 if (MAYBE_MMX_CLASS_P (class1))
12175 return ix86_cost->mmx_move;
f84aa48a
JH
12176 return 2;
12177}
12178
a946dd00
JH
12179/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12180int
12181ix86_hard_regno_mode_ok (regno, mode)
12182 int regno;
12183 enum machine_mode mode;
12184{
12185 /* Flags and only flags can only hold CCmode values. */
12186 if (CC_REGNO_P (regno))
12187 return GET_MODE_CLASS (mode) == MODE_CC;
12188 if (GET_MODE_CLASS (mode) == MODE_CC
12189 || GET_MODE_CLASS (mode) == MODE_RANDOM
12190 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12191 return 0;
12192 if (FP_REGNO_P (regno))
12193 return VALID_FP_MODE_P (mode);
12194 if (SSE_REGNO_P (regno))
12195 return VALID_SSE_REG_MODE (mode);
12196 if (MMX_REGNO_P (regno))
47f339cf 12197 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
12198 /* We handle both integer and floats in the general purpose registers.
12199 In future we should be able to handle vector modes as well. */
12200 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12201 return 0;
12202 /* Take care for QImode values - they can be in non-QI regs, but then
12203 they do cause partial register stalls. */
d2836273 12204 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
12205 return 1;
12206 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12207}
fa79946e
JH
12208
12209/* Return the cost of moving data of mode M between a
12210 register and memory. A value of 2 is the default; this cost is
12211 relative to those in `REGISTER_MOVE_COST'.
12212
12213 If moving between registers and memory is more expensive than
12214 between two registers, you should define this macro to express the
a4f31c00
AJ
12215 relative cost.
12216
fa79946e
JH
12217 Model also increased moving costs of QImode registers in non
12218 Q_REGS classes.
12219 */
12220int
12221ix86_memory_move_cost (mode, class, in)
12222 enum machine_mode mode;
12223 enum reg_class class;
12224 int in;
12225{
12226 if (FLOAT_CLASS_P (class))
12227 {
12228 int index;
12229 switch (mode)
12230 {
12231 case SFmode:
12232 index = 0;
12233 break;
12234 case DFmode:
12235 index = 1;
12236 break;
12237 case XFmode:
12238 case TFmode:
12239 index = 2;
12240 break;
12241 default:
12242 return 100;
12243 }
12244 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12245 }
12246 if (SSE_CLASS_P (class))
12247 {
12248 int index;
12249 switch (GET_MODE_SIZE (mode))
12250 {
12251 case 4:
12252 index = 0;
12253 break;
12254 case 8:
12255 index = 1;
12256 break;
12257 case 16:
12258 index = 2;
12259 break;
12260 default:
12261 return 100;
12262 }
12263 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12264 }
12265 if (MMX_CLASS_P (class))
12266 {
12267 int index;
12268 switch (GET_MODE_SIZE (mode))
12269 {
12270 case 4:
12271 index = 0;
12272 break;
12273 case 8:
12274 index = 1;
12275 break;
12276 default:
12277 return 100;
12278 }
12279 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12280 }
12281 switch (GET_MODE_SIZE (mode))
12282 {
12283 case 1:
12284 if (in)
12285 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12286 : ix86_cost->movzbl_load);
12287 else
12288 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12289 : ix86_cost->int_store[0] + 4);
12290 break;
12291 case 2:
12292 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12293 default:
12294 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
12295 if (mode == TFmode)
12296 mode = XFmode;
3bb7e126 12297 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
12298 * (int) GET_MODE_SIZE (mode) / 4);
12299 }
12300}
0ecf09f9 12301
2cc07db4
RH
#ifdef DO_GLOBAL_CTORS_BODY
/* Emit code into the .init section that pushes the address of SYMBOL,
   registering it as a global constructor (SVR3 collect scheme).  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
This page took 3.029258 seconds and 5 git commands to generate.