/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia32_multipass_dfa_lookahead

static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,             /* cost of an add instruction */
  3,             /* cost of a lea instruction */
  2,             /* variable shift costs */
  3,             /* constant shift costs */
  3,             /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  3,             /* cost of a divide/mod */
  3,             /* cost of movsx */
  3,             /* cost of movzx */
  0,             /* "large" insn */
  2,             /* MOVE_RATIO */
  2,             /* cost for loading QImode using movzbl */
  {2, 2, 2},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 2, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {2, 2, 2},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {2, 2, 2},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  3,             /* cost of moving MMX register */
  {3, 3},        /* cost of loading MMX registers
                    in SImode and DImode */
  {3, 3},        /* cost of storing MMX registers
                    in SImode and DImode */
  3,             /* cost of moving SSE register */
  {3, 3, 3},     /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {3, 3, 3},     /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  3,             /* MMX or SSE register to integer */
  0,             /* size of prefetch block */
  0,             /* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,             /* cost of an add instruction */
  1,             /* cost of a lea instruction */
  3,             /* variable shift costs */
  2,             /* constant shift costs */
  6,             /* cost of starting a multiply */
  1,             /* cost of multiply per each bit set */
  23,            /* cost of a divide/mod */
  3,             /* cost of movsx */
  2,             /* cost of movzx */
  15,            /* "large" insn */
  3,             /* MOVE_RATIO */
  4,             /* cost for loading QImode using movzbl */
  {2, 4, 2},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 4, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {8, 8, 8},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {8, 8, 8},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {4, 8},        /* cost of loading MMX registers
                    in SImode and DImode */
  {4, 8},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {4, 8, 16},    /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {4, 8, 16},    /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  3,             /* MMX or SSE register to integer */
  0,             /* size of prefetch block */
  0,             /* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,             /* cost of an add instruction */
  1,             /* cost of a lea instruction */
  3,             /* variable shift costs */
  2,             /* constant shift costs */
  12,            /* cost of starting a multiply */
  1,             /* cost of multiply per each bit set */
  40,            /* cost of a divide/mod */
  3,             /* cost of movsx */
  2,             /* cost of movzx */
  15,            /* "large" insn */
  3,             /* MOVE_RATIO */
  4,             /* cost for loading QImode using movzbl */
  {2, 4, 2},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 4, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {8, 8, 8},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {8, 8, 8},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {4, 8},        /* cost of loading MMX registers
                    in SImode and DImode */
  {4, 8},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {4, 8, 16},    /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {4, 8, 16},    /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  3,             /* MMX or SSE register to integer */
  0,             /* size of prefetch block */
  0,             /* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,             /* cost of an add instruction */
  1,             /* cost of a lea instruction */
  4,             /* variable shift costs */
  1,             /* constant shift costs */
  11,            /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  25,            /* cost of a divide/mod */
  3,             /* cost of movsx */
  2,             /* cost of movzx */
  8,             /* "large" insn */
  6,             /* MOVE_RATIO */
  6,             /* cost for loading QImode using movzbl */
  {2, 4, 2},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 4, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {2, 2, 6},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 6},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  8,             /* cost of moving MMX register */
  {8, 8},        /* cost of loading MMX registers
                    in SImode and DImode */
  {8, 8},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {4, 8, 16},    /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {4, 8, 16},    /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  3,             /* MMX or SSE register to integer */
  0,             /* size of prefetch block */
  0,             /* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,             /* cost of an add instruction */
  1,             /* cost of a lea instruction */
  1,             /* variable shift costs */
  1,             /* constant shift costs */
  4,             /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  17,            /* cost of a divide/mod */
  1,             /* cost of movsx */
  1,             /* cost of movzx */
  8,             /* "large" insn */
  6,             /* MOVE_RATIO */
  2,             /* cost for loading QImode using movzbl */
  {4, 4, 4},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 2, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {2, 2, 6},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 6},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                    in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {2, 2, 8},     /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},     /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  3,             /* MMX or SSE register to integer */
  32,            /* size of prefetch block */
  6,             /* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,             /* cost of an add instruction */
  2,             /* cost of a lea instruction */
  1,             /* variable shift costs */
  1,             /* constant shift costs */
  3,             /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  18,            /* cost of a divide/mod */
  2,             /* cost of movsx */
  2,             /* cost of movzx */
  8,             /* "large" insn */
  4,             /* MOVE_RATIO */
  3,             /* cost for loading QImode using movzbl */
  {4, 5, 4},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 3, 2},     /* cost of storing integer registers */
  4,             /* cost of reg,reg fld/fst */
  {6, 6, 6},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                    in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {2, 2, 8},     /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},     /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  6,             /* MMX or SSE register to integer */
  32,            /* size of prefetch block */
  1,             /* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,             /* cost of an add instruction */
  2,             /* cost of a lea instruction */
  1,             /* variable shift costs */
  1,             /* constant shift costs */
  5,             /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  42,            /* cost of a divide/mod */
  1,             /* cost of movsx */
  1,             /* cost of movzx */
  8,             /* "large" insn */
  9,             /* MOVE_RATIO */
  4,             /* cost for loading QImode using movzbl */
  {4, 5, 4},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 3, 2},     /* cost of storing integer registers */
  4,             /* cost of reg,reg fld/fst */
  {6, 6, 20},    /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 16},    /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                    in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                    in SImode and DImode */
  2,             /* cost of moving SSE register */
  {2, 2, 8},     /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},     /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  6,             /* MMX or SSE register to integer */
  64,            /* size of prefetch block */
  6,             /* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,             /* cost of an add instruction */
  1,             /* cost of a lea instruction */
  8,             /* variable shift costs */
  8,             /* constant shift costs */
  30,            /* cost of starting a multiply */
  0,             /* cost of multiply per each bit set */
  112,           /* cost of a divide/mod */
  1,             /* cost of movsx */
  1,             /* cost of movzx */
  16,            /* "large" insn */
  6,             /* MOVE_RATIO */
  2,             /* cost for loading QImode using movzbl */
  {4, 5, 4},     /* cost of loading integer registers
                    in QImode, HImode and SImode.
                    Relative to reg-reg move (2).  */
  {2, 3, 2},     /* cost of storing integer registers */
  2,             /* cost of reg,reg fld/fst */
  {2, 2, 6},     /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 6},     /* cost of storing fp registers
                    in SFmode, DFmode and XFmode */
  2,             /* cost of moving MMX register */
  {2, 2},        /* cost of loading MMX registers
                    in SImode and DImode */
  {2, 2},        /* cost of storing MMX registers
                    in SImode and DImode */
  12,            /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},     /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  10,            /* MMX or SSE register to integer */
  64,            /* size of prefetch block */
  6,             /* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

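/* Illustrative sketch (not part of the original source): cost queries
   elsewhere in the compiler are assumed to go through the ix86_cost
   pointer selected above, along the lines of

     total = COSTS_N_INSNS (ix86_cost->add);

   where the field names (add, lea, divide, ...) are assumed to follow
   the declaration order of struct processor_costs in i386.h.  */
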
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;

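/* Illustrative sketch (not part of the original source): each mask above
   is meant to be tested against the bit of the processor being tuned for,
   using the same idiom that appears later in this file, e.g.

     if (x86_use_leave & (1 << ix86_cpu))
       ...use the shorter "leave" epilogue...

   i386.h is assumed to wrap such tests in TARGET_* convenience macros;
   compare the x86_arch_always_fancy_math_387 test in override_options.  */
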
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,          /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
                                                      1 /*RDX*/, 2 /*RCX*/,
                                                      FIRST_REX_INT_REG /*R8 */,
                                                      FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/,
                                                   5 /*RDI*/, 4 /*RSI*/};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,          /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,  /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,  /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,    /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,  /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
     0 for %eax (gcc regno = 0)
     1 for %ecx (gcc regno = 2)
     2 for %edx (gcc regno = 1)
     3 for %ebx (gcc regno = 3)
     4 for %esp (gcc regno = 7)
     5 for %ebp (gcc regno = 6)
     6 for %esi (gcc regno = 4)
     7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
     8 for %eip (no gcc equivalent)
     9 for %eflags (gcc regno = 17)
     10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
     11 for %st(0) (gcc regno = 8)
     12 for %st(1) (gcc regno = 9)
     13 for %st(2) (gcc regno = 10)
     14 for %st(3) (gcc regno = 11)
     15 for %st(4) (gcc regno = 12)
     16 for %st(5) (gcc regno = 13)
     17 for %st(6) (gcc regno = 14)
     18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

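/* Illustrative sketch (not part of the original source): ix86_stack_locals
   caches one scratch stack slot per (mode, slot number) pair.  A consumer
   is assumed to look roughly like

     rtx slot = ix86_stack_locals[(int) SImode][0];
     if (slot == NULL_RTX)
       ...allocate the slot with assign_stack_local and cache it...

   with the actual helper presumably defined later in this file.  */
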
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                        <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
                        )
   [va_arg registers]  (
                        > to_allocate   <- FRAME_POINTER
   [frame]             (
                        )
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;            /* for -mcpu=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the sse prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
                                        int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
                                                           rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
                                         tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
                                            tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
                                                     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
                                              enum rtx_code *,
                                              enum rtx_code *,
                                              enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
                                           rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

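/* Illustrative example (not part of the original source), following the
   psABI classification sketched above: a

     struct { double d; int i; }

   argument occupies two 64bit parts; the first classifies as
   X86_64_SSEDF_CLASS (the double, moved as DFmode) and the second as
   X86_64_INTEGERSI_CLASS (the int plus 4 bytes of padding, moved as
   SImode).  */
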
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
                                      enum x86_64_reg_class [MAX_CLASSES],
                                      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
                                     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
                                        const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
                                                    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
                                                         HOST_WIDE_INT));
# undef TARGET_ASM_FUNCTION_PROLOGUE
# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;
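
/* Illustrative note (not part of the original source): TARGET_INITIALIZER
   gathers the macro definitions above into the targetm hook vector, so
   target-independent code is assumed to reach this file through calls such
   as

     cost = targetm.sched.adjust_cost (insn, link, dep_insn, cost);

   rather than by naming ix86_adjust_cost directly.  */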

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;  /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
        {
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_MMX = 4,
          PTA_PREFETCH_SSE = 8,
          PTA_3DNOW = 16,
          PTA_3DNOW_A = 64
        } flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                       | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
    };
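
  /* Illustrative example (not part of the original source): with the table
     above, `-march=athlon-xp' selects PROCESSOR_ATHLON and implies MMX,
     3DNow!, the Athlon 3DNow! extensions and SSE (plus SSE prefetch),
     exactly as if the corresponding -m switches had been given, except
     where the user already set a flag explicitly (the MASK_*_SET checks
     in the loops below).  */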

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
        sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
        ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
        ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
        ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
        ix86_cmodel = CM_LARGE;
      else
        error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
        ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
        ix86_asm_dialect = ASM_ATT;
      else
        error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
           ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_cpu = ix86_arch;
        if (processor_alias_table[i].flags & PTA_MMX
            && !(target_flags & MASK_MMX_SET))
          target_flags |= MASK_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(target_flags & MASK_3DNOW_SET))
          target_flags |= MASK_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(target_flags & MASK_3DNOW_A_SET))
          target_flags |= MASK_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(target_flags & MASK_SSE_SET))
          target_flags |= MASK_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(target_flags & MASK_SSE2_SET))
          target_flags |= MASK_SSE2;
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
        ix86_cpu = processor_alias_table[i].processor;
        /* Check the flags here, inside the loop, so that we never index
           past the end of the table when the name is not found.  */
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
        error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
        ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
        {
          i = atoi (ix86_align_loops_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_loops = 1 << i;
        }
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
        {
          i = atoi (ix86_align_jumps_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_jumps = 1 << i;
        }
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
        {
          i = atoi (ix86_align_funcs_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_functions = 1 << i;
        }
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_cpu].align_func;

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
                                   ? (TARGET_64BIT ? 64 : 32)
                                   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
        error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
               TARGET_64BIT ? 3 : 2);
      else
        ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
        error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
        ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
        error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
        error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable the SSE and MMX builtins by default.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
        ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
        {
          if (!TARGET_SSE)
            {
              warning ("SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else
            ix86_fpmath = FPMATH_SSE;
        }
      else if (! strcmp (ix86_fpmath_string, "387,sse")
               || ! strcmp (ix86_fpmath_string, "sse,387"))
        {
          if (!TARGET_SSE)
            {
              warning ("SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else if (!TARGET_80387)
            {
              warning ("387 instruction set disabled, using SSE arithmetics");
              ix86_fpmath = FPMATH_SSE;
            }
          else
            ix86_fpmath = FPMATH_SSE | FPMATH_387;
        }
      else
        error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
         extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
        target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends
     to make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

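/* Illustrative usage (not part of the original source) of the attributes
   accepted above, as they would appear in user code:

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (2))) g (int a, int b);

   `stdcall' makes f pop its own arguments on return; `regparm (2)' makes
   g take its first two integer arguments in registers.  */
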
/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning ("`%s' attribute requires an integer constant argument",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning ("argument to `%s' attribute larger than %d",
                   IDENTIFIER_POINTER (name), REGPARM_MAX);
          *no_add_attrs = true;
        }
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = current_function_profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
        {
          fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
          fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
        }

      else if (HALF_PIC_P ())
        {
          rtx symref;

          HALF_PIC_EXTERNAL ("_mcount_ptr");
          symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
                                                     "_mcount_ptr"));

          fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
          fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
                   XSTR (symref, 0));
          fprintf (file, "\tcall *(%%eax)\n");
        }

      else
        {
          static int call_no = 0;

          fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
          fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
          fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
                   lprefix, call_no++);
          fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
                   lprefix, labelno);
          fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
                   prefix);
          fprintf (file, "\tcall *(%%eax)\n");
        }
    }

#else /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
        {
          fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
          fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
        }

      else
        {
          static int call_no = 0;

          fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
          fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
          fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
                   lprefix, call_no++);
          fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
                   lprefix, labelno);
          fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
                   prefix);
          fprintf (file, "\tcall *(%%eax)\n");
        }
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched calling conventions (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
            || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
                == void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    {
      int nregs = ix86_regparm;

      if (funtype)
        {
          tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));

          if (attr)
            nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
        }

      if (!nregs)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
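
/* Illustrative example (a hypothetical declaration, not taken from this
   file):

     void g (int a, int b) __attribute__ ((stdcall));

   g has a fixed argument list, so with the attribute (or -mrtd) the
   callee pops the 8 bytes of arguments itself, returning with `ret $8'
   instead of a plain `ret'.  */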

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
            || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
        fprintf (stderr, "fntype code = %s, ret code = %s",
                 tree_code_name[(int) TREE_CODE (fntype)],
                 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
        fprintf (stderr, "no fntype");

      if (libname)
        fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
        cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
           param != 0; param = next_param)
        {
          next_param = TREE_CHAIN (param);
          if (next_param == 0 && TREE_VALUE (param) != void_type_node)
            {
              if (!TARGET_64BIT)
                cum->nregs = 0;
              cum->maybe_vaarg = true;
            }
        }
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* x86-64 register passing implementation.  See the x86-64 PS ABI for
   details.  The goal of this code is to classify each 8-byte chunk of the
   incoming argument by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
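
/* Worked example (illustrative only): for

     struct s { int i; float f; };

   both fields share the first eightbyte.  The int classifies as
   INTEGERSI and the float, at bit offset 32, as SSE, so rule #4 above
   merges them to INTEGER and the whole struct travels in a single
   general purpose register.  */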

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits, modulo 256, to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle zero size as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
        {
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = int_bit_position (field) / 8 / 8;
                           i < (int_bit_position (field)
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
                        classes[i] =
                          merge_classes (X86_64_INTEGER_CLASS,
                                         classes[i]);
                    }
                  else
                    {
                      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                               TREE_TYPE (field), subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
                      for (i = 0; i < num; i++)
                        {
                          int pos =
                            (int_bit_position (field) + bit_offset) / 8 / 8;
                          classes[i + pos] =
                            merge_classes (subclasses[i], classes[i + pos]);
                        }
                    }
                }
            }
        }
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
        {
          int num;
          num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                   TREE_TYPE (type), subclasses, bit_offset);
          if (!num)
            return 0;

          /* The partial classes are now full classes.  */
          if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
            subclasses[0] = X86_64_SSE_CLASS;
          if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
            subclasses[0] = X86_64_INTEGER_CLASS;

          for (i = 0; i < words; i++)
            classes[i] = subclasses[i % num];
        }
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
               || TREE_CODE (type) == QUAL_UNION_TYPE)
        {
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;
                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset);
                  if (!num)
                    return 0;
                  for (i = 0; i < num; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
        }
      else
        abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
            classes[i] = X86_64_SSE_CLASS;

          /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
            classes[i] = X86_64_SSE_CLASS;
        }
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case V4SFmode:
    case V4SImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      abort ();
    }
}
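
/* Worked example (illustrative only): for

     struct s { double d; int i; };

   classify_argument returns 2 with classes[0] = SSEDF and
   classes[1] = INTEGER, so the double travels in an SSE register and
   the remaining eightbyte in a general purpose register.  A struct
   larger than 16 bytes would return 0 and be passed in memory.  */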

/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
        if (!in_return)
          return 0;
        break;
      case X86_64_MEMORY_CLASS:
        abort ();
      }
  return 1;
}
/* Construct a container for the argument, as used by the GCC interface.
   See FUNCTION_ARG for the detailed description.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int * intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
        fprintf (stderr, "Memory class\n");
      else
        {
          fprintf (stderr, "Classes:");
          for (i = 0; i < n; i++)
            {
              fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
            }
          fprintf (stderr, "\n");
        }
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      default:
        abort ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
        {
        case X86_64_NO_CLASS:
          break;
        case X86_64_INTEGER_CLASS:
        case X86_64_INTEGERSI_CLASS:
          /* Merge TImodes on aligned occasions here too.  */
          if (i * 8 + 8 > bytes)
            tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
          else if (class[i] == X86_64_INTEGERSI_CLASS)
            tmpmode = SImode;
          else
            tmpmode = DImode;
          /* We've been asked for a trailing chunk of a size no integer
             mode covers.  Use DImode.  */
          if (tmpmode == BLKmode)
            tmpmode = DImode;
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (tmpmode, *intreg),
                                             GEN_INT (i*8));
          intreg++;
          break;
        case X86_64_SSESF_CLASS:
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (SFmode,
                                                          SSE_REGNO (sse_regno)),
                                             GEN_INT (i*8));
          sse_regno++;
          break;
        case X86_64_SSEDF_CLASS:
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (DFmode,
                                                          SSE_REGNO (sse_regno)),
                                             GEN_INT (i*8));
          sse_regno++;
          break;
        case X86_64_SSE_CLASS:
          {
            /* An SSEUP word merely extends the preceding SSE word, so the
               chunk starts at the SSE word's offset; remember it before
               skipping the SSEUP entry, and never read past class[n - 1].  */
            int pos = i;
            if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
              tmpmode = TImode, i++;
            else
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (pos * 8));
            sse_regno++;
          }
          break;
        default:
          abort ();
        }
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
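
/* Illustrative only: for struct s { double d; int i; } the PARALLEL
   built by construct_container is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. each expr_list pairs a hard register with the byte offset of
   the eightbyte it carries.  */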

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
             "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
             words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
        cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
        {
          cum->nregs -= int_nregs;
          cum->sse_nregs -= sse_nregs;
          cum->regno += int_nregs;
          cum->sse_regno += sse_nregs;
        }
      else
        cum->words += words;
    }
  else
    {
      if (TARGET_SSE && mode == TImode)
        {
          cum->sse_words += words;
          cum->sse_nregs -= 1;
          cum->sse_regno += 1;
          if (cum->sse_nregs <= 0)
            {
              cum->sse_nregs = 0;
              cum->sse_regno = 0;
            }
        }
      else
        {
          cum->words += words;
          cum->nregs -= words;
          cum->regno += words;

          if (cum->nregs <= 0)
            {
              cum->nregs = 0;
              cum->regno = 0;
            }
        }
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing the number of SSE registers
     used by varargs x86-64 functions.  For the i386 ABI just return
     constm1_rtx to avoid any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
        return GEN_INT (cum->maybe_vaarg
                        ? (cum->sse_nregs < 0
                           ? SSE_REGPARM_MAX
                           : cum->sse_regno)
                        : -1);
      else
        return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
                               &x86_64_int_parameter_registers [cum->regno],
                               cum->sse_regno);
  else
    switch (mode)
      {
        /* For now, pass fp/complex values on the stack.  */
      default:
        break;

      case BLKmode:
      case DImode:
      case SImode:
      case HImode:
      case QImode:
        if (words <= cum->nregs)
          ret = gen_rtx_REG (mode, cum->regno);
        break;
      case TImode:
        if (cum->sse_nregs)
          ret = gen_rtx_REG (mode, cum->sse_regno);
        break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
               words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
        fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
        fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* Gives the alignment boundary, in bits, of an argument with the specified
   mode and type.  */

int
ix86_function_arg_boundary (mode, type)
     enum machine_mode mode;
     tree type;
{
  int align;
  if (!TARGET_64BIT)
    return PARM_BOUNDARY;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.  */
bool
ix86_function_value_regno_p (regno)
     int regno;
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
              || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
          || ((regno) == FIRST_SSE_REG && TARGET_SSE)
          || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (valtype)
     tree valtype;
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
                                     REGPARM_MAX, SSE_REGPARM_MAX,
                                     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
         need to keep the rest of the compiler happy by returning a
         meaningful value.  */
      if (!ret)
        ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
}

/* Return nonzero iff TYPE is returned in memory.  */
int
ix86_return_in_memory (type)
     tree type;
{
  int needed_intregs, needed_sseregs;
  if (TARGET_64BIT)
    {
      return !examine_argument (TYPE_MODE (type), type, 1,
                                &needed_intregs, &needed_sseregs);
    }
  else
    {
      if (TYPE_MODE (type) == BLKmode
          || (VECTOR_MODE_P (TYPE_MODE (type))
              && int_size_in_bytes (type) == 8)
          || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
              && TYPE_MODE (type) != TFmode
              && !VECTOR_MODE_P (TYPE_MODE (type))))
        return 1;
      return 0;
    }
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
          return gen_rtx_REG (mode, FIRST_SSE_REG);
        case TFmode:
        case TCmode:
          return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        default:
          return gen_rtx_REG (mode, 0);
        }
    }
  else
    return gen_rtx_REG (mode, VALUE_REGNO (mode));
}

/* Create the va_list data type.  */

tree
ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
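
/* The record built above corresponds to the C-level type the x86-64
   psABI specifies (shown here for illustration only):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/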

/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
              && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains the
         number of SSE parameter registers used to call this function.  We use
         the sse_prologue_save insn template that produces a computed jump
         across the SSE saves.  We need some preparation work to get this
         working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute the address to jump to:
         label + next_cum.sse_regno * 4 - eax * 4.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      if (next_cum.sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (next_cum.sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute the address of the memory block we save into.  We always use
         a pointer pointing 127 bytes after the first byte to store - this is
         needed to keep instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
             (int) words, (int) n_gpr, (int) n_fpr);

  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
             build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
             build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
               build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     The prologue of the function saves it right above the stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
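
/* Illustrative walk-through (values derived from the assignments above,
   not separately specified here): in a function whose named arguments
   consumed two integer registers and one SSE register, va_start leaves
   gp_offset = 16 and fp_offset = 8 * REGPARM_MAX + 16, points
   overflow_arg_area just past the named stack arguments, and points
   reg_save_area at the block the prologue saved.  */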

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
                                   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
                        &needed_intregs, &needed_sseregs);

      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
                   || TYPE_ALIGN (type) > 128);

      /* When passing a structure, verify that it occupies a consecutive
         block of the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          int_addr_rtx = addr_rtx;
          sse_addr_rtx = addr_rtx;
        }
      else
        {
          int_addr_rtx = gen_reg_rtx (Pmode);
          sse_addr_rtx = gen_reg_rtx (Pmode);
        }
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          emit_cmp_and_jump_insns (expand_expr
                                   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
                                   GEN_INT ((REGPARM_MAX - needed_intregs +
                                             1) * 8), GE, const1_rtx, SImode,
                                   1, lab_false);
        }
      if (needed_sseregs)
        {
          emit_cmp_and_jump_insns (expand_expr
                                   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
                                   GEN_INT ((SSE_REGPARM_MAX -
                                             needed_sseregs + 1) * 16 +
                                            REGPARM_MAX * 8), GE, const1_rtx,
                                   SImode, 1, lab_false);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
          r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
          if (r != int_addr_rtx)
            emit_move_insn (int_addr_rtx, r);
        }
      if (needed_sseregs)
        {
          t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
          r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
          if (r != sse_addr_rtx)
            emit_move_insn (sse_addr_rtx, r);
        }
      if (need_temp)
        {
          int i;
          rtx mem;

          /* Never use the memory itself, as it has the alias set.  */
          addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
          mem = gen_rtx_MEM (BLKmode, addr_rtx);
          set_mem_alias_set (mem, get_varargs_alias_set ());
          set_mem_align (mem, BITS_PER_UNIT);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              rtx src_addr;
              rtx src_mem;
              int src_offset;
              rtx dest_mem;

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr_rtx;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr_rtx;
                  src_offset = REGNO (reg) * 8;
                }
              src_mem = gen_rtx_MEM (mode, src_addr);
              set_mem_alias_set (src_mem, get_varargs_alias_set ());
              src_mem = adjust_address (src_mem, mode, src_offset);
              dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
              emit_move_insn (dest_mem, src_mem);
            }
        }

      if (needed_intregs)
        {
          t =
            build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                   build_int_2 (needed_intregs * 8, 0));
          t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
          TREE_SIDE_EFFECTS (t) = 1;
          expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
        }
      if (needed_sseregs)
        {
          t =
            build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                   build_int_2 (needed_sseregs * 16, 0));
          t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
          TREE_SIDE_EFFECTS (t) = 1;
          expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
        }

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
           build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  return addr_rtx;
}

/* Return nonzero if OP is a general operand representable on x86-64.  */

int
x86_64_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a general operand representable on x86-64
   as either a sign extended or zero extended constant.  */

int
x86_64_szext_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand representable on x86-64.  */

int
x86_64_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand acceptable by movabs
   patterns.  */

int
x86_64_movabs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if OP is a nonmemory operand representable on x86-64
   as either a sign extended or zero extended constant.  */

int
x86_64_szext_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86-64.  */

int
x86_64_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86-64
   as a zero extended constant.  */

int
x86_64_zext_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
          || GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == UNSPEC
              && (XINT (op, 1) == 6
                  || XINT (op, 1) == 7
                  || XINT (op, 1) == 15)))
        return 1;
      if (GET_CODE (op) != PLUS
          || GET_CODE (XEXP (op, 1)) != CONST_INT)
        return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
          || GET_CODE (op) == LABEL_REF)
        return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
          || XINT (op, 1) != 7)
        return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
          || GET_CODE (op) == LABEL_REF)
        return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
        return 1;
    }
  else
    {
      if (GET_CODE (op) == UNSPEC)
        return 1;
      if (GET_CODE (op) != PLUS
          || GET_CODE (XEXP (op, 1)) != CONST_INT)
        return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
        return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* These we've been told are local by varasm and encode_section_info
     respectively.  */
  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke ENCODE_SECTION_INFO.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
               internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
          || op == frame_pointer_rtx
          || (REGNO (op) >= FIRST_PSEUDO_REGISTER
              && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
          && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
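
/* For illustration: these constants are the scale factors the lea
   addressing mode accepts, e.g.

     leal (%eax,%ebx,4), %ecx	# ecx = eax + ebx*4
*/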

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as operand of DImode shift
   expander.  */

int
shiftdi_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return ANY_QI_REG_P (op);
}
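
/* For reference (illustrative): on IA-32 the Q_REGS class consists of
   %eax, %ebx, %ecx and %edx - the registers whose low bytes (%al etc.)
   are directly addressable.  */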

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
9076b9c1 3216/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3217int
9076b9c1
JH
3218ix86_comparison_operator (op, mode)
3219 register rtx op;
3220 enum machine_mode mode;
e075ae69 3221{
9076b9c1 3222 enum machine_mode inmode;
9a915772 3223 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3224 if (mode != VOIDmode && GET_MODE (op) != mode)
3225 return 0;
9a915772
JH
3226 if (GET_RTX_CLASS (code) != '<')
3227 return 0;
3228 inmode = GET_MODE (XEXP (op, 0));
3229
3230 if (inmode == CCFPmode || inmode == CCFPUmode)
3231 {
3232 enum rtx_code second_code, bypass_code;
3233 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3234 return (bypass_code == NIL && second_code == NIL);
3235 }
3236 switch (code)
3a3677ff
RH
3237 {
3238 case EQ: case NE:
3a3677ff 3239 return 1;
9076b9c1 3240 case LT: case GE:
7e08e190 3241 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3242 || inmode == CCGOCmode || inmode == CCNOmode)
3243 return 1;
3244 return 0;
7e08e190 3245 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3246 if (inmode == CCmode)
3247 return 1;
3248 return 0;
3249 case GT: case LE:
7e08e190 3250 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3251 return 1;
3252 return 0;
3253 default:
3254 return 0;
3255 }
3256}
3257
9076b9c1 3258/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3259
3260int
3261fcmov_comparison_operator (op, mode)
3262 register rtx op;
3263 enum machine_mode mode;
3264{
b62d22a2 3265 enum machine_mode inmode;
9a915772 3266 enum rtx_code code = GET_CODE (op);
3267 if (mode != VOIDmode && GET_MODE (op) != mode)
3268 return 0;
3269 if (GET_RTX_CLASS (code) != '<')
3270 return 0;
3271 inmode = GET_MODE (XEXP (op, 0));
3272 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3273 {
3274 enum rtx_code second_code, bypass_code;
3275 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3276 if (bypass_code != NIL || second_code != NIL)
3277 return 0;
3278 code = ix86_fp_compare_code_to_integer (code);
3279 }
3280 /* The i387 supports only a limited set of condition codes. */
3281 switch (code)
3282 {
3283 case LTU: case GTU: case LEU: case GEU:
3284 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3285 return 1;
3286 return 0;
3287 case ORDERED: case UNORDERED:
3288 case EQ: case NE:
3289 return 1;
3290 default:
3291 return 0;
3292 }
e075ae69 3293}
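/* Illustrative mapping (not in the original source): after the integer
   conversion above, the accepted codes correspond to the fcmov
   condition variants and their negations:
   LTU -> fcmovb, LEU -> fcmovbe, EQ -> fcmove, UNORDERED -> fcmovu,
   GEU -> fcmovnb, GTU -> fcmovnbe, NE -> fcmovne, ORDERED -> fcmovnu.  */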
b840bfb0 3294
3295/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3296
3297int
3298promotable_binary_operator (op, mode)
3299 register rtx op;
3300 enum machine_mode mode ATTRIBUTE_UNUSED;
3301{
3302 switch (GET_CODE (op))
3303 {
3304 case MULT:
3305 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3306 but the 386 and 486 do HImode multiplies faster. */
3307 return ix86_cpu > PROCESSOR_I486;
3308 case PLUS:
3309 case AND:
3310 case IOR:
3311 case XOR:
3312 case ASHIFT:
3313 return 1;
3314 default:
3315 return 0;
3316 }
3317}
3318
3319/* Nearly general operand, but accept any const_double, since we wish
3320 to be able to drop them into memory rather than have them get pulled
3321 into registers. */
b840bfb0 3322
2a2ab3f9 3323int
3324cmp_fp_expander_operand (op, mode)
3325 register rtx op;
3326 enum machine_mode mode;
2a2ab3f9 3327{
e075ae69 3328 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3329 return 0;
e075ae69 3330 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3331 return 1;
e075ae69 3332 return general_operand (op, mode);
3333}
3334
e075ae69 3335/* Match an SI or HImode register for a zero_extract. */
3336
3337int
e075ae69 3338ext_register_operand (op, mode)
2a2ab3f9 3339 register rtx op;
bb5177ac 3340 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3341{
3522082b 3342 int regno;
3343 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3344 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3345 return 0;
3346
3347 if (!register_operand (op, VOIDmode))
3348 return 0;
3349
3350 /* Be careful to accept only registers having upper parts. */
3351 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3352 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3353}
3354
3355/* Return 1 if this is a valid binary floating-point operation.
0f290768 3356 OP is the expression matched, and MODE is its mode. */
3357
3358int
3359binary_fp_operator (op, mode)
3360 register rtx op;
3361 enum machine_mode mode;
3362{
3363 if (mode != VOIDmode && mode != GET_MODE (op))
3364 return 0;
3365
3366 switch (GET_CODE (op))
3367 {
3368 case PLUS:
3369 case MINUS:
3370 case MULT:
3371 case DIV:
3372 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3373
3374 default:
3375 return 0;
3376 }
3377}
fee2770d 3378
e075ae69 3379int
b531087a 3380mult_operator (op, mode)
3381 register rtx op;
3382 enum machine_mode mode ATTRIBUTE_UNUSED;
3383{
3384 return GET_CODE (op) == MULT;
3385}
3386
3387int
b531087a 3388div_operator (op, mode)
3389 register rtx op;
3390 enum machine_mode mode ATTRIBUTE_UNUSED;
3391{
3392 return GET_CODE (op) == DIV;
3393}
3394
3395int
3396arith_or_logical_operator (op, mode)
3397 rtx op;
3398 enum machine_mode mode;
0a726ef1 3399{
3400 return ((mode == VOIDmode || GET_MODE (op) == mode)
3401 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3402 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3403}
3404
e075ae69 3405/* Returns 1 if OP is memory operand with a displacement. */
3406
3407int
3408memory_displacement_operand (op, mode)
3409 register rtx op;
3410 enum machine_mode mode;
4f2c8ebb 3411{
e075ae69 3412 struct ix86_address parts;
e9a25f70 3413
3414 if (! memory_operand (op, mode))
3415 return 0;
3416
3417 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3418 abort ();
3419
3420 return parts.disp != NULL_RTX;
3421}
3422
16189740 3423/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3424 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3425
3426 ??? It seems likely that this will only work because cmpsi is an
3427 expander, and no actual insns use this. */
3428
3429int
3430cmpsi_operand (op, mode)
3431 rtx op;
3432 enum machine_mode mode;
fee2770d 3433{
b9b2c339 3434 if (nonimmediate_operand (op, mode))
3435 return 1;
3436
3437 if (GET_CODE (op) == AND
3438 && GET_MODE (op) == SImode
3439 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3440 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3441 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3442 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3443 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3444 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3445 return 1;
e9a25f70 3446
3447 return 0;
3448}
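/* Illustrative example (not in the original source): the zero_extract
   form accepted above matches RTL such as

     (and:SI (zero_extract:SI (reg:SI 0) (const_int 8) (const_int 8))
             (const_int 255))

   i.e. a test of the high byte of %ax ("testb $0xff, %ah"-style),
   which jump may re-create when re-emitting comparisons.  */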
d784886d 3449
3450/* Returns 1 if OP is a memory operand that cannot be represented by the
3451 modRM array. */
3452
3453int
e075ae69 3454long_memory_operand (op, mode)
3455 register rtx op;
3456 enum machine_mode mode;
3457{
e075ae69 3458 if (! memory_operand (op, mode))
3459 return 0;
3460
e075ae69 3461 return memory_address_length (op) != 0;
d784886d 3462}
3463
3464/* Return nonzero if the rtx is known aligned. */
3465
3466int
3467aligned_operand (op, mode)
3468 rtx op;
3469 enum machine_mode mode;
3470{
3471 struct ix86_address parts;
3472
3473 if (!general_operand (op, mode))
3474 return 0;
3475
0f290768 3476 /* Registers and immediate operands are always "aligned". */
3477 if (GET_CODE (op) != MEM)
3478 return 1;
3479
0f290768 3480 /* Don't even try to do any aligned optimizations with volatiles. */
3481 if (MEM_VOLATILE_P (op))
3482 return 0;
3483
3484 op = XEXP (op, 0);
3485
3486 /* Pushes and pops are only valid on the stack pointer. */
3487 if (GET_CODE (op) == PRE_DEC
3488 || GET_CODE (op) == POST_INC)
3489 return 1;
3490
3491 /* Decode the address. */
3492 if (! ix86_decompose_address (op, &parts))
3493 abort ();
3494
3495 if (parts.base && GET_CODE (parts.base) == SUBREG)
3496 parts.base = SUBREG_REG (parts.base);
3497 if (parts.index && GET_CODE (parts.index) == SUBREG)
3498 parts.index = SUBREG_REG (parts.index);
3499
3500 /* Look for some component that isn't known to be aligned. */
3501 if (parts.index)
3502 {
3503 if (parts.scale < 4
bdb429a5 3504 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3505 return 0;
3506 }
3507 if (parts.base)
3508 {
bdb429a5 3509 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3510 return 0;
3511 }
3512 if (parts.disp)
3513 {
3514 if (GET_CODE (parts.disp) != CONST_INT
3515 || (INTVAL (parts.disp) & 3) != 0)
3516 return 0;
3517 }
3518
3519 /* Didn't find one -- this must be an aligned address. */
3520 return 1;
3521}
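/* Illustrative example (not in the original source): a stack slot like
   (mem:SI (plus:SI (reg:SI ebp) (const_int 8))) is considered aligned
   here, since the frame pointer is known to be 32-bit aligned and the
   displacement 8 is a multiple of 4; a displacement of 6 would make
   this function return 0.  */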
3522\f
3523/* Return true if the constant is something that can be loaded with
3524 a special instruction. Only handle 0.0 and 1.0; others are less
3525 worthwhile. */
3526
3527int
3528standard_80387_constant_p (x)
3529 rtx x;
57dbca5e 3530{
2b04e52b 3531 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3532 return -1;
3533 /* Note that the 80387 provides other constants, such as pi, that we
3534 should support too. On some machines these are much slower to load
3535 as a standard constant than to load from doubles in memory. */
3536 if (x == CONST0_RTX (GET_MODE (x)))
3537 return 1;
3538 if (x == CONST1_RTX (GET_MODE (x)))
3539 return 2;
e075ae69 3540 return 0;
3541}
3542
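/* Hypothetical caller (a sketch, not in the original source; "op" is
   an assumed operand name), showing how the return values select the
   special i387 load instructions:  */
#if 0
  switch (standard_80387_constant_p (op))
    {
    case 1: /* 0.0 -- loadable with fldz */
      break;
    case 2: /* 1.0 -- loadable with fld1 */
      break;
    default: /* not a special constant; load it from memory */
      break;
    }
#endif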
3543/* Return 1 if X is an FP constant we can load into an SSE register
3544 without using memory. */
3545int
3546standard_sse_constant_p (x)
3547 rtx x;
3548{
3549 if (GET_CODE (x) != CONST_DOUBLE)
3550 return -1;
3551 return (x == CONST0_RTX (GET_MODE (x)));
3552}
3553
3554/* Returns 1 if OP contains a symbol reference */
3555
3556int
3557symbolic_reference_mentioned_p (op)
3558 rtx op;
3559{
6f7d635c 3560 register const char *fmt;
3561 register int i;
3562
3563 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3564 return 1;
3565
3566 fmt = GET_RTX_FORMAT (GET_CODE (op));
3567 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3568 {
3569 if (fmt[i] == 'E')
3570 {
3571 register int j;
3572
3573 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3574 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3575 return 1;
3576 }
e9a25f70 3577
3578 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3579 return 1;
3580 }
3581
3582 return 0;
3583}
3584
3585/* Return 1 if it is appropriate to emit `ret' instructions in the
3586 body of a function. Do this only if the epilogue is simple, needing a
3587 couple of insns. Prior to reloading, we can't tell how many registers
3588 must be saved, so return 0 then. Return 0 if there is no frame
3589 marker to de-allocate.
3590
3591 If NON_SAVING_SETJMP is defined and true, then it is not possible
3592 for the epilogue to be simple, so return 0. This is a special case
3593 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3594 until final, but jump_optimize may need to know sooner if a
3595 `return' is OK. */
3596
3597int
e075ae69 3598ix86_can_use_return_insn_p ()
32b5b1aa 3599{
4dd2ac2c 3600 struct ix86_frame frame;
9a7372d6 3601
3602#ifdef NON_SAVING_SETJMP
3603 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3604 return 0;
3605#endif
3606
3607 if (! reload_completed || frame_pointer_needed)
3608 return 0;
32b5b1aa 3609
3610 /* Don't allow more than 32k of popped args, since that's all we can
3611 do with one instruction. */
3612 if (current_function_pops_args
3613 && current_function_args_size >= 32768)
e075ae69 3614 return 0;
32b5b1aa 3615
3616 ix86_compute_frame_layout (&frame);
3617 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3618}
3619\f
3620/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3621int
3622x86_64_sign_extended_value (value)
3623 rtx value;
3624{
3625 switch (GET_CODE (value))
3626 {
3627 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3628 to be at least 32, thus all acceptable constants are
3629 represented as CONST_INT. */
3630 case CONST_INT:
3631 if (HOST_BITS_PER_WIDE_INT == 32)
3632 return 1;
3633 else
3634 {
3635 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3636 return trunc_int_for_mode (val, SImode) == val;
3637 }
3638 break;
3639
3640 /* For certain code models, the symbolic references are known to fit. */
3641 case SYMBOL_REF:
3642 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3643
3644 /* For certain code models, the code is near as well. */
3645 case LABEL_REF:
3646 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3647
3648 /* We also may accept the offsetted memory references in certain special
3649 cases. */
3650 case CONST:
3651 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3652 && XVECLEN (XEXP (value, 0), 0) == 1
3653 && XINT (XEXP (value, 0), 1) == 15)
3654 return 1;
3655 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3656 {
3657 rtx op1 = XEXP (XEXP (value, 0), 0);
3658 rtx op2 = XEXP (XEXP (value, 0), 1);
3659 HOST_WIDE_INT offset;
3660
3661 if (ix86_cmodel == CM_LARGE)
3662 return 0;
3663 if (GET_CODE (op2) != CONST_INT)
3664 return 0;
3665 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3666 switch (GET_CODE (op1))
3667 {
3668 case SYMBOL_REF:
3669 /* For CM_SMALL assume that the latest object is 1MB below
3670 the end of the 31-bit boundary. We may also accept pretty
3671 large negative constants knowing that all objects are
3672 in the positive half of the address space. */
3673 if (ix86_cmodel == CM_SMALL
3674 && offset < 1024*1024*1024
3675 && trunc_int_for_mode (offset, SImode) == offset)
3676 return 1;
3677 /* For CM_KERNEL we know that all objects reside in the
3678 negative half of the 32-bit address space. We may not
3679 accept negative offsets, since they may be just off,
3680 and we may accept pretty large positive ones. */
3681 if (ix86_cmodel == CM_KERNEL
3682 && offset > 0
3683 && trunc_int_for_mode (offset, SImode) == offset)
3684 return 1;
3685 break;
3686 case LABEL_REF:
3687 /* These conditions are similar to SYMBOL_REF ones, just the
3688 constraints for code models differ. */
3689 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3690 && offset < 1024*1024*1024
3691 && trunc_int_for_mode (offset, SImode) == offset)
3692 return 1;
3693 if (ix86_cmodel == CM_KERNEL
3694 && offset > 0
3695 && trunc_int_for_mode (offset, SImode) == offset)
3696 return 1;
3697 break;
3698 default:
3699 return 0;
3700 }
3701 }
3702 return 0;
3703 default:
3704 return 0;
3705 }
3706}
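/* Worked example (illustrative, assuming a 64-bit host): for
   (const_int 0x7fffffff), trunc_int_for_mode (val, SImode) == val, so
   the constant is accepted; for (const_int 0x80000000) the truncation
   yields a negative value different from val, so it is rejected -- it
   cannot be expressed as a sign-extended 32-bit immediate.  */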
3707
3708/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3709int
3710x86_64_zero_extended_value (value)
3711 rtx value;
3712{
3713 switch (GET_CODE (value))
3714 {
3715 case CONST_DOUBLE:
3716 if (HOST_BITS_PER_WIDE_INT == 32)
3717 return (GET_MODE (value) == VOIDmode
3718 && !CONST_DOUBLE_HIGH (value));
3719 else
3720 return 0;
3721 case CONST_INT:
3722 if (HOST_BITS_PER_WIDE_INT == 32)
3723 return INTVAL (value) >= 0;
3724 else
b531087a 3725 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3726 break;
3727
3728 /* For certain code models, the symbolic references are known to fit. */
3729 case SYMBOL_REF:
3730 return ix86_cmodel == CM_SMALL;
3731
3732 /* For certain code models, the code is near as well. */
3733 case LABEL_REF:
3734 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3735
3736 /* We also may accept the offsetted memory references in certain special
3737 cases. */
3738 case CONST:
3739 if (GET_CODE (XEXP (value, 0)) == PLUS)
3740 {
3741 rtx op1 = XEXP (XEXP (value, 0), 0);
3742 rtx op2 = XEXP (XEXP (value, 0), 1);
3743
3744 if (ix86_cmodel == CM_LARGE)
3745 return 0;
3746 switch (GET_CODE (op1))
3747 {
3748 case SYMBOL_REF:
3749 return 0;
3750 /* For the small code model we may accept pretty large positive
3751 offsets, since one bit is available for free. Negative
3752 offsets are limited by the size of NULL pointer area
3753 specified by the ABI. */
3754 if (ix86_cmodel == CM_SMALL
3755 && GET_CODE (op2) == CONST_INT
3756 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3757 && (trunc_int_for_mode (INTVAL (op2), SImode)
3758 == INTVAL (op2)))
3759 return 1;
3760 /* ??? For the kernel, we may accept adjustment of
3761 -0x10000000, since we know that it will just convert
3762 negative address space to positive, but perhaps this
3763 is not worthwhile. */
3764 break;
3765 case LABEL_REF:
3766 /* These conditions are similar to SYMBOL_REF ones, just the
3767 constraints for code models differ. */
3768 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3769 && GET_CODE (op2) == CONST_INT
3770 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3771 && (trunc_int_for_mode (INTVAL (op2), SImode)
3772 == INTVAL (op2)))
3773 return 1;
3774 break;
3775 default:
3776 return 0;
3777 }
3778 }
3779 return 0;
3780 default:
3781 return 0;
3782 }
3783}
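/* Worked example (illustrative, assuming a 64-bit host):
   (const_int 0xffffffff) has no bits set above bit 31 and is accepted,
   while (const_int 0x100000000) fails the mask test and is rejected --
   it does not fit a zero-extended 32-bit immediate.  */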
3784
3785/* Value should be nonzero if functions must have frame pointers.
3786 Zero means the frame pointer need not be set up (and parms may
3787 be accessed via the stack pointer) in functions that seem suitable. */
3788
3789int
3790ix86_frame_pointer_required ()
3791{
3792 /* If we accessed previous frames, then the generated code expects
3793 to be able to access the saved ebp value in our frame. */
3794 if (cfun->machine->accesses_prev_frame)
3795 return 1;
a4f31c00 3796
6fca22eb
RH
3797 /* Several x86 OSes need a frame pointer for other reasons,
3798 usually pertaining to setjmp. */
3799 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3800 return 1;
3801
3802 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3803 the frame pointer by default. Turn it back on now if we've not
3804 got a leaf function. */
3805 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3806 return 1;
3807
3808 return 0;
3809}
3810
3811/* Record that the current function accesses previous call frames. */
3812
3813void
3814ix86_setup_frame_addresses ()
3815{
3816 cfun->machine->accesses_prev_frame = 1;
3817}
e075ae69 3818\f
4cf12e7e 3819static char pic_label_name[32];
3820
3821/* This function generates code for -fpic that loads %ebx with
3822 the return address of the caller and then returns. */
3823
3824void
4cf12e7e 3825ix86_asm_file_end (file)
e075ae69 3826 FILE *file;
e075ae69
RH
3827{
3828 rtx xops[2];
32b5b1aa 3829
3830 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3831 return;
32b5b1aa 3832
3833 /* ??? Binutils 2.10 and earlier have a linkonce elimination bug related
3834 to updating relocations to a section being discarded such that this
3835 doesn't work. Ought to detect this at configure time. */
7c262518 3836#if 0
3837 /* The trick here is to create a linkonce section containing the
3838 pic label thunk, but to refer to it with an internal label.
3839 Because the label is internal, we don't have inter-dso name
3840 binding issues on hosts that don't support ".hidden".
e9a25f70 3841
3842 In order to use these macros, however, we must create a fake
3843 function decl. */
3844 if (targetm.have_named_sections)
3845 {
3846 tree decl = build_decl (FUNCTION_DECL,
3847 get_identifier ("i686.get_pc_thunk"),
3848 error_mark_node);
3849 DECL_ONE_ONLY (decl) = 1;
3850 UNIQUE_SECTION (decl, 0);
715bdd29 3851 named_section (decl, NULL);
3852 }
3853 else
4cf12e7e 3854#else
7c262518 3855 text_section ();
4cf12e7e 3856#endif
0afeb08a 3857
3858 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3859 internal (non-global) label that's being emitted, it didn't make
3860 sense to have .type information for local labels. This caused
3861 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3862 me debug info for a label that you're declaring non-global?), so
3863 this was changed to call ASM_OUTPUT_LABEL() instead. */
3864
3865 ASM_OUTPUT_LABEL (file, pic_label_name);
3866
3867 xops[0] = pic_offset_table_rtx;
3868 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3869 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3870 output_asm_insn ("ret", xops);
32b5b1aa 3871}
32b5b1aa 3872
3873void
3874load_pic_register ()
32b5b1aa 3875{
e075ae69 3876 rtx gotsym, pclab;
32b5b1aa 3877
0d7d98ee 3878 if (TARGET_64BIT)
b531087a 3879 abort ();
0d7d98ee 3880
a8a05998 3881 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
32b5b1aa 3882
e075ae69 3883 if (TARGET_DEEP_BRANCH_PREDICTION)
32b5b1aa 3884 {
3885 if (! pic_label_name[0])
3886 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
e075ae69 3887 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
32b5b1aa 3888 }
e075ae69 3889 else
e5cb57e8 3890 {
e075ae69 3891 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
e5cb57e8 3892 }
e5cb57e8 3893
e075ae69 3894 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
2a2ab3f9 3895
3896 if (! TARGET_DEEP_BRANCH_PREDICTION)
3897 emit_insn (gen_popsi1 (pic_offset_table_rtx));
79325812 3898
e075ae69 3899 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
e9a25f70 3900}
8dfe5673 3901
3902/* Generate a "push" pattern for input ARG. */
e9a25f70 3903
3904static rtx
3905gen_push (arg)
3906 rtx arg;
e9a25f70 3907{
c5c76735 3908 return gen_rtx_SET (VOIDmode,
3909 gen_rtx_MEM (Pmode,
3910 gen_rtx_PRE_DEC (Pmode,
3911 stack_pointer_rtx)),
3912 arg);
3913}
3914
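/* Illustrative result (not in the original source): on a 32-bit target
   with ARG = (reg:SI 0), the pattern built above is

     (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI 0))

   i.e. the canonical RTL form of "pushl %eax".  */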
3915/* Return 1 if we need to save REGNO. */
3916static int
3917ix86_save_reg (regno, maybe_eh_return)
3918 int regno;
37a58036 3919 int maybe_eh_return;
1020a5ab 3920{
5b43fed1 3921 if (regno == PIC_OFFSET_TABLE_REGNUM
3922 && (current_function_uses_pic_offset_table
3923 || current_function_uses_const_pool
3924 || current_function_calls_eh_return))
3925 return 1;
3926
3927 if (current_function_calls_eh_return && maybe_eh_return)
3928 {
3929 unsigned i;
3930 for (i = 0; ; i++)
3931 {
b531087a 3932 unsigned test = EH_RETURN_DATA_REGNO (i);
3933 if (test == INVALID_REGNUM)
3934 break;
3935 if (test == (unsigned) regno)
3936 return 1;
3937 }
3938 }
4dd2ac2c 3939
3940 return (regs_ever_live[regno]
3941 && !call_used_regs[regno]
3942 && !fixed_regs[regno]
3943 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3944}
3945
3946/* Return number of registers to be saved on the stack. */
3947
3948static int
3949ix86_nsaved_regs ()
3950{
3951 int nregs = 0;
3952 int regno;
3953
4dd2ac2c 3954 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 3955 if (ix86_save_reg (regno, true))
4dd2ac2c 3956 nregs++;
3957 return nregs;
3958}
3959
3960/* Return the offset between two registers, one to be eliminated, and the other
3961 its replacement, at the start of a routine. */
3962
3963HOST_WIDE_INT
3964ix86_initial_elimination_offset (from, to)
3965 int from;
3966 int to;
3967{
3968 struct ix86_frame frame;
3969 ix86_compute_frame_layout (&frame);
3970
3971 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 3972 return frame.hard_frame_pointer_offset;
564d80f4
JH
3973 else if (from == FRAME_POINTER_REGNUM
3974 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 3975 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3976 else
3977 {
3978 if (to != STACK_POINTER_REGNUM)
3979 abort ();
3980 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 3981 return frame.stack_pointer_offset;
3982 else if (from != FRAME_POINTER_REGNUM)
3983 abort ();
0903fcab 3984 else
4dd2ac2c 3985 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3986 }
3987}
3988
4dd2ac2c 3989/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 3990
3991static void
3992ix86_compute_frame_layout (frame)
3993 struct ix86_frame *frame;
65954bd8 3994{
65954bd8 3995 HOST_WIDE_INT total_size;
564d80f4 3996 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3997 int offset;
3998 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 3999 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4000
4dd2ac2c 4001 frame->nregs = ix86_nsaved_regs ();
564d80f4 4002 total_size = size;
65954bd8 4003
9ba81eaa 4004 /* Skip return address and saved base pointer. */
4005 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4006
4007 frame->hard_frame_pointer_offset = offset;
564d80f4 4008
4009 /* Do some sanity checking of stack_alignment_needed and
4010 preferred_alignment, since the i386 port is the only one using
4011 these features, and they may break easily. */
564d80f4 4012
4013 if (size && !stack_alignment_needed)
4014 abort ();
4015 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4016 abort ();
4017 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4018 abort ();
4019 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4020 abort ();
564d80f4 4021
4022 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4023 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4024
4025 /* Register save area */
4026 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4027
4028 /* Va-arg area */
4029 if (ix86_save_varrargs_registers)
4030 {
4031 offset += X86_64_VARARGS_SIZE;
4032 frame->va_arg_size = X86_64_VARARGS_SIZE;
4033 }
4034 else
4035 frame->va_arg_size = 0;
4036
4037 /* Align start of frame for local function. */
4038 frame->padding1 = ((offset + stack_alignment_needed - 1)
4039 & -stack_alignment_needed) - offset;
f73ad30e 4040
4dd2ac2c 4041 offset += frame->padding1;
65954bd8 4042
4043 /* Frame pointer points here. */
4044 frame->frame_pointer_offset = offset;
54ff41b7 4045
4dd2ac2c 4046 offset += size;
65954bd8 4047
4dd2ac2c 4048 /* Add outgoing arguments area. */
f73ad30e 4049 if (ACCUMULATE_OUTGOING_ARGS)
4050 {
4051 offset += current_function_outgoing_args_size;
4052 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4053 }
4054 else
4055 frame->outgoing_arguments_size = 0;
564d80f4 4056
4057 /* Align stack boundary. */
4058 frame->padding2 = ((offset + preferred_alignment - 1)
4059 & -preferred_alignment) - offset;
4060
4061 offset += frame->padding2;
4062
4063 /* We've reached end of stack frame. */
4064 frame->stack_pointer_offset = offset;
4065
4066 /* Size prologue needs to allocate. */
4067 frame->to_allocate =
4068 (size + frame->padding1 + frame->padding2
8362f420 4069 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4070
4071 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4072 && current_function_is_leaf)
4073 {
4074 frame->red_zone_size = frame->to_allocate;
4075 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4076 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4077 }
4078 else
4079 frame->red_zone_size = 0;
4080 frame->to_allocate -= frame->red_zone_size;
4081 frame->stack_pointer_offset -= frame->red_zone_size;
4082#if 0
4083 fprintf (stderr, "nregs: %i\n", frame->nregs);
4084 fprintf (stderr, "size: %i\n", size);
4085 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4086 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4087 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4088 fprintf (stderr, "padding2: %i\n", frame->padding2);
4089 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4090 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4091 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4092 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4093 frame->hard_frame_pointer_offset);
4094 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4095#endif
4096}
4097
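/* Worked example (illustrative, 32-bit, frame pointer needed): with
   two saved registers, 20 bytes of locals, 8 bytes of outgoing args,
   stack_alignment_needed == 4 and preferred_alignment == 16:

     offset starts at 8 (return address + saved %ebp)
     hard_frame_pointer_offset = 8
     register save area: offset = 8 + 2*4 = 16, padding1 = 0
     frame_pointer_offset = 16, locals: offset = 36
     outgoing args: offset = 44, padding2 = 4
     stack_pointer_offset = 48, to_allocate = 20 + 0 + 4 + 8 = 32

   so the saves (8) plus to_allocate (32) account for the distance from
   the hard frame pointer (8) to the stack pointer (48).  */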
4098/* Emit code to save registers in the prologue. */
4099
4100static void
4101ix86_emit_save_regs ()
4102{
4103 register int regno;
0903fcab 4104 rtx insn;
0903fcab 4105
4dd2ac2c 4106 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4107 if (ix86_save_reg (regno, true))
0903fcab 4108 {
0d7d98ee 4109 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4110 RTX_FRAME_RELATED_P (insn) = 1;
4111 }
4112}
4113
4114/* Emit code to save registers using MOV insns. The first register
4115 is stored at POINTER + OFFSET. */
4116static void
4117ix86_emit_save_regs_using_mov (pointer, offset)
4118 rtx pointer;
4119 HOST_WIDE_INT offset;
4120{
4121 int regno;
4122 rtx insn;
4123
4124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4125 if (ix86_save_reg (regno, true))
4126 {
4127 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4128 Pmode, offset),
4129 gen_rtx_REG (Pmode, regno));
4130 RTX_FRAME_RELATED_P (insn) = 1;
4131 offset += UNITS_PER_WORD;
4132 }
4133}
4134
0f290768 4135/* Expand the prologue into a bunch of separate insns. */
4136
4137void
4138ix86_expand_prologue ()
2a2ab3f9 4139{
564d80f4 4140 rtx insn;
4141 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4142 || current_function_uses_const_pool)
4143 && !TARGET_64BIT);
4dd2ac2c 4144 struct ix86_frame frame;
6ab16dd9 4145 int use_mov = 0;
c6036a37 4146 HOST_WIDE_INT allocate;
4dd2ac2c 4147
2ab0437e 4148 if (!optimize_size)
4149 {
4150 use_fast_prologue_epilogue
4151 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4152 if (TARGET_PROLOGUE_USING_MOVE)
4153 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4154 }
4dd2ac2c 4155 ix86_compute_frame_layout (&frame);
79325812 4156
4157 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4158 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4159
4160 if (frame_pointer_needed)
4161 {
564d80f4 4162 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4163 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4164
564d80f4 4165 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4166 RTX_FRAME_RELATED_P (insn) = 1;
4167 }
4168
4169 allocate = frame.to_allocate;
4170 /* In case we are dealing with only a single register and an empty
4171 frame, a push is equivalent to the mov+add sequence. */
4172 if (allocate == 0 && frame.nregs <= 1)
4173 use_mov = 0;
4174
4175 if (!use_mov)
4176 ix86_emit_save_regs ();
4177 else
4178 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4179
c6036a37 4180 if (allocate == 0)
8dfe5673 4181 ;
e323735c 4182 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4183 {
4184 insn = emit_insn (gen_pro_epilogue_adjust_stack
4185 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4186 GEN_INT (-allocate)));
e075ae69 4187 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4188 }
79325812 4189 else
8dfe5673 4190 {
e075ae69 4191 /* ??? Is this only valid for Win32? */
e9a25f70 4192
e075ae69 4193 rtx arg0, sym;
e9a25f70 4194
8362f420 4195 if (TARGET_64BIT)
b531087a 4196 abort ();
8362f420 4197
e075ae69 4198 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4199 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4200
4201 sym = gen_rtx_MEM (FUNCTION_MODE,
4202 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4203 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4204
4205 CALL_INSN_FUNCTION_USAGE (insn)
4206 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4207 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4208 }
4209 if (use_mov)
4210 {
4211 if (!frame_pointer_needed || !frame.to_allocate)
4212 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4213 else
4214 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4215 -frame.nregs * UNITS_PER_WORD);
4216 }
e9a25f70 4217
4218#ifdef SUBTARGET_PROLOGUE
4219 SUBTARGET_PROLOGUE;
0f290768 4220#endif
84530511 4221
e9a25f70 4222 if (pic_reg_used)
36ad2436 4223 load_pic_register ();
77a989d1 4224
4225 /* If we are profiling, make sure no instructions are scheduled before
4226 the call to mcount. However, if -fpic, the above call will have
4227 done that. */
70f4f91c 4228 if (current_function_profile && ! pic_reg_used)
e9a25f70 4229 emit_insn (gen_blockage ());
4230}
4231
4232/* Emit code to restore saved registers using MOV insns. First register
4233 is restored from POINTER + OFFSET. */
4234static void
4235ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4236 rtx pointer;
4237 int offset;
37a58036 4238 int maybe_eh_return;
4239{
4240 int regno;
da2d1d3a 4241
4dd2ac2c 4242 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4243 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4244 {
4dd2ac2c 4245 emit_move_insn (gen_rtx_REG (Pmode, regno),
4246 adjust_address (gen_rtx_MEM (Pmode, pointer),
4247 Pmode, offset));
4dd2ac2c 4248 offset += UNITS_PER_WORD;
4249 }
4250}
4251
0f290768 4252/* Restore function stack, frame, and registers. */
e9a25f70 4253
2a2ab3f9 4254void
4255ix86_expand_epilogue (style)
4256 int style;
2a2ab3f9 4257{
1c71e60e 4258 int regno;
fdb8a883 4259 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4260 struct ix86_frame frame;
65954bd8 4261 HOST_WIDE_INT offset;
4262
4263 ix86_compute_frame_layout (&frame);
2a2ab3f9 4264
a4f31c00 4265 /* Calculate start of saved registers relative to ebp. Special care
4266 must be taken for the normal return case of a function using
4267 eh_return: the eax and edx registers are marked as saved, but not
4268 restored along this path. */
4269 offset = frame.nregs;
4270 if (current_function_calls_eh_return && style != 2)
4271 offset -= 2;
4272 offset *= -UNITS_PER_WORD;
2a2ab3f9 4273
4274 /* If we're only restoring one register and sp is not valid then
4275 use a move instruction to restore the register, since it's
4276 less work than reloading sp and popping the register.
4277
4278 The default code results in a stack adjustment using an add/lea
4279 instruction, while this code results in a LEAVE instruction (or a
4280 discrete equivalent), so it is profitable in some other cases as
4281 well, especially when there are no registers to restore. We also
4282 use this code when TARGET_USE_LEAVE and there is exactly one
4283 register to pop. This heuristic may need some tuning in the future. */
4dd2ac2c 4284 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4285 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4286 && use_fast_prologue_epilogue
c6036a37 4287 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4288 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4289 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4290 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4291 || current_function_calls_eh_return)
2a2ab3f9 4292 {
4293 /* Restore registers. We can use ebp or esp to address the memory
4294 locations. If both are available, default to ebp, since offsets
4295 are known to be small. The only exception is esp pointing directly
4296 to the end of the block of saved registers, where we may simplify
4297 the addressing mode. */
4298
4dd2ac2c 4299 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4300 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4301 frame.to_allocate, style == 2);
da2d1d3a 4302 else
4303 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4304 offset, style == 2);
4305
4306 /* eh_return epilogues need %ecx added to the stack pointer. */
4307 if (style == 2)
4308 {
4309 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4310
4311 if (frame_pointer_needed)
4312 {
4313 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4314 tmp = plus_constant (tmp, UNITS_PER_WORD);
4315 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4316
4317 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4318 emit_move_insn (hard_frame_pointer_rtx, tmp);
4319
4320 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4321 (stack_pointer_rtx, sa, const0_rtx));
4322 }
4323 else
4324 {
4325 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4326 tmp = plus_constant (tmp, (frame.to_allocate
4327 + frame.nregs * UNITS_PER_WORD));
4328 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4329 }
4330 }
4331 else if (!frame_pointer_needed)
4332 emit_insn (gen_pro_epilogue_adjust_stack
4333 (stack_pointer_rtx, stack_pointer_rtx,
4334 GEN_INT (frame.to_allocate
4335 + frame.nregs * UNITS_PER_WORD)));
0f290768 4336 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4337 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4338 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4339 else
2a2ab3f9 4340 {
4341 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4342 hard_frame_pointer_rtx,
f2042df3 4343 const0_rtx));
4344 if (TARGET_64BIT)
4345 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4346 else
4347 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4348 }
4349 }
1c71e60e 4350 else
68f654ec 4351 {
4352 /* First step is to deallocate the stack frame so that we can
4353 pop the registers. */
4354 if (!sp_valid)
4355 {
4356 if (!frame_pointer_needed)
4357 abort ();
4358 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4359 hard_frame_pointer_rtx,
f2042df3 4360 GEN_INT (offset)));
1c71e60e 4361 }
4dd2ac2c 4362 else if (frame.to_allocate)
4363 emit_insn (gen_pro_epilogue_adjust_stack
4364 (stack_pointer_rtx, stack_pointer_rtx,
4365 GEN_INT (frame.to_allocate)));
1c71e60e 4366
4dd2ac2c 4367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4368 if (ix86_save_reg (regno, false))
4369 {
4370 if (TARGET_64BIT)
4371 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4372 else
4373 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4374 }
4dd2ac2c 4375 if (frame_pointer_needed)
8362f420 4376 {
f5143c46 4377 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4378 able to grok it fast. */
4379 if (TARGET_USE_LEAVE)
4380 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4381 else if (TARGET_64BIT)
8362f420
JH
4382 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4383 else
4384 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4385 }
68f654ec 4386 }
68f654ec 4387
cbbf65e0 4388 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4389 if (style == 0)
4390 return;
4391
4392 if (current_function_pops_args && current_function_args_size)
4393 {
e075ae69 4394 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4395
4396 /* i386 can only pop 64K bytes. If asked to pop more, pop
4397 return address, do explicit add, and jump indirectly to the
0f290768 4398 caller. */
2a2ab3f9 4399
b8c752c8 4400 if (current_function_pops_args >= 65536)
2a2ab3f9 4401 {
e075ae69 4402 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4403
4404 /* There is no "pascal" calling convention in the 64-bit ABI. */
4405 if (TARGET_64BIT)
b531087a 4406 abort ();
8362f420 4407
4408 emit_insn (gen_popsi1 (ecx));
4409 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4410 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4411 }
79325812 4412 else
4413 emit_jump_insn (gen_return_pop_internal (popc));
4414 }
4415 else
4416 emit_jump_insn (gen_return_internal ());
4417}
4418\f
4419/* Extract the parts of an RTL expression that is a valid memory address
4420 for an instruction. Return 0 if the structure of the address is
4421 grossly off. Return -1 if the address contains ASHIFT, so it is not
4422 strictly valid, but is still used for computing the length of an
4423 lea instruction. */
4424
4425static int
4426ix86_decompose_address (addr, out)
4427 register rtx addr;
4428 struct ix86_address *out;
4429{
4430 rtx base = NULL_RTX;
4431 rtx index = NULL_RTX;
4432 rtx disp = NULL_RTX;
4433 HOST_WIDE_INT scale = 1;
4434 rtx scale_rtx = NULL_RTX;
b446e5a2 4435 int retval = 1;
e075ae69 4436
1540f9eb 4437 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4438 base = addr;
4439 else if (GET_CODE (addr) == PLUS)
4440 {
4441 rtx op0 = XEXP (addr, 0);
4442 rtx op1 = XEXP (addr, 1);
4443 enum rtx_code code0 = GET_CODE (op0);
4444 enum rtx_code code1 = GET_CODE (op1);
4445
4446 if (code0 == REG || code0 == SUBREG)
4447 {
4448 if (code1 == REG || code1 == SUBREG)
4449 index = op0, base = op1; /* index + base */
4450 else
4451 base = op0, disp = op1; /* base + displacement */
4452 }
4453 else if (code0 == MULT)
e9a25f70 4454 {
4455 index = XEXP (op0, 0);
4456 scale_rtx = XEXP (op0, 1);
4457 if (code1 == REG || code1 == SUBREG)
4458 base = op1; /* index*scale + base */
e9a25f70 4459 else
4460 disp = op1; /* index*scale + disp */
4461 }
4462 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4463 {
4464 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4465 scale_rtx = XEXP (XEXP (op0, 0), 1);
4466 base = XEXP (op0, 1);
4467 disp = op1;
2a2ab3f9 4468 }
4469 else if (code0 == PLUS)
4470 {
4471 index = XEXP (op0, 0); /* index + base + disp */
4472 base = XEXP (op0, 1);
4473 disp = op1;
4474 }
4475 else
b446e5a2 4476 return 0;
4477 }
4478 else if (GET_CODE (addr) == MULT)
4479 {
4480 index = XEXP (addr, 0); /* index*scale */
4481 scale_rtx = XEXP (addr, 1);
4482 }
4483 else if (GET_CODE (addr) == ASHIFT)
4484 {
4485 rtx tmp;
4486
4487 /* We're called for lea too, which implements ashift on occasion. */
4488 index = XEXP (addr, 0);
4489 tmp = XEXP (addr, 1);
4490 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4491 return 0;
4492 scale = INTVAL (tmp);
4493 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4494 return 0;
e075ae69 4495 scale = 1 << scale;
b446e5a2 4496 retval = -1;
2a2ab3f9 4497 }
2a2ab3f9 4498 else
4499 disp = addr; /* displacement */
4500
4501 /* Extract the integral value of scale. */
4502 if (scale_rtx)
e9a25f70 4503 {
e075ae69 4504 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4505 return 0;
e075ae69 4506 scale = INTVAL (scale_rtx);
e9a25f70 4507 }
3b3c6a3f 4508
e075ae69
RH
4509 /* Allow arg pointer and stack pointer as index if there is not scaling */
4510 if (base && index && scale == 1
4511 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4512 || index == stack_pointer_rtx))
4513 {
4514 rtx tmp = base;
4515 base = index;
4516 index = tmp;
4517 }
4518
4519 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4520 if ((base == hard_frame_pointer_rtx
4521 || base == frame_pointer_rtx
4522 || base == arg_pointer_rtx) && !disp)
4523 disp = const0_rtx;
4524
4525 /* Special case: on the K6, [%esi] makes the instruction vector
4526 decoded. Avoid this by transforming it to [%esi+0]. */
4527 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4528 && base && !index && !disp
329e1d01 4529 && REG_P (base)
4530 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4531 disp = const0_rtx;
4532
4533 /* Special case: encode reg+reg instead of reg*2. */
4534 if (!base && index && scale && scale == 2)
4535 base = index, scale = 1;
0f290768 4536
4537 /* Special case: scaling cannot be encoded without base or displacement. */
4538 if (!base && !disp && index && scale != 1)
4539 disp = const0_rtx;
4540
4541 out->base = base;
4542 out->index = index;
4543 out->disp = disp;
4544 out->scale = scale;
3b3c6a3f 4545
b446e5a2 4546 return retval;
e075ae69 4547}
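/* Illustrative example (not in the original source): for the address
   of "movl 12(%ebx,%esi,4), %eax", i.e.

     (plus (plus (mult (reg esi) (const_int 4)) (reg ebx))
           (const_int 12))

   the PLUS/MULT arm above yields index = %esi, scale = 4, base = %ebx
   and disp = 12, and the function returns 1.  */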
4548\f
4549/* Return the cost of the memory address X.
4550 For i386, it is better to use a complex address than let gcc copy
4551 the address into a reg and make a new pseudo. But not if the address
4552 requires two regs - that would mean more pseudos with longer
4553 lifetimes. */
4554int
4555ix86_address_cost (x)
4556 rtx x;
4557{
4558 struct ix86_address parts;
4559 int cost = 1;
3b3c6a3f 4560
4561 if (!ix86_decompose_address (x, &parts))
4562 abort ();
4563
4564 if (parts.base && GET_CODE (parts.base) == SUBREG)
4565 parts.base = SUBREG_REG (parts.base);
4566 if (parts.index && GET_CODE (parts.index) == SUBREG)
4567 parts.index = SUBREG_REG (parts.index);
4568
4569 /* More complex memory references are better. */
4570 if (parts.disp && parts.disp != const0_rtx)
4571 cost--;
4572
4573 /* Attempt to minimize number of registers in the address. */
4574 if ((parts.base
4575 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4576 || (parts.index
4577 && (!REG_P (parts.index)
4578 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4579 cost++;
4580
4581 if (parts.base
4582 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4583 && parts.index
4584 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4585 && parts.base != parts.index)
4586 cost++;
4587
4588 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4589 since its predecode logic can't detect the length of instructions,
4590 and decoding degenerates to vector decoding. Increase the cost of
4591 such addresses here. The penalty is minimally 2 cycles. It may be
4592 worthwhile to split such addresses or even refuse them entirely.
4593
4594 The following addressing modes are affected:
4595 [base+scale*index]
4596 [scale*index+disp]
4597 [base+index]
0f290768 4598
4599 The first and last cases may be avoidable by explicitly coding the
4600 zero into the memory address, but I don't have an AMD K6 machine
4601 handy to check this theory. */
4602
4603 if (TARGET_K6
4604 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4605 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4606 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4607 cost += 10;
0f290768 4608
4609 return cost;
4610}
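/* Illustrative example (not in the original source): for
   (plus (reg pseudo) (const_int 4)) the cost is 1 - 1 (nonzero
   displacement) + 1 (pseudo register) = 1, while the same address
   built on a hard register costs 0 -- so complex addresses made of
   hard registers are preferred.  */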
4611\f
4612/* If X is a machine specific address (i.e. a symbol or label being
4613 referenced as a displacement from the GOT implemented using an
4614 UNSPEC), then return the base term. Otherwise return X. */
4615
4616rtx
4617ix86_find_base_term (x)
4618 rtx x;
4619{
4620 rtx term;
4621
4622 if (TARGET_64BIT)
4623 {
4624 if (GET_CODE (x) != CONST)
4625 return x;
4626 term = XEXP (x, 0);
4627 if (GET_CODE (term) == PLUS
4628 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4629 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4630 term = XEXP (term, 0);
4631 if (GET_CODE (term) != UNSPEC
4632 || XVECLEN (term, 0) != 1
4633 || XINT (term, 1) != 15)
4634 return x;
4635
4636 term = XVECEXP (term, 0, 0);
4637
4638 if (GET_CODE (term) != SYMBOL_REF
4639 && GET_CODE (term) != LABEL_REF)
4640 return x;
4641
4642 return term;
4643 }
4644
4645 if (GET_CODE (x) != PLUS
4646 || XEXP (x, 0) != pic_offset_table_rtx
4647 || GET_CODE (XEXP (x, 1)) != CONST)
4648 return x;
4649
4650 term = XEXP (XEXP (x, 1), 0);
4651
4652 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4653 term = XEXP (term, 0);
4654
4655 if (GET_CODE (term) != UNSPEC
4656 || XVECLEN (term, 0) != 1
4657 || XINT (term, 1) != 7)
4658 return x;
4659
4660 term = XVECEXP (term, 0, 0);
4661
4662 if (GET_CODE (term) != SYMBOL_REF
4663 && GET_CODE (term) != LABEL_REF)
4664 return x;
4665
4666 return term;
4667}
4668\f
4669/* Determine if a given CONST RTX is a valid memory displacement
4670 in PIC mode. */
0f290768 4671
59be65f6 4672int
4673legitimate_pic_address_disp_p (disp)
4674 register rtx disp;
4675{
4676 /* In 64bit mode we can allow direct addresses of symbols and labels
4677 when they are not dynamic symbols. */
4678 if (TARGET_64BIT)
4679 {
4680 rtx x = disp;
4681 if (GET_CODE (disp) == CONST)
4682 x = XEXP (disp, 0);
4683 /* ??? Handle PIC code models */
4684 if (GET_CODE (x) == PLUS
4685 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4686 && ix86_cmodel == CM_SMALL_PIC
4687 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4688 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4689 x = XEXP (x, 0);
4690 if (local_symbolic_operand (x, Pmode))
4691 return 1;
4692 }
4693 if (GET_CODE (disp) != CONST)
4694 return 0;
4695 disp = XEXP (disp, 0);
4696
4697 if (TARGET_64BIT)
4698 {
4699 /* It is unsafe to allow PLUS expressions; this limits the allowed
4700 distance of GOT tables. We should not need these anyway. */
4701 if (GET_CODE (disp) != UNSPEC
4702 || XVECLEN (disp, 0) != 1
4703 || XINT (disp, 1) != 15)
4704 return 0;
4705
4706 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4707 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4708 return 0;
4709 return 1;
4710 }
4711
91bb873f
RH
4712 if (GET_CODE (disp) == PLUS)
4713 {
4714 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4715 return 0;
4716 disp = XEXP (disp, 0);
4717 }
4718
4719 if (GET_CODE (disp) != UNSPEC
4720 || XVECLEN (disp, 0) != 1)
4721 return 0;
4722
4723 /* Must be @GOT or @GOTOFF. */
4724 switch (XINT (disp, 1))
4725 {
4726 case 6: /* @GOT */
4727 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
91bb873f 4728
4729 case 7: /* @GOTOFF */
4730 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4731 }
4732
4733 return 0;
4734}
4735
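/* Illustrative examples (not in the original source), using the
   unspec numbers tested above: a valid @GOT displacement looks like
   (const (unspec [(symbol_ref "foo")] 6)), a valid @GOTOFF one like
   (const (unspec [(symbol_ref "bar")] 7)) or, with an offset,
   (const (plus (unspec [(symbol_ref "bar")] 7) (const_int 4))).
   @GOT accepts any SYMBOL_REF, while @GOTOFF requires a local symbol
   (local_symbolic_operand).  */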
4736/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4737 memory address for an instruction. The MODE argument is the machine mode
4738 for the MEM expression that wants to use this address.
4739
4740 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4741 convert common non-canonical forms to canonical form so that they will
4742 be recognized. */
4743
4744int
4745legitimate_address_p (mode, addr, strict)
4746 enum machine_mode mode;
4747 register rtx addr;
4748 int strict;
4749{
4750 struct ix86_address parts;
4751 rtx base, index, disp;
4752 HOST_WIDE_INT scale;
4753 const char *reason = NULL;
4754 rtx reason_rtx = NULL_RTX;
4755
4756 if (TARGET_DEBUG_ADDR)
4757 {
4758 fprintf (stderr,
e9a25f70 4759 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 4760 GET_MODE_NAME (mode), strict);
4761 debug_rtx (addr);
4762 }
4763
b446e5a2 4764 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 4765 {
e075ae69 4766 reason = "decomposition failed";
50e60bc3 4767 goto report_error;
4768 }
4769
4770 base = parts.base;
4771 index = parts.index;
4772 disp = parts.disp;
4773 scale = parts.scale;
91f0226f 4774
e075ae69 4775 /* Validate base register.
4776
4777 Don't allow SUBREGs here; they can lead to spill failures when the base
4778 is one word out of a two word structure, which is represented internally
4779 as a DImode int. */
e9a25f70 4780
4781 if (base)
4782 {
1540f9eb 4783 rtx reg;
4784 reason_rtx = base;
4785
4786 if (GET_CODE (base) == SUBREG)
4787 reg = SUBREG_REG (base);
4788 else
4789 reg = base;
4790
4791 if (GET_CODE (reg) != REG)
3b3c6a3f 4792 {
e075ae69 4793 reason = "base is not a register";
50e60bc3 4794 goto report_error;
4795 }
4796
4797 if (GET_MODE (base) != Pmode)
4798 {
e075ae69 4799 reason = "base is not in Pmode";
50e60bc3 4800 goto report_error;
4801 }
4802
4803 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
4804 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 4805 {
e075ae69 4806 reason = "base is not valid";
50e60bc3 4807 goto report_error;
4808 }
4809 }
4810
e075ae69 4811 /* Validate index register.
e9a25f70
JL
4812
4813 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
4814 is one word out of a two word structure, which is represented internally
4815 as a DImode int. */
e075ae69
RH
4816
4817 if (index)
3b3c6a3f 4818 {
1540f9eb 4819 rtx reg;
e075ae69
RH
4820 reason_rtx = index;
4821
1540f9eb
JH
4822 if (GET_CODE (index) == SUBREG)
4823 reg = SUBREG_REG (index);
4824 else
4825 reg = index;
4826
4827 if (GET_CODE (reg) != REG)
3b3c6a3f 4828 {
e075ae69 4829 reason = "index is not a register";
50e60bc3 4830 goto report_error;
3b3c6a3f
MM
4831 }
4832
e075ae69 4833 if (GET_MODE (index) != Pmode)
c954bd01 4834 {
e075ae69 4835 reason = "index is not in Pmode";
50e60bc3 4836 goto report_error;
c954bd01
RH
4837 }
4838
1540f9eb
JH
4839 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
4840 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 4841 {
e075ae69 4842 reason = "index is not valid";
50e60bc3 4843 goto report_error;
3b3c6a3f
MM
4844 }
4845 }
3b3c6a3f 4846
4847 /* Validate scale factor. */
4848 if (scale != 1)
3b3c6a3f 4849 {
4850 reason_rtx = GEN_INT (scale);
4851 if (!index)
3b3c6a3f 4852 {
e075ae69 4853 reason = "scale without index";
50e60bc3 4854 goto report_error;
4855 }
4856
e075ae69 4857 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 4858 {
e075ae69 4859 reason = "scale is not a valid multiplier";
50e60bc3 4860 goto report_error;
4861 }
4862 }
4863
91bb873f 4864 /* Validate displacement. */
4865 if (disp)
4866 {
4867 reason_rtx = disp;
4868
91bb873f 4869 if (!CONSTANT_ADDRESS_P (disp))
3b3c6a3f 4870 {
e075ae69 4871 reason = "displacement is not constant";
50e60bc3 4872 goto report_error;
4873 }
4874
0d7d98ee 4875 if (TARGET_64BIT)
3b3c6a3f 4876 {
4877 if (!x86_64_sign_extended_value (disp))
4878 {
4879 reason = "displacement is out of range";
4880 goto report_error;
4881 }
4882 }
4883 else
4884 {
4885 if (GET_CODE (disp) == CONST_DOUBLE)
4886 {
4887 reason = "displacement is a const_double";
4888 goto report_error;
4889 }
4890 }
4891
91bb873f 4892 if (flag_pic && SYMBOLIC_CONST (disp))
3b3c6a3f 4893 {
4894 if (TARGET_64BIT && (index || base))
4895 {
4896 reason = "non-constant pic memory reference";
4897 goto report_error;
4898 }
4899 if (! legitimate_pic_address_disp_p (disp))
4900 {
e075ae69 4901 reason = "displacement is an invalid pic construct";
50e60bc3 4902 goto report_error;
4903 }
4904
4905 /* This code used to verify that a symbolic pic displacement
4906 includes the pic_offset_table_rtx register.
4907
4908 While this is a good idea, unfortunately these constructs may
4909 be created by the "adds using lea" optimization for incorrect
4910 code like:
4911
4912 int a;
4913 int foo(int i)
4914 {
4915 return *(&a+i);
4916 }
4917
4918 This code is nonsensical, but results in addressing the
4919 GOT table with a pic_offset_table_rtx base. We can't
4920 just refuse it easily, since it gets matched by the
4921 "addsi3" pattern, which later gets split to an lea when
4922 the output register differs from the input. While this
4923 could be handled by a separate addsi pattern for this case
4924 that never results in an lea, disabling this test seems to
4925 be the easier and correct fix for the crash. */
3b3c6a3f 4926 }
91bb873f 4927 else if (HALF_PIC_P ())
3b3c6a3f 4928 {
91bb873f 4929 if (! HALF_PIC_ADDRESS_P (disp)
e075ae69 4930 || (base != NULL_RTX || index != NULL_RTX))
91bb873f 4931 {
e075ae69 4932 reason = "displacement is an invalid half-pic reference";
50e60bc3 4933 goto report_error;
91bb873f 4934 }
4935 }
4936 }
4937
e075ae69 4938 /* Everything looks valid. */
3b3c6a3f 4939 if (TARGET_DEBUG_ADDR)
e075ae69 4940 fprintf (stderr, "Success.\n");
3b3c6a3f 4941 return TRUE;
e075ae69 4942
50e60bc3 4943report_error:
4944 if (TARGET_DEBUG_ADDR)
4945 {
4946 fprintf (stderr, "Error: %s\n", reason);
4947 debug_rtx (reason_rtx);
4948 }
4949 return FALSE;
3b3c6a3f 4950}
3b3c6a3f 4951\f
4952/* Return a unique alias set for the GOT. */
4953
0f290768 4954static HOST_WIDE_INT
4955ix86_GOT_alias_set ()
4956{
4957 static HOST_WIDE_INT set = -1;
4958 if (set == -1)
4959 set = new_alias_set ();
4960 return set;
0f290768 4961}
55efb413 4962
4963/* Return a legitimate reference for ORIG (an address) using the
4964 register REG. If REG is 0, a new pseudo is generated.
4965
91bb873f 4966 There are two types of references that must be handled:
4967
4968 1. Global data references must load the address from the GOT, via
4969 the PIC reg. An insn is emitted to do this load, and the reg is
4970 returned.
4971
4972 2. Static data references, constant pool addresses, and code labels
4973 compute the address as an offset from the GOT, whose base is in
4974 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4975 differentiate them from global data objects. The returned
4976 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
4977
4978 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 4979 reg also appears in the address. */
3b3c6a3f
MM
4980
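/* For illustration (a sketch; the actual register choice is up to the
   allocator): with -fpic on ia32, a global `g' is reached through a
   GOT load,
	movl	g@GOT(%ebx), %eax
   while a file-local `s' is a displacement from the GOT base,
	leal	s@GOTOFF(%ebx), %eax
   where %ebx holds pic_offset_table_rtx.  */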
4981rtx
4982legitimize_pic_address (orig, reg)
4983 rtx orig;
4984 rtx reg;
4985{
4986 rtx addr = orig;
4987 rtx new = orig;
91bb873f 4988 rtx base;
3b3c6a3f 4989
623fe810 4990 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 4991 {
14f73b5a
JH
4992 /* In 64bit mode we can address such objects directly. */
4993 if (TARGET_64BIT)
4994 new = addr;
4995 else
4996 {
4997 /* This symbol may be referenced via a displacement from the PIC
4998 base address (@GOTOFF). */
3b3c6a3f 4999
14f73b5a
JH
5000 current_function_uses_pic_offset_table = 1;
5001 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
5002 new = gen_rtx_CONST (Pmode, new);
5003 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5004
14f73b5a
JH
5005 if (reg != 0)
5006 {
5007 emit_move_insn (reg, new);
5008 new = reg;
5009 }
5010 }
3b3c6a3f 5011 }
91bb873f 5012 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5013 {
14f73b5a
JH
5014 if (TARGET_64BIT)
5015 {
5016 current_function_uses_pic_offset_table = 1;
5017 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
5018 new = gen_rtx_CONST (Pmode, new);
5019 new = gen_rtx_MEM (Pmode, new);
5020 RTX_UNCHANGING_P (new) = 1;
5021 set_mem_alias_set (new, ix86_GOT_alias_set ());
5022
5023 if (reg == 0)
5024 reg = gen_reg_rtx (Pmode);
5025	  /* Use gen_movsi directly; otherwise the address is loaded
5026	     into a register for CSE.  We don't want to CSE these addresses;
5027	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
5028 emit_insn (gen_movsi (reg, new));
5029 new = reg;
5030 }
5031 else
5032 {
5033 /* This symbol must be referenced via a load from the
5034 Global Offset Table (@GOT). */
3b3c6a3f 5035
14f73b5a
JH
5036 current_function_uses_pic_offset_table = 1;
5037 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
5038 new = gen_rtx_CONST (Pmode, new);
5039 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5040 new = gen_rtx_MEM (Pmode, new);
5041 RTX_UNCHANGING_P (new) = 1;
5042 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5043
14f73b5a
JH
5044 if (reg == 0)
5045 reg = gen_reg_rtx (Pmode);
5046 emit_move_insn (reg, new);
5047 new = reg;
5048 }
0f290768 5049 }
91bb873f
RH
5050 else
5051 {
5052 if (GET_CODE (addr) == CONST)
3b3c6a3f 5053 {
91bb873f 5054 addr = XEXP (addr, 0);
e3c8ea67
RH
5055
5056 /* We must match stuff we generate before. Assume the only
5057 unspecs that can get here are ours. Not that we could do
5058 anything with them anyway... */
5059 if (GET_CODE (addr) == UNSPEC
5060 || (GET_CODE (addr) == PLUS
5061 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5062 return orig;
5063 if (GET_CODE (addr) != PLUS)
564d80f4 5064 abort ();
3b3c6a3f 5065 }
91bb873f
RH
5066 if (GET_CODE (addr) == PLUS)
5067 {
5068 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5069
91bb873f
RH
5070 /* Check first to see if this is a constant offset from a @GOTOFF
5071 symbol reference. */
623fe810 5072 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5073 && GET_CODE (op1) == CONST_INT)
5074 {
6eb791fc
JH
5075 if (!TARGET_64BIT)
5076 {
5077 current_function_uses_pic_offset_table = 1;
5078 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5079 new = gen_rtx_PLUS (Pmode, new, op1);
5080 new = gen_rtx_CONST (Pmode, new);
5081 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5082
6eb791fc
JH
5083 if (reg != 0)
5084 {
5085 emit_move_insn (reg, new);
5086 new = reg;
5087 }
5088 }
5089 else
91bb873f 5090 {
6eb791fc 5091 /* ??? We need to limit offsets here. */
91bb873f
RH
5092 }
5093 }
5094 else
5095 {
5096 base = legitimize_pic_address (XEXP (addr, 0), reg);
5097 new = legitimize_pic_address (XEXP (addr, 1),
5098 base == reg ? NULL_RTX : reg);
5099
5100 if (GET_CODE (new) == CONST_INT)
5101 new = plus_constant (base, INTVAL (new));
5102 else
5103 {
5104 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5105 {
5106 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5107 new = XEXP (new, 1);
5108 }
5109 new = gen_rtx_PLUS (Pmode, base, new);
5110 }
5111 }
5112 }
3b3c6a3f
MM
5113 }
5114 return new;
5115}
5116\f
3b3c6a3f
MM
5117/* Try machine-dependent ways of modifying an illegitimate address
5118 to be legitimate. If we find one, return the new, valid address.
5119 This macro is used in only one place: `memory_address' in explow.c.
5120
5121 OLDX is the address as it was before break_out_memory_refs was called.
5122 In some cases it is useful to look at this to decide what needs to be done.
5123
5124 MODE and WIN are passed so that this macro can use
5125 GO_IF_LEGITIMATE_ADDRESS.
5126
5127 It is always safe for this macro to do nothing. It exists to recognize
5128 opportunities to optimize the output.
5129
5130 For the 80386, we handle X+REG by loading X into a register R and
5131 using R+REG. R will go in a general reg and indexing will be used.
5132 However, if REG is a broken-out memory address or multiplication,
5133 nothing needs to be done because REG can certainly go in a general reg.
5134
5135 When -fpic is used, special handling is needed for symbolic references.
5136 See comments by legitimize_pic_address in i386.c for details. */
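/* For illustration, one canonicalization performed below (a sketch):
     (plus (ashift (reg) (const_int 3)) (reg))
   is rewritten as
     (plus (mult (reg) (const_int 8)) (reg))
   so that it matches the hardware's scaled-index addressing,
   base + index*8 + disp.  */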
5137
5138rtx
5139legitimize_address (x, oldx, mode)
5140 register rtx x;
bb5177ac 5141 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5142 enum machine_mode mode;
5143{
5144 int changed = 0;
5145 unsigned log;
5146
5147 if (TARGET_DEBUG_ADDR)
5148 {
e9a25f70
JL
5149 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5150 GET_MODE_NAME (mode));
3b3c6a3f
MM
5151 debug_rtx (x);
5152 }
5153
5154 if (flag_pic && SYMBOLIC_CONST (x))
5155 return legitimize_pic_address (x, 0);
5156
5157  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5158 if (GET_CODE (x) == ASHIFT
5159 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5160 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5161 {
5162 changed = 1;
a269a03c
JC
5163 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5164 GEN_INT (1 << log));
3b3c6a3f
MM
5165 }
5166
5167 if (GET_CODE (x) == PLUS)
5168 {
0f290768 5169 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5170
3b3c6a3f
MM
5171 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5172 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5173 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5174 {
5175 changed = 1;
c5c76735
JL
5176 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5177 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5178 GEN_INT (1 << log));
3b3c6a3f
MM
5179 }
5180
5181 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5182 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5183 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5184 {
5185 changed = 1;
c5c76735
JL
5186 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5187 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5188 GEN_INT (1 << log));
3b3c6a3f
MM
5189 }
5190
0f290768 5191 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5192 if (GET_CODE (XEXP (x, 1)) == MULT)
5193 {
5194 rtx tmp = XEXP (x, 0);
5195 XEXP (x, 0) = XEXP (x, 1);
5196 XEXP (x, 1) = tmp;
5197 changed = 1;
5198 }
5199
5200 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5201 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5202 created by virtual register instantiation, register elimination, and
5203 similar optimizations. */
5204 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5205 {
5206 changed = 1;
c5c76735
JL
5207 x = gen_rtx_PLUS (Pmode,
5208 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5209 XEXP (XEXP (x, 1), 0)),
5210 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5211 }
5212
e9a25f70
JL
5213 /* Canonicalize
5214 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5215 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5216 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5217 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5218 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5219 && CONSTANT_P (XEXP (x, 1)))
5220 {
00c79232
ML
5221 rtx constant;
5222 rtx other = NULL_RTX;
3b3c6a3f
MM
5223
5224 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5225 {
5226 constant = XEXP (x, 1);
5227 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5228 }
5229 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5230 {
5231 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5232 other = XEXP (x, 1);
5233 }
5234 else
5235 constant = 0;
5236
5237 if (constant)
5238 {
5239 changed = 1;
c5c76735
JL
5240 x = gen_rtx_PLUS (Pmode,
5241 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5242 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5243 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5244 }
5245 }
5246
5247 if (changed && legitimate_address_p (mode, x, FALSE))
5248 return x;
5249
5250 if (GET_CODE (XEXP (x, 0)) == MULT)
5251 {
5252 changed = 1;
5253 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5254 }
5255
5256 if (GET_CODE (XEXP (x, 1)) == MULT)
5257 {
5258 changed = 1;
5259 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5260 }
5261
5262 if (changed
5263 && GET_CODE (XEXP (x, 1)) == REG
5264 && GET_CODE (XEXP (x, 0)) == REG)
5265 return x;
5266
5267 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5268 {
5269 changed = 1;
5270 x = legitimize_pic_address (x, 0);
5271 }
5272
5273 if (changed && legitimate_address_p (mode, x, FALSE))
5274 return x;
5275
5276 if (GET_CODE (XEXP (x, 0)) == REG)
5277 {
5278 register rtx temp = gen_reg_rtx (Pmode);
5279 register rtx val = force_operand (XEXP (x, 1), temp);
5280 if (val != temp)
5281 emit_move_insn (temp, val);
5282
5283 XEXP (x, 1) = temp;
5284 return x;
5285 }
5286
5287 else if (GET_CODE (XEXP (x, 1)) == REG)
5288 {
5289 register rtx temp = gen_reg_rtx (Pmode);
5290 register rtx val = force_operand (XEXP (x, 0), temp);
5291 if (val != temp)
5292 emit_move_insn (temp, val);
5293
5294 XEXP (x, 0) = temp;
5295 return x;
5296 }
5297 }
5298
5299 return x;
5300}
2a2ab3f9
JVA
5301\f
5302/* Print an integer constant expression in assembler syntax. Addition
5303 and subtraction are the only arithmetic that may appear in these
5304 expressions. FILE is the stdio stream to write to, X is the rtx, and
5305 CODE is the operand print code from the output string. */
5306
5307static void
5308output_pic_addr_const (file, x, code)
5309 FILE *file;
5310 rtx x;
5311 int code;
5312{
5313 char buf[256];
5314
5315 switch (GET_CODE (x))
5316 {
5317 case PC:
5318 if (flag_pic)
5319 putc ('.', file);
5320 else
5321 abort ();
5322 break;
5323
5324 case SYMBOL_REF:
91bb873f
RH
5325 assemble_name (file, XSTR (x, 0));
5326 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5327 fputs ("@PLT", file);
2a2ab3f9
JVA
5328 break;
5329
91bb873f
RH
5330 case LABEL_REF:
5331 x = XEXP (x, 0);
5332 /* FALLTHRU */
2a2ab3f9
JVA
5333 case CODE_LABEL:
5334 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5335 assemble_name (asm_out_file, buf);
5336 break;
5337
5338 case CONST_INT:
f64cecad 5339 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5340 break;
5341
5342 case CONST:
5343 /* This used to output parentheses around the expression,
5344 but that does not work on the 386 (either ATT or BSD assembler). */
5345 output_pic_addr_const (file, XEXP (x, 0), code);
5346 break;
5347
5348 case CONST_DOUBLE:
5349 if (GET_MODE (x) == VOIDmode)
5350 {
5351 /* We can use %d if the number is <32 bits and positive. */
5352 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5353 fprintf (file, "0x%lx%08lx",
5354 (unsigned long) CONST_DOUBLE_HIGH (x),
5355 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5356 else
f64cecad 5357 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5358 }
5359 else
5360 /* We can't handle floating point constants;
5361 PRINT_OPERAND must handle them. */
5362 output_operand_lossage ("floating constant misused");
5363 break;
5364
5365 case PLUS:
e9a25f70 5366 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5367 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5368 {
2a2ab3f9 5369 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5370 putc ('+', file);
e9a25f70 5371 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5372 }
91bb873f 5373 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5374 {
2a2ab3f9 5375 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5376 putc ('+', file);
e9a25f70 5377 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5378 }
91bb873f
RH
5379 else
5380 abort ();
2a2ab3f9
JVA
5381 break;
5382
5383 case MINUS:
80f33d06 5384 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5385 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5386 putc ('-', file);
2a2ab3f9 5387 output_pic_addr_const (file, XEXP (x, 1), code);
80f33d06 5388 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5389 break;
5390
91bb873f
RH
5391 case UNSPEC:
5392 if (XVECLEN (x, 0) != 1)
77ebd435 5393 abort ();
91bb873f
RH
5394 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5395 switch (XINT (x, 1))
77ebd435
AJ
5396 {
5397 case 6:
5398 fputs ("@GOT", file);
5399 break;
5400 case 7:
5401 fputs ("@GOTOFF", file);
5402 break;
5403 case 8:
5404 fputs ("@PLT", file);
5405 break;
6eb791fc
JH
5406 case 15:
5407 fputs ("@GOTPCREL(%RIP)", file);
5408 break;
77ebd435
AJ
5409 default:
5410 output_operand_lossage ("invalid UNSPEC as operand");
5411 break;
5412 }
91bb873f
RH
5413 break;
5414
2a2ab3f9
JVA
5415 default:
5416 output_operand_lossage ("invalid expression as operand");
5417 }
5418}
1865dbb5 5419
0f290768 5420/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5421 We need to handle our special PIC relocations. */
5422
0f290768 5423void
1865dbb5
JM
5424i386_dwarf_output_addr_const (file, x)
5425 FILE *file;
5426 rtx x;
5427{
14f73b5a 5428#ifdef ASM_QUAD
18b5b8d6 5429 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5430#else
5431 if (TARGET_64BIT)
5432 abort ();
18b5b8d6 5433 fprintf (file, "%s", ASM_LONG);
14f73b5a 5434#endif
1865dbb5
JM
5435 if (flag_pic)
5436 output_pic_addr_const (file, x, '\0');
5437 else
5438 output_addr_const (file, x);
5439 fputc ('\n', file);
5440}
5441
5442/* In the name of slightly smaller debug output, and to cater to
5443   general assembler lossage, recognize PIC+GOTOFF and turn it back
5444 into a direct symbol reference. */
5445
5446rtx
5447i386_simplify_dwarf_addr (orig_x)
5448 rtx orig_x;
5449{
ec65b2e3 5450 rtx x = orig_x, y;
1865dbb5 5451
4c8c0dec
JJ
5452 if (GET_CODE (x) == MEM)
5453 x = XEXP (x, 0);
5454
6eb791fc
JH
5455 if (TARGET_64BIT)
5456 {
5457 if (GET_CODE (x) != CONST
5458 || GET_CODE (XEXP (x, 0)) != UNSPEC
4c8c0dec
JJ
5459 || XINT (XEXP (x, 0), 1) != 15
5460 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
5461 return orig_x;
5462 return XVECEXP (XEXP (x, 0), 0, 0);
5463 }
5464
1865dbb5 5465 if (GET_CODE (x) != PLUS
1865dbb5
JM
5466 || GET_CODE (XEXP (x, 1)) != CONST)
5467 return orig_x;
5468
ec65b2e3
JJ
5469 if (GET_CODE (XEXP (x, 0)) == REG
5470 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5471 /* %ebx + GOT/GOTOFF */
5472 y = NULL;
5473 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5474 {
5475 /* %ebx + %reg * scale + GOT/GOTOFF */
5476 y = XEXP (x, 0);
5477 if (GET_CODE (XEXP (y, 0)) == REG
5478 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5479 y = XEXP (y, 1);
5480 else if (GET_CODE (XEXP (y, 1)) == REG
5481 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5482 y = XEXP (y, 0);
5483 else
5484 return orig_x;
5485 if (GET_CODE (y) != REG
5486 && GET_CODE (y) != MULT
5487 && GET_CODE (y) != ASHIFT)
5488 return orig_x;
5489 }
5490 else
5491 return orig_x;
5492
1865dbb5
JM
5493 x = XEXP (XEXP (x, 1), 0);
5494 if (GET_CODE (x) == UNSPEC
4c8c0dec
JJ
5495 && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM)
5496 || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5497 {
5498 if (y)
5499 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5500 return XVECEXP (x, 0, 0);
5501 }
1865dbb5
JM
5502
5503 if (GET_CODE (x) == PLUS
5504 && GET_CODE (XEXP (x, 0)) == UNSPEC
5505 && GET_CODE (XEXP (x, 1)) == CONST_INT
4c8c0dec
JJ
5506 && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM)
5507 || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5508 {
5509 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5510 if (y)
5511 return gen_rtx_PLUS (Pmode, y, x);
5512 return x;
5513 }
1865dbb5
JM
5514
5515 return orig_x;
5516}
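/* For illustration (a sketch): on ia32 this turns
     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "x")] 7)))
   i.e. %ebx + x@GOTOFF, back into a plain (symbol_ref "x") for the
   debug output.  */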
2a2ab3f9 5517\f
a269a03c 5518static void
e075ae69 5519put_condition_code (code, mode, reverse, fp, file)
a269a03c 5520 enum rtx_code code;
e075ae69
RH
5521 enum machine_mode mode;
5522 int reverse, fp;
a269a03c
JC
5523 FILE *file;
5524{
a269a03c
JC
5525 const char *suffix;
5526
9a915772
JH
5527 if (mode == CCFPmode || mode == CCFPUmode)
5528 {
5529 enum rtx_code second_code, bypass_code;
5530 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5531 if (bypass_code != NIL || second_code != NIL)
b531087a 5532 abort ();
9a915772
JH
5533 code = ix86_fp_compare_code_to_integer (code);
5534 mode = CCmode;
5535 }
a269a03c
JC
5536 if (reverse)
5537 code = reverse_condition (code);
e075ae69 5538
a269a03c
JC
5539 switch (code)
5540 {
5541 case EQ:
5542 suffix = "e";
5543 break;
a269a03c
JC
5544 case NE:
5545 suffix = "ne";
5546 break;
a269a03c 5547 case GT:
7e08e190 5548 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
5549 abort ();
5550 suffix = "g";
a269a03c 5551 break;
a269a03c 5552 case GTU:
e075ae69
RH
5553      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5554	 Those same assemblers have the same but opposite lossage on cmov.  */
7e08e190 5555 if (mode != CCmode)
0f290768 5556 abort ();
e075ae69 5557 suffix = fp ? "nbe" : "a";
a269a03c 5558 break;
a269a03c 5559 case LT:
9076b9c1 5560 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5561 suffix = "s";
7e08e190 5562 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5563 suffix = "l";
9076b9c1 5564 else
0f290768 5565 abort ();
a269a03c 5566 break;
a269a03c 5567 case LTU:
9076b9c1 5568 if (mode != CCmode)
0f290768 5569 abort ();
a269a03c
JC
5570 suffix = "b";
5571 break;
a269a03c 5572 case GE:
9076b9c1 5573 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 5574 suffix = "ns";
7e08e190 5575 else if (mode == CCmode || mode == CCGCmode)
e075ae69 5576 suffix = "ge";
9076b9c1 5577 else
0f290768 5578 abort ();
a269a03c 5579 break;
a269a03c 5580 case GEU:
e075ae69 5581 /* ??? As above. */
7e08e190 5582 if (mode != CCmode)
0f290768 5583 abort ();
7e08e190 5584 suffix = fp ? "nb" : "ae";
a269a03c 5585 break;
a269a03c 5586 case LE:
7e08e190 5587 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
5588 abort ();
5589 suffix = "le";
a269a03c 5590 break;
a269a03c 5591 case LEU:
9076b9c1
JH
5592 if (mode != CCmode)
5593 abort ();
7e08e190 5594 suffix = "be";
a269a03c 5595 break;
3a3677ff 5596 case UNORDERED:
9e7adcb3 5597 suffix = fp ? "u" : "p";
3a3677ff
RH
5598 break;
5599 case ORDERED:
9e7adcb3 5600 suffix = fp ? "nu" : "np";
3a3677ff 5601 break;
a269a03c
JC
5602 default:
5603 abort ();
5604 }
5605 fputs (suffix, file);
5606}
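/* For example (illustrative): (GT, CCGCmode) prints "g" while its
   unsigned counterpart (GTU, CCmode) prints "a", giving setg/seta,
   jg/ja, cmovg/cmova and so on in the output templates.  */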
5607
e075ae69
RH
5608void
5609print_reg (x, code, file)
5610 rtx x;
5611 int code;
5612 FILE *file;
e5cb57e8 5613{
e075ae69 5614 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 5615 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
5616 || REGNO (x) == FLAGS_REG
5617 || REGNO (x) == FPSR_REG)
5618 abort ();
e9a25f70 5619
80f33d06 5620 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
5621 putc ('%', file);
5622
ef6257cd 5623 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
5624 code = 2;
5625 else if (code == 'b')
5626 code = 1;
5627 else if (code == 'k')
5628 code = 4;
3f3f2124
JH
5629 else if (code == 'q')
5630 code = 8;
e075ae69
RH
5631 else if (code == 'y')
5632 code = 3;
5633 else if (code == 'h')
5634 code = 0;
5635 else
5636 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 5637
3f3f2124
JH
5638  /* Irritatingly, AMD extended registers use a different naming convention
5639 from the normal registers. */
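  /* E.g. (illustrative): where a classic register is named
     al/ax/eax/rax, the extended ones are r8b/r8w/r8d/r8.  */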
5640 if (REX_INT_REG_P (x))
5641 {
885a70fd
JH
5642 if (!TARGET_64BIT)
5643 abort ();
3f3f2124
JH
5644 switch (code)
5645 {
ef6257cd 5646 case 0:
c725bd79 5647 error ("extended registers have no high halves");
3f3f2124
JH
5648 break;
5649 case 1:
5650 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5651 break;
5652 case 2:
5653 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5654 break;
5655 case 4:
5656 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5657 break;
5658 case 8:
5659 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5660 break;
5661 default:
c725bd79 5662 error ("unsupported operand size for extended register");
3f3f2124
JH
5663 break;
5664 }
5665 return;
5666 }
e075ae69
RH
5667 switch (code)
5668 {
5669 case 3:
5670 if (STACK_TOP_P (x))
5671 {
5672 fputs ("st(0)", file);
5673 break;
5674 }
5675 /* FALLTHRU */
e075ae69 5676 case 8:
3f3f2124 5677 case 4:
e075ae69 5678 case 12:
446988df 5679 if (! ANY_FP_REG_P (x))
885a70fd 5680 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 5681 /* FALLTHRU */
a7180f70 5682 case 16:
e075ae69
RH
5683 case 2:
5684 fputs (hi_reg_name[REGNO (x)], file);
5685 break;
5686 case 1:
5687 fputs (qi_reg_name[REGNO (x)], file);
5688 break;
5689 case 0:
5690 fputs (qi_high_reg_name[REGNO (x)], file);
5691 break;
5692 default:
5693 abort ();
fe25fea3 5694 }
e5cb57e8
SC
5695}
5696
2a2ab3f9 5697/* Meaning of CODE:
fe25fea3 5698 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 5699 C -- print opcode suffix for set/cmov insn.
fe25fea3 5700 c -- like C, but print reversed condition
ef6257cd 5701 F,f -- likewise, but for floating-point.
048b1c95
JJ
5702 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5703 nothing
2a2ab3f9
JVA
5704 R -- print the prefix for register names.
5705 z -- print the opcode suffix for the size of the current operand.
5706 * -- print a star (in certain assembler syntax)
fb204271 5707 A -- print an absolute memory reference.
2a2ab3f9 5708 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
5709   s -- print a shift double count, followed by the assembler's argument
5710 delimiter.
fe25fea3
SC
5711 b -- print the QImode name of the register for the indicated operand.
5712 %b0 would print %al if operands[0] is reg 0.
5713 w -- likewise, print the HImode name of the register.
5714 k -- likewise, print the SImode name of the register.
3f3f2124 5715 q -- likewise, print the DImode name of the register.
ef6257cd
JH
5716 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5717 y -- print "st(0)" instead of "st" as a register.
a46d1d38 5718 D -- print condition for SSE cmp instruction.
ef6257cd
JH
5719 P -- if PIC, print an @PLT suffix.
5720 X -- don't print any sort of PIC '@' suffix for a symbol.
a46d1d38 5721 */
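/* For example (illustrative): if operands[0] is (reg:SI 0), then in a
   template "%z0" prints "l", and under the AT&T dialect "%k0", "%w0",
   "%b0" and "%h0" print "%eax", "%ax", "%al" and "%ah" respectively.  */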
2a2ab3f9
JVA
5722
5723void
5724print_operand (file, x, code)
5725 FILE *file;
5726 rtx x;
5727 int code;
5728{
5729 if (code)
5730 {
5731 switch (code)
5732 {
5733 case '*':
80f33d06 5734 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
5735 putc ('*', file);
5736 return;
5737
fb204271 5738 case 'A':
80f33d06 5739 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 5740 putc ('*', file);
80f33d06 5741 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
5742 {
5743 /* Intel syntax. For absolute addresses, registers should not
5744	     be surrounded by brackets.  */
5745 if (GET_CODE (x) != REG)
5746 {
5747 putc ('[', file);
5748 PRINT_OPERAND (file, x, 0);
5749 putc (']', file);
5750 return;
5751 }
5752 }
80f33d06
GS
5753 else
5754 abort ();
fb204271
DN
5755
5756 PRINT_OPERAND (file, x, 0);
5757 return;
5758
5759
2a2ab3f9 5760 case 'L':
80f33d06 5761 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5762 putc ('l', file);
2a2ab3f9
JVA
5763 return;
5764
5765 case 'W':
80f33d06 5766 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5767 putc ('w', file);
2a2ab3f9
JVA
5768 return;
5769
5770 case 'B':
80f33d06 5771 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5772 putc ('b', file);
2a2ab3f9
JVA
5773 return;
5774
5775 case 'Q':
80f33d06 5776 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5777 putc ('l', file);
2a2ab3f9
JVA
5778 return;
5779
5780 case 'S':
80f33d06 5781 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5782 putc ('s', file);
2a2ab3f9
JVA
5783 return;
5784
5f1ec3e6 5785 case 'T':
80f33d06 5786 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 5787 putc ('t', file);
5f1ec3e6
JVA
5788 return;
5789
2a2ab3f9
JVA
5790 case 'z':
5791 /* 387 opcodes don't get size suffixes if the operands are
0f290768 5792 registers. */
2a2ab3f9
JVA
5793 if (STACK_REG_P (x))
5794 return;
5795
831c4e87
KC
5796 /* Likewise if using Intel opcodes. */
5797 if (ASSEMBLER_DIALECT == ASM_INTEL)
5798 return;
5799
5800	  /* Derive the opcode suffix from the size of the operand.  */
2a2ab3f9
JVA
5801 switch (GET_MODE_SIZE (GET_MODE (x)))
5802 {
2a2ab3f9 5803 case 2:
155d8a47
JW
5804#ifdef HAVE_GAS_FILDS_FISTS
5805 putc ('s', file);
5806#endif
2a2ab3f9
JVA
5807 return;
5808
5809 case 4:
5810 if (GET_MODE (x) == SFmode)
5811 {
e075ae69 5812 putc ('s', file);
2a2ab3f9
JVA
5813 return;
5814 }
5815 else
e075ae69 5816 putc ('l', file);
2a2ab3f9
JVA
5817 return;
5818
5f1ec3e6 5819 case 12:
2b589241 5820 case 16:
e075ae69
RH
5821 putc ('t', file);
5822 return;
5f1ec3e6 5823
2a2ab3f9
JVA
5824 case 8:
5825 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
5826 {
5827#ifdef GAS_MNEMONICS
e075ae69 5828 putc ('q', file);
56c0e8fa 5829#else
e075ae69
RH
5830 putc ('l', file);
5831 putc ('l', file);
56c0e8fa
JVA
5832#endif
5833 }
e075ae69
RH
5834 else
5835 putc ('l', file);
2a2ab3f9 5836 return;
155d8a47
JW
5837
5838 default:
5839 abort ();
2a2ab3f9 5840 }
4af3895e
JVA
5841
5842 case 'b':
5843 case 'w':
5844 case 'k':
3f3f2124 5845 case 'q':
4af3895e
JVA
5846 case 'h':
5847 case 'y':
5cb6195d 5848 case 'X':
e075ae69 5849 case 'P':
4af3895e
JVA
5850 break;
5851
2d49677f
SC
5852 case 's':
5853 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5854 {
5855 PRINT_OPERAND (file, x, 0);
e075ae69 5856 putc (',', file);
2d49677f 5857 }
a269a03c
JC
5858 return;
5859
a46d1d38
JH
5860 case 'D':
5861	  /* A little bit of brain damage here.  The SSE compare instructions
5862	     use completely different names for the comparisons than the
5863	     fp conditional moves do.  */
5864 switch (GET_CODE (x))
5865 {
5866 case EQ:
5867 case UNEQ:
5868 fputs ("eq", file);
5869 break;
5870 case LT:
5871 case UNLT:
5872 fputs ("lt", file);
5873 break;
5874 case LE:
5875 case UNLE:
5876 fputs ("le", file);
5877 break;
5878 case UNORDERED:
5879 fputs ("unord", file);
5880 break;
5881 case NE:
5882 case LTGT:
5883 fputs ("neq", file);
5884 break;
5885 case UNGE:
5886 case GE:
5887 fputs ("nlt", file);
5888 break;
5889 case UNGT:
5890 case GT:
5891 fputs ("nle", file);
5892 break;
5893 case ORDERED:
5894 fputs ("ord", file);
5895 break;
5896 default:
5897 abort ();
5898 break;
5899 }
5900 return;
048b1c95
JJ
5901 case 'O':
5902#ifdef CMOV_SUN_AS_SYNTAX
5903 if (ASSEMBLER_DIALECT == ASM_ATT)
5904 {
5905 switch (GET_MODE (x))
5906 {
5907 case HImode: putc ('w', file); break;
5908 case SImode:
5909 case SFmode: putc ('l', file); break;
5910 case DImode:
5911 case DFmode: putc ('q', file); break;
5912 default: abort ();
5913 }
5914 putc ('.', file);
5915 }
5916#endif
5917 return;
1853aadd 5918 case 'C':
e075ae69 5919 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 5920 return;
fe25fea3 5921 case 'F':
048b1c95
JJ
5922#ifdef CMOV_SUN_AS_SYNTAX
5923 if (ASSEMBLER_DIALECT == ASM_ATT)
5924 putc ('.', file);
5925#endif
e075ae69 5926 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
5927 return;
5928
e9a25f70 5929 /* Like above, but reverse condition */
e075ae69 5930 case 'c':
c1d5afc4
CR
5931 /* Check to see if argument to %c is really a constant
5932 and not a condition code which needs to be reversed. */
5933 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5934 {
5935 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5936 return;
5937 }
e075ae69
RH
5938 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5939 return;
fe25fea3 5940 case 'f':
048b1c95
JJ
5941#ifdef CMOV_SUN_AS_SYNTAX
5942 if (ASSEMBLER_DIALECT == ASM_ATT)
5943 putc ('.', file);
5944#endif
e075ae69 5945 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 5946 return;
ef6257cd
JH
5947 case '+':
5948 {
5949 rtx x;
e5cb57e8 5950
ef6257cd
JH
5951 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5952 return;
a4f31c00 5953
ef6257cd
JH
5954 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5955 if (x)
5956 {
5957 int pred_val = INTVAL (XEXP (x, 0));
5958
5959 if (pred_val < REG_BR_PROB_BASE * 45 / 100
5960 || pred_val > REG_BR_PROB_BASE * 55 / 100)
5961 {
5962 int taken = pred_val > REG_BR_PROB_BASE / 2;
5963 int cputaken = final_forward_branch_p (current_output_insn) == 0;
5964
5965		/* Emit hints only in the case the default branch prediction
5966		   heuristics would fail.  */
5967 if (taken != cputaken)
5968 {
5969 /* We use 3e (DS) prefix for taken branches and
5970 2e (CS) prefix for not taken branches. */
5971 if (taken)
5972 fputs ("ds ; ", file);
5973 else
5974 fputs ("cs ; ", file);
5975 }
5976 }
5977 }
5978 return;
5979 }
4af3895e 5980 default:
a52453cc 5981 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
5982 }
5983 }
e9a25f70 5984
2a2ab3f9
JVA
5985 if (GET_CODE (x) == REG)
5986 {
5987 PRINT_REG (x, code, file);
5988 }
e9a25f70 5989
2a2ab3f9
JVA
5990 else if (GET_CODE (x) == MEM)
5991 {
e075ae69 5992 /* No `byte ptr' prefix for call instructions. */
80f33d06 5993 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 5994 {
69ddee61 5995 const char * size;
e075ae69
RH
5996 switch (GET_MODE_SIZE (GET_MODE (x)))
5997 {
5998 case 1: size = "BYTE"; break;
5999 case 2: size = "WORD"; break;
6000 case 4: size = "DWORD"; break;
6001 case 8: size = "QWORD"; break;
6002 case 12: size = "XWORD"; break;
a7180f70 6003 case 16: size = "XMMWORD"; break;
e075ae69 6004 default:
564d80f4 6005 abort ();
e075ae69 6006 }
fb204271
DN
6007
6008 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6009 if (code == 'b')
6010 size = "BYTE";
6011 else if (code == 'w')
6012 size = "WORD";
6013 else if (code == 'k')
6014 size = "DWORD";
6015
e075ae69
RH
6016 fputs (size, file);
6017 fputs (" PTR ", file);
2a2ab3f9 6018 }
e075ae69
RH
6019
6020 x = XEXP (x, 0);
6021 if (flag_pic && CONSTANT_ADDRESS_P (x))
6022 output_pic_addr_const (file, x, code);
0d7d98ee
JH
6023 /* Avoid (%rip) for call operands. */
6024 else if (CONSTANT_ADDRESS_P (x) && code =='P'
6025 && GET_CODE (x) != CONST_INT)
6026 output_addr_const (file, x);
c8b94768
RH
6027 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6028 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6029 else
e075ae69 6030 output_address (x);
2a2ab3f9 6031 }
e9a25f70 6032
2a2ab3f9
JVA
6033 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6034 {
e9a25f70
JL
6035 REAL_VALUE_TYPE r;
6036 long l;
6037
5f1ec3e6
JVA
6038 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6039 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6040
80f33d06 6041 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6042 putc ('$', file);
52267fcb 6043 fprintf (file, "0x%lx", l);
5f1ec3e6 6044 }
e9a25f70 6045
0f290768 6046 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6047 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6048 {
e9a25f70
JL
6049 REAL_VALUE_TYPE r;
6050 char dstr[30];
6051
5f1ec3e6
JVA
6052 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6053 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6054 fprintf (file, "%s", dstr);
2a2ab3f9 6055 }
e9a25f70 6056
2b589241
JH
6057 else if (GET_CODE (x) == CONST_DOUBLE
6058 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6059 {
e9a25f70
JL
6060 REAL_VALUE_TYPE r;
6061 char dstr[30];
6062
5f1ec3e6
JVA
6063 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6064 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6065 fprintf (file, "%s", dstr);
2a2ab3f9 6066 }
79325812 6067 else
2a2ab3f9 6068 {
4af3895e 6069 if (code != 'P')
2a2ab3f9 6070 {
695dac07 6071 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6072 {
80f33d06 6073 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6074 putc ('$', file);
6075 }
2a2ab3f9
JVA
6076 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6077 || GET_CODE (x) == LABEL_REF)
e075ae69 6078 {
80f33d06 6079 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6080 putc ('$', file);
6081 else
6082 fputs ("OFFSET FLAT:", file);
6083 }
2a2ab3f9 6084 }
e075ae69
RH
6085 if (GET_CODE (x) == CONST_INT)
6086 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6087 else if (flag_pic)
2a2ab3f9
JVA
6088 output_pic_addr_const (file, x, code);
6089 else
6090 output_addr_const (file, x);
6091 }
6092}
6093\f
6094/* Print a memory operand whose address is ADDR. */
6095
6096void
6097print_operand_address (file, addr)
6098 FILE *file;
6099 register rtx addr;
6100{
e075ae69
RH
6101 struct ix86_address parts;
6102 rtx base, index, disp;
6103 int scale;
e9a25f70 6104
e075ae69
RH
6105 if (! ix86_decompose_address (addr, &parts))
6106 abort ();
e9a25f70 6107
e075ae69
RH
6108 base = parts.base;
6109 index = parts.index;
6110 disp = parts.disp;
6111 scale = parts.scale;
e9a25f70 6112
e075ae69
RH
6113 if (!base && !index)
6114 {
6115 /* Displacement only requires special attention. */
e9a25f70 6116
e075ae69 6117 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6118 {
80f33d06 6119 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6120 {
6121 if (USER_LABEL_PREFIX[0] == 0)
6122 putc ('%', file);
6123 fputs ("ds:", file);
6124 }
e075ae69 6125 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6126 }
e075ae69
RH
6127 else if (flag_pic)
6128 output_pic_addr_const (file, addr, 0);
6129 else
6130 output_addr_const (file, addr);
0d7d98ee
JH
6131
6132 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6133 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6134 fputs ("(%rip)", file);
e075ae69
RH
6135 }
6136 else
6137 {
80f33d06 6138 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6139 {
e075ae69 6140 if (disp)
2a2ab3f9 6141 {
c399861d 6142 if (flag_pic)
e075ae69
RH
6143 output_pic_addr_const (file, disp, 0);
6144 else if (GET_CODE (disp) == LABEL_REF)
6145 output_asm_label (disp);
2a2ab3f9 6146 else
e075ae69 6147 output_addr_const (file, disp);
2a2ab3f9
JVA
6148 }
6149
e075ae69
RH
6150 putc ('(', file);
6151 if (base)
6152 PRINT_REG (base, 0, file);
6153 if (index)
2a2ab3f9 6154 {
e075ae69
RH
6155 putc (',', file);
6156 PRINT_REG (index, 0, file);
6157 if (scale != 1)
6158 fprintf (file, ",%d", scale);
2a2ab3f9 6159 }
e075ae69 6160 putc (')', file);
2a2ab3f9 6161 }
2a2ab3f9
JVA
6162 else
6163 {
e075ae69 6164 rtx offset = NULL_RTX;
e9a25f70 6165
e075ae69
RH
6166 if (disp)
6167 {
6168 /* Pull out the offset of a symbol; print any symbol itself. */
6169 if (GET_CODE (disp) == CONST
6170 && GET_CODE (XEXP (disp, 0)) == PLUS
6171 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6172 {
6173 offset = XEXP (XEXP (disp, 0), 1);
6174 disp = gen_rtx_CONST (VOIDmode,
6175 XEXP (XEXP (disp, 0), 0));
6176 }
ce193852 6177
e075ae69
RH
6178 if (flag_pic)
6179 output_pic_addr_const (file, disp, 0);
6180 else if (GET_CODE (disp) == LABEL_REF)
6181 output_asm_label (disp);
6182 else if (GET_CODE (disp) == CONST_INT)
6183 offset = disp;
6184 else
6185 output_addr_const (file, disp);
6186 }
e9a25f70 6187
e075ae69
RH
6188 putc ('[', file);
6189 if (base)
a8620236 6190 {
e075ae69
RH
6191 PRINT_REG (base, 0, file);
6192 if (offset)
6193 {
6194 if (INTVAL (offset) >= 0)
6195 putc ('+', file);
6196 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6197 }
a8620236 6198 }
e075ae69
RH
6199 else if (offset)
6200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6201 else
e075ae69 6202 putc ('0', file);
e9a25f70 6203
e075ae69
RH
6204 if (index)
6205 {
6206 putc ('+', file);
6207 PRINT_REG (index, 0, file);
6208 if (scale != 1)
6209 fprintf (file, "*%d", scale);
6210 }
6211 putc (']', file);
6212 }
2a2ab3f9
JVA
6213 }
6214}
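/* For illustration (a sketch): the address base = %ebp, index = %eax,
   scale = 4, disp = 8 comes out as "8(%ebp,%eax,4)" in AT&T syntax
   and as "[ebp+eax*4+8]" in Intel syntax.  */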
6215\f
6216/* Split one or more DImode RTL references into pairs of SImode
6217 references. The RTL can be REG, offsettable MEM, integer constant, or
6218 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6219 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6220 that parallel "operands". */
2a2ab3f9
JVA
6221
6222void
6223split_di (operands, num, lo_half, hi_half)
6224 rtx operands[];
6225 int num;
6226 rtx lo_half[], hi_half[];
6227{
6228 while (num--)
6229 {
57dbca5e 6230 rtx op = operands[num];
b932f770
JH
6231
6232	/* simplify_subreg refuses to split volatile memory addresses,
6233	   but we still have to handle them.  */
6234 if (GET_CODE (op) == MEM)
2a2ab3f9 6235 {
f4ef873c 6236 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6237 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6238 }
6239 else
b932f770 6240 {
38ca929b
JH
6241 lo_half[num] = simplify_gen_subreg (SImode, op,
6242 GET_MODE (op) == VOIDmode
6243 ? DImode : GET_MODE (op), 0);
6244 hi_half[num] = simplify_gen_subreg (SImode, op,
6245 GET_MODE (op) == VOIDmode
6246 ? DImode : GET_MODE (op), 4);
b932f770 6247 }
2a2ab3f9
JVA
6248 }
6249}
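/* For illustration (a sketch; ia32 is little-endian): a DImode MEM at
   address A splits into SImode MEMs at A (low half) and A+4 (high
   half); the constant 0x0000000100000002 splits into lo = 2, hi = 1.  */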
44cf5b6a
JH
6250/* Split one or more TImode RTL references into pairs of DImode
6251   references.  The RTL can be REG, offsettable MEM, integer constant, or
6252   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6253   split and "num" is its length.  lo_half and hi_half are output arrays
6254   that parallel "operands".  */
6255
6256void
6257split_ti (operands, num, lo_half, hi_half)
6258 rtx operands[];
6259 int num;
6260 rtx lo_half[], hi_half[];
6261{
6262 while (num--)
6263 {
6264 rtx op = operands[num];
b932f770
JH
6265
6266	/* simplify_subreg refuses to split volatile memory addresses, but we
6267	   still have to handle them.  */
6268 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6269 {
6270 lo_half[num] = adjust_address (op, DImode, 0);
6271 hi_half[num] = adjust_address (op, DImode, 8);
6272 }
6273 else
b932f770
JH
6274 {
6275 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6276 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6277 }
44cf5b6a
JH
6278 }
6279}
2a2ab3f9 6280\f
2a2ab3f9
JVA
6281/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6282 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6283 is the expression of the binary operation. The output may either be
6284 emitted here, or returned to the caller, like all output_* functions.
6285
6286 There is no guarantee that the operands are the same mode, as they
0f290768 6287 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6288
e3c2afab
AM
6289#ifndef SYSV386_COMPAT
6290/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6291 wants to fix the assemblers because that causes incompatibility
6292 with gcc. No-one wants to fix gcc because that causes
6293 incompatibility with assemblers... You can use the option of
6294 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6295#define SYSV386_COMPAT 1
6296#endif
6297
69ddee61 6298const char *
2a2ab3f9
JVA
6299output_387_binary_op (insn, operands)
6300 rtx insn;
6301 rtx *operands;
6302{
e3c2afab 6303 static char buf[30];
69ddee61 6304 const char *p;
1deaa899
JH
6305 const char *ssep;
6306 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6307
e3c2afab
AM
6308#ifdef ENABLE_CHECKING
6309 /* Even if we do not want to check the inputs, this documents input
6310 constraints. Which helps in understanding the following code. */
6311 if (STACK_REG_P (operands[0])
6312 && ((REG_P (operands[1])
6313 && REGNO (operands[0]) == REGNO (operands[1])
6314 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6315 || (REG_P (operands[2])
6316 && REGNO (operands[0]) == REGNO (operands[2])
6317 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6318 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6319 ; /* ok */
1deaa899 6320 else if (!is_sse)
e3c2afab
AM
6321 abort ();
6322#endif
6323
2a2ab3f9
JVA
6324 switch (GET_CODE (operands[3]))
6325 {
6326 case PLUS:
e075ae69
RH
6327 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6328 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6329 p = "fiadd";
6330 else
6331 p = "fadd";
1deaa899 6332 ssep = "add";
2a2ab3f9
JVA
6333 break;
6334
6335 case MINUS:
e075ae69
RH
6336 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6337 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6338 p = "fisub";
6339 else
6340 p = "fsub";
1deaa899 6341 ssep = "sub";
2a2ab3f9
JVA
6342 break;
6343
6344 case MULT:
e075ae69
RH
6345 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6346 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6347 p = "fimul";
6348 else
6349 p = "fmul";
1deaa899 6350 ssep = "mul";
2a2ab3f9
JVA
6351 break;
6352
6353 case DIV:
e075ae69
RH
6354 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6355 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6356 p = "fidiv";
6357 else
6358 p = "fdiv";
1deaa899 6359 ssep = "div";
2a2ab3f9
JVA
6360 break;
6361
6362 default:
6363 abort ();
6364 }
6365
1deaa899
JH
6366 if (is_sse)
6367 {
6368 strcpy (buf, ssep);
6369 if (GET_MODE (operands[0]) == SFmode)
6370 strcat (buf, "ss\t{%2, %0|%0, %2}");
6371 else
6372 strcat (buf, "sd\t{%2, %0|%0, %2}");
6373 return buf;
6374 }
e075ae69 6375 strcpy (buf, p);
2a2ab3f9
JVA
6376
6377 switch (GET_CODE (operands[3]))
6378 {
6379 case MULT:
6380 case PLUS:
6381 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6382 {
e3c2afab 6383 rtx temp = operands[2];
2a2ab3f9
JVA
6384 operands[2] = operands[1];
6385 operands[1] = temp;
6386 }
6387
e3c2afab
AM
6388       /* We know operands[0] == operands[1].  */
6389
2a2ab3f9 6390 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6391 {
6392 p = "%z2\t%2";
6393 break;
6394 }
2a2ab3f9
JVA
6395
6396 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
6397 {
6398 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6399 /* How is it that we are storing to a dead operand[2]?
6400 Well, presumably operands[1] is dead too. We can't
6401 store the result to st(0) as st(0) gets popped on this
6402 instruction. Instead store to operands[2] (which I
6403 think has to be st(1)). st(1) will be popped later.
6404 gcc <= 2.8.1 didn't have this check and generated
6405 assembly code that the Unixware assembler rejected. */
6406 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6407 else
e3c2afab 6408 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 6409 break;
6b28fd63 6410 }
2a2ab3f9
JVA
6411
6412 if (STACK_TOP_P (operands[0]))
e3c2afab 6413 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6414 else
e3c2afab 6415 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 6416 break;
2a2ab3f9
JVA
6417
6418 case MINUS:
6419 case DIV:
6420 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
6421 {
6422 p = "r%z1\t%1";
6423 break;
6424 }
2a2ab3f9
JVA
6425
6426 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6427 {
6428 p = "%z2\t%2";
6429 break;
6430 }
2a2ab3f9 6431
2a2ab3f9 6432 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 6433 {
e3c2afab
AM
6434#if SYSV386_COMPAT
6435 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6436 derived assemblers, confusingly reverse the direction of
6437 the operation for fsub{r} and fdiv{r} when the
6438 destination register is not st(0). The Intel assembler
6439 doesn't have this brain damage. Read !SYSV386_COMPAT to
6440 figure out what the hardware really does. */
6441 if (STACK_TOP_P (operands[0]))
6442 p = "{p\t%0, %2|rp\t%2, %0}";
6443 else
6444 p = "{rp\t%2, %0|p\t%0, %2}";
6445#else
6b28fd63 6446 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6447 /* As above for fmul/fadd, we can't store to st(0). */
6448 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6449 else
e3c2afab
AM
6450 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6451#endif
e075ae69 6452 break;
6b28fd63 6453 }
2a2ab3f9
JVA
6454
6455 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 6456 {
e3c2afab 6457#if SYSV386_COMPAT
6b28fd63 6458 if (STACK_TOP_P (operands[0]))
e3c2afab 6459 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 6460 else
e3c2afab
AM
6461 p = "{p\t%1, %0|rp\t%0, %1}";
6462#else
6463 if (STACK_TOP_P (operands[0]))
6464 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6465 else
6466 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6467#endif
e075ae69 6468 break;
6b28fd63 6469 }
2a2ab3f9
JVA
6470
6471 if (STACK_TOP_P (operands[0]))
6472 {
6473 if (STACK_TOP_P (operands[1]))
e3c2afab 6474 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6475 else
e3c2afab 6476 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 6477 break;
2a2ab3f9
JVA
6478 }
6479 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
6480 {
6481#if SYSV386_COMPAT
6482 p = "{\t%1, %0|r\t%0, %1}";
6483#else
6484 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6485#endif
6486 }
2a2ab3f9 6487 else
e3c2afab
AM
6488 {
6489#if SYSV386_COMPAT
6490 p = "{r\t%2, %0|\t%0, %2}";
6491#else
6492 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6493#endif
6494 }
e075ae69 6495 break;
2a2ab3f9
JVA
6496
6497 default:
6498 abort ();
6499 }
e075ae69
RH
6500
6501 strcat (buf, p);
6502 return buf;
2a2ab3f9 6503}
e075ae69 6504
a4f31c00 6505/* Output code to initialize control word copies used by
7a2e09f4
JH
6506   trunc?f?i patterns.  NORMAL is set to the current control word, while ROUND_DOWN
6507   is set to a control word that rounds toward zero, as truncation requires.  */
6508void
6509emit_i387_cw_initialization (normal, round_down)
6510 rtx normal, round_down;
6511{
6512 rtx reg = gen_reg_rtx (HImode);
6513
6514 emit_insn (gen_x86_fnstcw_1 (normal));
6515 emit_move_insn (reg, normal);
6516 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6517 && !TARGET_64BIT)
6518 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6519 else
6520 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6521 emit_move_insn (round_down, reg);
6522}
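/* For reference (illustrative): bits 10-11 of the i387 control word
   form the rounding-control field -- 00 = to nearest, 01 = down,
   10 = up, 11 = toward zero -- so or-ing in 0xc00 above selects
   truncation, which the fist{p}-based conversions need.  */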
6523
2a2ab3f9 6524/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 6525 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 6526 operand may be [SDX]Fmode. */
2a2ab3f9 6527
69ddee61 6528const char *
2a2ab3f9
JVA
6529output_fix_trunc (insn, operands)
6530 rtx insn;
6531 rtx *operands;
6532{
6533 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 6534 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 6535
e075ae69
RH
6536 /* Jump through a hoop or two for DImode, since the hardware has no
6537 non-popping instruction. We used to do this a different way, but
6538 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
6539 if (dimode_p && !stack_top_dies)
6540 output_asm_insn ("fld\t%y1", operands);
e075ae69 6541
7a2e09f4 6542 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
6543 abort ();
6544
e075ae69 6545 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 6546 abort ();
e9a25f70 6547
7a2e09f4 6548 output_asm_insn ("fldcw\t%3", operands);
e075ae69 6549 if (stack_top_dies || dimode_p)
7a2e09f4 6550 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 6551 else
7a2e09f4 6552 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 6553 output_asm_insn ("fldcw\t%2", operands);
10195bd8 6554
e075ae69 6555 return "";
2a2ab3f9 6556}
cda749b1 6557
e075ae69
RH
6558/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6559 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6560 when fucom should be used. */
6561
69ddee61 6562const char *
e075ae69 6563output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
6564 rtx insn;
6565 rtx *operands;
e075ae69 6566 int eflags_p, unordered_p;
cda749b1 6567{
e075ae69
RH
6568 int stack_top_dies;
6569 rtx cmp_op0 = operands[0];
6570 rtx cmp_op1 = operands[1];
0644b628 6571 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
6572
6573 if (eflags_p == 2)
6574 {
6575 cmp_op0 = cmp_op1;
6576 cmp_op1 = operands[2];
6577 }
0644b628
JH
6578 if (is_sse)
6579 {
6580 if (GET_MODE (operands[0]) == SFmode)
6581 if (unordered_p)
6582 return "ucomiss\t{%1, %0|%0, %1}";
6583 else
6584	   return "comiss\t{%1, %0|%0, %1}";
6585 else
6586 if (unordered_p)
6587 return "ucomisd\t{%1, %0|%0, %1}";
6588 else
6589	   return "comisd\t{%1, %0|%0, %1}";
6590 }
cda749b1 6591
e075ae69 6592 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
6593 abort ();
6594
e075ae69 6595 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 6596
e075ae69
RH
6597 if (STACK_REG_P (cmp_op1)
6598 && stack_top_dies
6599 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6600 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 6601 {
e075ae69
RH
6602      /* If the top of the 387 stack dies, and the other operand
6603	 is also a stack register that dies, then this must be a
6604	 `fcompp' float compare.  */
6605
6606 if (eflags_p == 1)
6607 {
6608 /* There is no double popping fcomi variant. Fortunately,
6609 eflags is immune from the fstp's cc clobbering. */
6610 if (unordered_p)
6611 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6612 else
6613 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6614 return "fstp\t%y0";
6615 }
6616 else
cda749b1 6617 {
e075ae69
RH
6618 if (eflags_p == 2)
6619 {
6620 if (unordered_p)
6621 return "fucompp\n\tfnstsw\t%0";
6622 else
6623 return "fcompp\n\tfnstsw\t%0";
6624 }
cda749b1
JW
6625 else
6626 {
e075ae69
RH
6627 if (unordered_p)
6628 return "fucompp";
6629 else
6630 return "fcompp";
cda749b1
JW
6631 }
6632 }
cda749b1
JW
6633 }
6634 else
6635 {
e075ae69 6636 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 6637
0f290768 6638 static const char * const alt[24] =
e075ae69
RH
6639 {
6640 "fcom%z1\t%y1",
6641 "fcomp%z1\t%y1",
6642 "fucom%z1\t%y1",
6643 "fucomp%z1\t%y1",
0f290768 6644
e075ae69
RH
6645 "ficom%z1\t%y1",
6646 "ficomp%z1\t%y1",
6647 NULL,
6648 NULL,
6649
6650 "fcomi\t{%y1, %0|%0, %y1}",
6651 "fcomip\t{%y1, %0|%0, %y1}",
6652 "fucomi\t{%y1, %0|%0, %y1}",
6653 "fucomip\t{%y1, %0|%0, %y1}",
6654
6655 NULL,
6656 NULL,
6657 NULL,
6658 NULL,
6659
6660 "fcom%z2\t%y2\n\tfnstsw\t%0",
6661 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6662 "fucom%z2\t%y2\n\tfnstsw\t%0",
6663 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 6664
e075ae69
RH
6665 "ficom%z2\t%y2\n\tfnstsw\t%0",
6666 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6667 NULL,
6668 NULL
6669 };
6670
6671 int mask;
69ddee61 6672 const char *ret;
e075ae69
RH
6673
6674 mask = eflags_p << 3;
6675 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6676 mask |= unordered_p << 1;
6677 mask |= stack_top_dies;
6678
6679 if (mask >= 24)
6680 abort ();
6681 ret = alt[mask];
6682 if (ret == NULL)
6683 abort ();
cda749b1 6684
e075ae69 6685 return ret;
cda749b1
JW
6686 }
6687}
2a2ab3f9 6688
f88c65f7
RH
6689void
6690ix86_output_addr_vec_elt (file, value)
6691 FILE *file;
6692 int value;
6693{
6694 const char *directive = ASM_LONG;
6695
6696 if (TARGET_64BIT)
6697 {
6698#ifdef ASM_QUAD
6699 directive = ASM_QUAD;
6700#else
6701 abort ();
6702#endif
6703 }
6704
6705 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6706}
6707
6708void
6709ix86_output_addr_diff_elt (file, value, rel)
6710 FILE *file;
6711 int value, rel;
6712{
6713 if (TARGET_64BIT)
74411039 6714 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
6715 ASM_LONG, LPREFIX, value, LPREFIX, rel);
6716 else if (HAVE_AS_GOTOFF_IN_DATA)
6717 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6718 else
6719 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6720 ASM_LONG, LPREFIX, value);
6721}
32b5b1aa 6722\f
a8bac9ab
RH
6723/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6724 for the target. */
6725
6726void
6727ix86_expand_clear (dest)
6728 rtx dest;
6729{
6730 rtx tmp;
6731
6732 /* We play register width games, which are only valid after reload. */
6733 if (!reload_completed)
6734 abort ();
6735
6736 /* Avoid HImode and its attendant prefix byte. */
6737 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6738 dest = gen_rtx_REG (SImode, REGNO (dest));
6739
6740 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6741
6742 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6743 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6744 {
6745 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6746 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6747 }
6748
6749 emit_insn (tmp);
6750}
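/* For illustration (a sketch): clearing %eax this way gives
   "xorl %eax, %eax" (2 bytes) instead of "movl $0, %eax" (5 bytes);
   the xor form clobbers the flags, hence the CLOBBER added above.  */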
6751
79325812 6752void
e075ae69
RH
6753ix86_expand_move (mode, operands)
6754 enum machine_mode mode;
6755 rtx operands[];
32b5b1aa 6756{
e075ae69 6757 int strict = (reload_in_progress || reload_completed);
e075ae69 6758 rtx insn;
e9a25f70 6759
e075ae69 6760 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
32b5b1aa 6761 {
e075ae69 6762 /* Emit insns to move operands[1] into operands[0]. */
e9a25f70 6763
e075ae69
RH
6764 if (GET_CODE (operands[0]) == MEM)
6765 operands[1] = force_reg (Pmode, operands[1]);
6766 else
32b5b1aa 6767 {
e075ae69
RH
6768 rtx temp = operands[0];
6769 if (GET_CODE (temp) != REG)
6770 temp = gen_reg_rtx (Pmode);
6771 temp = legitimize_pic_address (operands[1], temp);
6772 if (temp == operands[0])
6773 return;
6774 operands[1] = temp;
32b5b1aa 6775 }
e075ae69
RH
6776 }
6777 else
6778 {
d7a29404 6779 if (GET_CODE (operands[0]) == MEM
44cf5b6a 6780 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
d7a29404
JH
6781 || !push_operand (operands[0], mode))
6782 && GET_CODE (operands[1]) == MEM)
e075ae69 6783 operands[1] = force_reg (mode, operands[1]);
e9a25f70 6784
2c5a510c
RH
6785 if (push_operand (operands[0], mode)
6786 && ! general_no_elim_operand (operands[1], mode))
6787 operands[1] = copy_to_mode_reg (mode, operands[1]);
6788
44cf5b6a
JH
6789 /* Force large constants in 64bit compilation into register
6790 to get them CSEed. */
6791 if (TARGET_64BIT && mode == DImode
6792 && immediate_operand (operands[1], mode)
6793 && !x86_64_zero_extended_value (operands[1])
6794 && !register_operand (operands[0], mode)
6795 && optimize && !reload_completed && !reload_in_progress)
6796 operands[1] = copy_to_mode_reg (mode, operands[1]);
6797
e075ae69 6798 if (FLOAT_MODE_P (mode))
32b5b1aa 6799 {
d7a29404
JH
6800 /* If we are loading a floating point constant to a register,
6801 force the value to memory now, since we'll get better code
6802 out the back end. */
e075ae69
RH
6803
6804 if (strict)
6805 ;
e075ae69 6806 else if (GET_CODE (operands[1]) == CONST_DOUBLE
d7a29404 6807 && register_operand (operands[0], mode))
e075ae69 6808 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
32b5b1aa 6809 }
32b5b1aa 6810 }
e9a25f70 6811
e075ae69 6812 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
e9a25f70 6813
e075ae69
RH
6814 emit_insn (insn);
6815}
e9a25f70 6816
e37af218
RH
6817void
6818ix86_expand_vector_move (mode, operands)
6819 enum machine_mode mode;
6820 rtx operands[];
6821{
6822 /* Force constants other than zero into memory. We do not know how
6823 the instructions used to build constants modify the upper 64 bits
6824 of the register, once we have that information we may be able
6825 to handle some of them more efficiently. */
6826 if ((reload_in_progress | reload_completed) == 0
6827 && register_operand (operands[0], mode)
6828 && CONSTANT_P (operands[1]))
6829 {
6830 rtx addr = gen_reg_rtx (Pmode);
6831 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6832 operands[1] = gen_rtx_MEM (mode, addr);
6833 }
6834
6835 /* Make operand1 a register if it isn't already. */
6836 if ((reload_in_progress | reload_completed) == 0
6837 && !register_operand (operands[0], mode)
6838 && !register_operand (operands[1], mode)
6839 && operands[1] != CONST0_RTX (mode))
6840 {
59bef189 6841 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
6842 emit_move_insn (operands[0], temp);
6843 return;
6844 }
6845
6846 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6847}
6848
e075ae69
RH
6849/* Attempt to expand a binary operator. Make the expansion closer to the
6850 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 6851 memory references (one output, two input) in a single insn. */
e9a25f70 6852
e075ae69
RH
6853void
6854ix86_expand_binary_operator (code, mode, operands)
6855 enum rtx_code code;
6856 enum machine_mode mode;
6857 rtx operands[];
6858{
6859 int matching_memory;
6860 rtx src1, src2, dst, op, clob;
6861
6862 dst = operands[0];
6863 src1 = operands[1];
6864 src2 = operands[2];
6865
6866 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6867 if (GET_RTX_CLASS (code) == 'c'
6868 && (rtx_equal_p (dst, src2)
6869 || immediate_operand (src1, mode)))
6870 {
6871 rtx temp = src1;
6872 src1 = src2;
6873 src2 = temp;
32b5b1aa 6874 }
e9a25f70 6875
e075ae69
RH
6876 /* If the destination is memory, and we do not have matching source
6877 operands, do things in registers. */
6878 matching_memory = 0;
6879 if (GET_CODE (dst) == MEM)
32b5b1aa 6880 {
e075ae69
RH
6881 if (rtx_equal_p (dst, src1))
6882 matching_memory = 1;
6883 else if (GET_RTX_CLASS (code) == 'c'
6884 && rtx_equal_p (dst, src2))
6885 matching_memory = 2;
6886 else
6887 dst = gen_reg_rtx (mode);
6888 }
0f290768 6889
e075ae69
RH
6890 /* Both source operands cannot be in memory. */
6891 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6892 {
6893 if (matching_memory != 2)
6894 src2 = force_reg (mode, src2);
6895 else
6896 src1 = force_reg (mode, src1);
32b5b1aa 6897 }
e9a25f70 6898
06a964de
JH
 6899 /* If the operation is not commutative, source 1 cannot be a constant
6900 or non-matching memory. */
0f290768 6901 if ((CONSTANT_P (src1)
06a964de
JH
6902 || (!matching_memory && GET_CODE (src1) == MEM))
6903 && GET_RTX_CLASS (code) != 'c')
e075ae69 6904 src1 = force_reg (mode, src1);
0f290768 6905
e075ae69 6906 /* If optimizing, copy to regs to improve CSE */
fe577e58 6907 if (optimize && ! no_new_pseudos)
32b5b1aa 6908 {
e075ae69
RH
6909 if (GET_CODE (dst) == MEM)
6910 dst = gen_reg_rtx (mode);
6911 if (GET_CODE (src1) == MEM)
6912 src1 = force_reg (mode, src1);
6913 if (GET_CODE (src2) == MEM)
6914 src2 = force_reg (mode, src2);
32b5b1aa 6915 }
e9a25f70 6916
e075ae69
RH
6917 /* Emit the instruction. */
6918
6919 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6920 if (reload_in_progress)
6921 {
6922 /* Reload doesn't know about the flags register, and doesn't know that
6923 it doesn't want to clobber it. We can only do this with PLUS. */
6924 if (code != PLUS)
6925 abort ();
6926 emit_insn (op);
6927 }
6928 else
32b5b1aa 6929 {
e075ae69
RH
6930 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6931 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 6932 }
e9a25f70 6933
e075ae69
RH
6934 /* Fix up the destination if needed. */
6935 if (dst != operands[0])
6936 emit_move_insn (operands[0], dst);
6937}
6938
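
/* For context: a minimal sketch (hypothetical wrapper name; the real
 callers are the define_expand patterns in i386.md) of how operands
 are handed to ix86_expand_binary_operator above. */

static void
expand_addsi3_sketch (rtx operands[3])
{
 /* The helper massages operand shapes -- forcing doubled memory or
 misplaced constant operands into registers -- then emits the PLUS
 together with its FLAGS_REG clobber. */
 ix86_expand_binary_operator (PLUS, SImode, operands);
}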
6939/* Return TRUE or FALSE depending on whether the binary operator meets the
6940 appropriate constraints. */
6941
6942int
6943ix86_binary_operator_ok (code, mode, operands)
6944 enum rtx_code code;
6945 enum machine_mode mode ATTRIBUTE_UNUSED;
6946 rtx operands[3];
6947{
6948 /* Both source operands cannot be in memory. */
6949 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6950 return 0;
 6951 /* If the operation is not commutative, source 1 cannot be a constant. */
6952 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6953 return 0;
6954 /* If the destination is memory, we must have a matching source operand. */
6955 if (GET_CODE (operands[0]) == MEM
6956 && ! (rtx_equal_p (operands[0], operands[1])
6957 || (GET_RTX_CLASS (code) == 'c'
6958 && rtx_equal_p (operands[0], operands[2]))))
6959 return 0;
06a964de 6960 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 6961 have a matching destination. */
06a964de
JH
6962 if (GET_CODE (operands[1]) == MEM
6963 && GET_RTX_CLASS (code) != 'c'
6964 && ! rtx_equal_p (operands[0], operands[1]))
6965 return 0;
e075ae69
RH
6966 return 1;
6967}
6968
6969/* Attempt to expand a unary operator. Make the expansion closer to the
 6970 actual machine than just general_operand, which would allow 2 separate
9d81fc27 6971 memory references (one output, one input) in a single insn. */
e075ae69 6972
9d81fc27 6973void
e075ae69
RH
6974ix86_expand_unary_operator (code, mode, operands)
6975 enum rtx_code code;
6976 enum machine_mode mode;
6977 rtx operands[];
6978{
06a964de
JH
6979 int matching_memory;
6980 rtx src, dst, op, clob;
6981
6982 dst = operands[0];
6983 src = operands[1];
e075ae69 6984
06a964de
JH
6985 /* If the destination is memory, and we do not have matching source
6986 operands, do things in registers. */
6987 matching_memory = 0;
6988 if (GET_CODE (dst) == MEM)
32b5b1aa 6989 {
06a964de
JH
6990 if (rtx_equal_p (dst, src))
6991 matching_memory = 1;
e075ae69 6992 else
06a964de 6993 dst = gen_reg_rtx (mode);
32b5b1aa 6994 }
e9a25f70 6995
06a964de
JH
6996 /* When source operand is memory, destination must match. */
6997 if (!matching_memory && GET_CODE (src) == MEM)
6998 src = force_reg (mode, src);
0f290768 6999
06a964de 7000 /* If optimizing, copy to regs to improve CSE */
fe577e58 7001 if (optimize && ! no_new_pseudos)
06a964de
JH
7002 {
7003 if (GET_CODE (dst) == MEM)
7004 dst = gen_reg_rtx (mode);
7005 if (GET_CODE (src) == MEM)
7006 src = force_reg (mode, src);
7007 }
7008
7009 /* Emit the instruction. */
7010
7011 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7012 if (reload_in_progress || code == NOT)
7013 {
7014 /* Reload doesn't know about the flags register, and doesn't know that
7015 it doesn't want to clobber it. */
7016 if (code != NOT)
7017 abort ();
7018 emit_insn (op);
7019 }
7020 else
7021 {
7022 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7023 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7024 }
7025
7026 /* Fix up the destination if needed. */
7027 if (dst != operands[0])
7028 emit_move_insn (operands[0], dst);
e075ae69
RH
7029}
7030
7031/* Return TRUE or FALSE depending on whether the unary operator meets the
7032 appropriate constraints. */
7033
7034int
7035ix86_unary_operator_ok (code, mode, operands)
7036 enum rtx_code code ATTRIBUTE_UNUSED;
7037 enum machine_mode mode ATTRIBUTE_UNUSED;
7038 rtx operands[2] ATTRIBUTE_UNUSED;
7039{
06a964de
JH
7040 /* If one of operands is memory, source and destination must match. */
7041 if ((GET_CODE (operands[0]) == MEM
7042 || GET_CODE (operands[1]) == MEM)
7043 && ! rtx_equal_p (operands[0], operands[1]))
7044 return FALSE;
e075ae69
RH
7045 return TRUE;
7046}
7047
16189740
RH
7048/* Return TRUE or FALSE depending on whether the first SET in INSN
 7049 has source and destination with matching CC modes, and whether the
7050 CC mode is at least as constrained as REQ_MODE. */
7051
7052int
7053ix86_match_ccmode (insn, req_mode)
7054 rtx insn;
7055 enum machine_mode req_mode;
7056{
7057 rtx set;
7058 enum machine_mode set_mode;
7059
7060 set = PATTERN (insn);
7061 if (GET_CODE (set) == PARALLEL)
7062 set = XVECEXP (set, 0, 0);
7063 if (GET_CODE (set) != SET)
7064 abort ();
9076b9c1
JH
7065 if (GET_CODE (SET_SRC (set)) != COMPARE)
7066 abort ();
16189740
RH
7067
7068 set_mode = GET_MODE (SET_DEST (set));
7069 switch (set_mode)
7070 {
9076b9c1
JH
7071 case CCNOmode:
7072 if (req_mode != CCNOmode
7073 && (req_mode != CCmode
7074 || XEXP (SET_SRC (set), 1) != const0_rtx))
7075 return 0;
7076 break;
16189740 7077 case CCmode:
9076b9c1 7078 if (req_mode == CCGCmode)
16189740
RH
7079 return 0;
7080 /* FALLTHRU */
9076b9c1
JH
7081 case CCGCmode:
7082 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7083 return 0;
7084 /* FALLTHRU */
7085 case CCGOCmode:
16189740
RH
7086 if (req_mode == CCZmode)
7087 return 0;
7088 /* FALLTHRU */
7089 case CCZmode:
7090 break;
7091
7092 default:
7093 abort ();
7094 }
7095
7096 return (GET_MODE (SET_SRC (set)) == set_mode);
7097}
7098
e075ae69
RH
7099/* Generate insn patterns to do an integer compare of OPERANDS. */
7100
7101static rtx
7102ix86_expand_int_compare (code, op0, op1)
7103 enum rtx_code code;
7104 rtx op0, op1;
7105{
7106 enum machine_mode cmpmode;
7107 rtx tmp, flags;
7108
7109 cmpmode = SELECT_CC_MODE (code, op0, op1);
7110 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7111
7112 /* This is very simple, but making the interface the same as in the
7113 FP case makes the rest of the code easier. */
7114 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7115 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7116
7117 /* Return the test that should be put into the flags user, i.e.
7118 the bcc, scc, or cmov instruction. */
7119 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7120}
7121
3a3677ff
RH
7122/* Figure out whether to use ordered or unordered fp comparisons.
7123 Return the appropriate mode to use. */
e075ae69 7124
b1cdafbb 7125enum machine_mode
3a3677ff 7126ix86_fp_compare_mode (code)
8752c357 7127 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7128{
9e7adcb3
JH
7129 /* ??? In order to make all comparisons reversible, we do all comparisons
7130 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7131 all forms trapping and nontrapping comparisons, we can make inequality
7132 comparisons trapping again, since it results in better code when using
7133 FCOM based compares. */
7134 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7135}
7136
9076b9c1
JH
7137enum machine_mode
7138ix86_cc_mode (code, op0, op1)
7139 enum rtx_code code;
7140 rtx op0, op1;
7141{
7142 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7143 return ix86_fp_compare_mode (code);
7144 switch (code)
7145 {
7146 /* Only zero flag is needed. */
7147 case EQ: /* ZF=0 */
7148 case NE: /* ZF!=0 */
7149 return CCZmode;
7150 /* Codes needing carry flag. */
265dab10
JH
7151 case GEU: /* CF=0 */
7152 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
7153 case LTU: /* CF=1 */
7154 case LEU: /* CF=1 | ZF=1 */
265dab10 7155 return CCmode;
9076b9c1
JH
7156 /* Codes possibly doable only with sign flag when
7157 comparing against zero. */
7158 case GE: /* SF=OF or SF=0 */
7e08e190 7159 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
7160 if (op1 == const0_rtx)
7161 return CCGOCmode;
7162 else
7163 /* For other cases Carry flag is not required. */
7164 return CCGCmode;
7165 /* Codes doable only with sign flag when comparing
 7166 against zero, but for which we lack a jump instruction, so
 7167 we need to use relational tests against the overflow flag,
 7168 which thus needs to be zero. */
7169 case GT: /* ZF=0 & SF=OF */
7170 case LE: /* ZF=1 | SF<>OF */
7171 if (op1 == const0_rtx)
7172 return CCNOmode;
7173 else
7174 return CCGCmode;
7fcd7218
JH
 7175 /* The strcmp pattern does (use flags), and combine may ask us
 7176 for the proper mode. */
7177 case USE:
7178 return CCmode;
9076b9c1 7179 default:
0f290768 7180 abort ();
9076b9c1
JH
7181 }
7182}
7183
3a3677ff
RH
7184/* Return true if we should use an FCOMI instruction for this fp comparison. */
7185
a940d8bd 7186int
3a3677ff 7187ix86_use_fcomi_compare (code)
9e7adcb3 7188 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 7189{
9e7adcb3
JH
7190 enum rtx_code swapped_code = swap_condition (code);
7191 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7192 || (ix86_fp_comparison_cost (swapped_code)
7193 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
7194}
7195
0f290768 7196/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
7197 to a fp comparison. The operands are updated in place; the new
 7198 comparison code is returned. */
7199
7200static enum rtx_code
7201ix86_prepare_fp_compare_args (code, pop0, pop1)
7202 enum rtx_code code;
7203 rtx *pop0, *pop1;
7204{
7205 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7206 rtx op0 = *pop0, op1 = *pop1;
7207 enum machine_mode op_mode = GET_MODE (op0);
0644b628 7208 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7209
e075ae69 7210 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
 7211 The same is true of the XFmode compare instructions and of the
 7212 fcomi compare instructions. */
7213
0644b628
JH
7214 if (!is_sse
7215 && (fpcmp_mode == CCFPUmode
7216 || op_mode == XFmode
7217 || op_mode == TFmode
7218 || ix86_use_fcomi_compare (code)))
e075ae69 7219 {
3a3677ff
RH
7220 op0 = force_reg (op_mode, op0);
7221 op1 = force_reg (op_mode, op1);
e075ae69
RH
7222 }
7223 else
7224 {
7225 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7226 things around if they appear profitable, otherwise force op0
7227 into a register. */
7228
7229 if (standard_80387_constant_p (op0) == 0
7230 || (GET_CODE (op0) == MEM
7231 && ! (standard_80387_constant_p (op1) == 0
7232 || GET_CODE (op1) == MEM)))
32b5b1aa 7233 {
e075ae69
RH
7234 rtx tmp;
7235 tmp = op0, op0 = op1, op1 = tmp;
7236 code = swap_condition (code);
7237 }
7238
7239 if (GET_CODE (op0) != REG)
3a3677ff 7240 op0 = force_reg (op_mode, op0);
e075ae69
RH
7241
7242 if (CONSTANT_P (op1))
7243 {
7244 if (standard_80387_constant_p (op1))
3a3677ff 7245 op1 = force_reg (op_mode, op1);
e075ae69 7246 else
3a3677ff 7247 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7248 }
7249 }
e9a25f70 7250
9e7adcb3
JH
7251 /* Try to rearrange the comparison to make it cheaper. */
7252 if (ix86_fp_comparison_cost (code)
7253 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 7254 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
7255 {
7256 rtx tmp;
7257 tmp = op0, op0 = op1, op1 = tmp;
7258 code = swap_condition (code);
7259 if (GET_CODE (op0) != REG)
7260 op0 = force_reg (op_mode, op0);
7261 }
7262
3a3677ff
RH
7263 *pop0 = op0;
7264 *pop1 = op1;
7265 return code;
7266}
7267
c0c102a9
JH
7268/* Convert comparison codes we use to represent FP comparison to integer
7269 code that will result in proper branch. Return UNKNOWN if no such code
7270 is available. */
7271static enum rtx_code
7272ix86_fp_compare_code_to_integer (code)
7273 enum rtx_code code;
7274{
7275 switch (code)
7276 {
7277 case GT:
7278 return GTU;
7279 case GE:
7280 return GEU;
7281 case ORDERED:
7282 case UNORDERED:
7283 return code;
7284 break;
7285 case UNEQ:
7286 return EQ;
7287 break;
7288 case UNLT:
7289 return LTU;
7290 break;
7291 case UNLE:
7292 return LEU;
7293 break;
7294 case LTGT:
7295 return NE;
7296 break;
7297 default:
7298 return UNKNOWN;
7299 }
7300}
7301
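
/* The mapping above works because fcomi materializes the FP outcome
 in CF/ZF exactly the way an unsigned integer compare would. Below,
 a standalone C model of that encoding, using the bit values from the
 flags table in ix86_fp_comparison_codes (CF=0x01, PF=0x04, ZF=0x40);
 the function name is ours, for illustration only. */

static unsigned int
fcomi_flags_model (double a, double b)
{
 if (a != a || b != b)
 return 0x45; /* unordered: ZF, PF and CF all set */
 if (a < b)
 return 0x01; /* below: CF */
 if (a == b)
 return 0x40; /* equal: ZF */
 return 0; /* above: all flags clear */
}

/* GT therefore maps to GTU: taken iff (flags & 0x41) == 0, i.e.
 CF=0 and ZF=0; UNLT maps to LTU: taken iff CF=1. */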
7302/* Split comparison code CODE into comparisons we can do using branch
 7303 instructions. BYPASS_CODE is the comparison code for the branch that will
 7304 branch around FIRST_CODE and SECOND_CODE. If one of the branches
 7305 is not required, its code is set to NIL.
7306 We never require more than two branches. */
7307static void
7308ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7309 enum rtx_code code, *bypass_code, *first_code, *second_code;
7310{
7311 *first_code = code;
7312 *bypass_code = NIL;
7313 *second_code = NIL;
7314
7315 /* The fcomi comparison sets flags as follows:
7316
7317 cmp ZF PF CF
7318 > 0 0 0
7319 < 0 0 1
7320 = 1 0 0
7321 un 1 1 1 */
7322
7323 switch (code)
7324 {
7325 case GT: /* GTU - CF=0 & ZF=0 */
7326 case GE: /* GEU - CF=0 */
7327 case ORDERED: /* PF=0 */
7328 case UNORDERED: /* PF=1 */
7329 case UNEQ: /* EQ - ZF=1 */
7330 case UNLT: /* LTU - CF=1 */
7331 case UNLE: /* LEU - CF=1 | ZF=1 */
7332 case LTGT: /* EQ - ZF=0 */
7333 break;
7334 case LT: /* LTU - CF=1 - fails on unordered */
7335 *first_code = UNLT;
7336 *bypass_code = UNORDERED;
7337 break;
7338 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7339 *first_code = UNLE;
7340 *bypass_code = UNORDERED;
7341 break;
7342 case EQ: /* EQ - ZF=1 - fails on unordered */
7343 *first_code = UNEQ;
7344 *bypass_code = UNORDERED;
7345 break;
7346 case NE: /* NE - ZF=0 - fails on unordered */
7347 *first_code = LTGT;
7348 *second_code = UNORDERED;
7349 break;
7350 case UNGE: /* GEU - CF=0 - fails on unordered */
7351 *first_code = GE;
7352 *second_code = UNORDERED;
7353 break;
7354 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7355 *first_code = GT;
7356 *second_code = UNORDERED;
7357 break;
7358 default:
7359 abort ();
7360 }
7361 if (!TARGET_IEEE_FP)
7362 {
7363 *second_code = NIL;
7364 *bypass_code = NIL;
7365 }
7366}
7367
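
/* A concrete instance of the split above: under TARGET_IEEE_FP an NE
 test cannot be done with one branch, so it becomes first_code = LTGT
 plus second_code = UNORDERED. A standalone C model of the resulting
 control flow (illustrative function name): */

static int
ieee_ne_model (double a, double b)
{
 if (a < b || a > b) /* first branch: LTGT, ZF=0 */
 return 1;
 if (a != a || b != b) /* second branch: UNORDERED, PF=1 */
 return 1;
 return 0;
}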
9e7adcb3
JH
 7368/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
 7369 All of the following functions use the number of instructions as the cost metric.
 7370 In the future this should be tweaked to compute bytes for optimize_size and
7371 take into account performance of various instructions on various CPUs. */
7372static int
7373ix86_fp_comparison_arithmetics_cost (code)
7374 enum rtx_code code;
7375{
7376 if (!TARGET_IEEE_FP)
7377 return 4;
7378 /* The cost of code output by ix86_expand_fp_compare. */
7379 switch (code)
7380 {
7381 case UNLE:
7382 case UNLT:
7383 case LTGT:
7384 case GT:
7385 case GE:
7386 case UNORDERED:
7387 case ORDERED:
7388 case UNEQ:
7389 return 4;
7390 break;
7391 case LT:
7392 case NE:
7393 case EQ:
7394 case UNGE:
7395 return 5;
7396 break;
7397 case LE:
7398 case UNGT:
7399 return 6;
7400 break;
7401 default:
7402 abort ();
7403 }
7404}
7405
7406/* Return cost of comparison done using fcomi operation.
7407 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7408static int
7409ix86_fp_comparison_fcomi_cost (code)
7410 enum rtx_code code;
7411{
7412 enum rtx_code bypass_code, first_code, second_code;
 7413 /* Return an arbitrarily high cost when the instruction is not supported - this
7414 prevents gcc from using it. */
7415 if (!TARGET_CMOVE)
7416 return 1024;
7417 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7418 return (bypass_code != NIL || second_code != NIL) + 2;
7419}
7420
7421/* Return cost of comparison done using sahf operation.
7422 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7423static int
7424ix86_fp_comparison_sahf_cost (code)
7425 enum rtx_code code;
7426{
7427 enum rtx_code bypass_code, first_code, second_code;
 7428 /* Return an arbitrarily high cost when the instruction is not preferred - this
 7429 keeps gcc from using it. */
7430 if (!TARGET_USE_SAHF && !optimize_size)
7431 return 1024;
7432 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7433 return (bypass_code != NIL || second_code != NIL) + 3;
7434}
7435
7436/* Compute cost of the comparison done using any method.
7437 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7438static int
7439ix86_fp_comparison_cost (code)
7440 enum rtx_code code;
7441{
7442 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7443 int min;
7444
7445 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7446 sahf_cost = ix86_fp_comparison_sahf_cost (code);
7447
7448 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7449 if (min > sahf_cost)
7450 min = sahf_cost;
7451 if (min > fcomi_cost)
7452 min = fcomi_cost;
7453 return min;
7454}
c0c102a9 7455
3a3677ff
RH
7456/* Generate insn patterns to do a floating point compare of OPERANDS. */
7457
9e7adcb3
JH
7458static rtx
7459ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
7460 enum rtx_code code;
7461 rtx op0, op1, scratch;
9e7adcb3
JH
7462 rtx *second_test;
7463 rtx *bypass_test;
3a3677ff
RH
7464{
7465 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 7466 rtx tmp, tmp2;
9e7adcb3 7467 int cost = ix86_fp_comparison_cost (code);
c0c102a9 7468 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7469
7470 fpcmp_mode = ix86_fp_compare_mode (code);
7471 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7472
9e7adcb3
JH
7473 if (second_test)
7474 *second_test = NULL_RTX;
7475 if (bypass_test)
7476 *bypass_test = NULL_RTX;
7477
c0c102a9
JH
7478 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7479
9e7adcb3
JH
7480 /* Do fcomi/sahf based test when profitable. */
7481 if ((bypass_code == NIL || bypass_test)
7482 && (second_code == NIL || second_test)
7483 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 7484 {
c0c102a9
JH
7485 if (TARGET_CMOVE)
7486 {
7487 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7488 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7489 tmp);
7490 emit_insn (tmp);
7491 }
7492 else
7493 {
7494 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7495 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7496 if (!scratch)
7497 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
7498 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7499 emit_insn (gen_x86_sahf_1 (scratch));
7500 }
e075ae69
RH
7501
7502 /* The FP codes work out to act like unsigned. */
9a915772 7503 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
7504 code = first_code;
7505 if (bypass_code != NIL)
7506 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7507 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7508 const0_rtx);
7509 if (second_code != NIL)
7510 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7511 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7512 const0_rtx);
e075ae69
RH
7513 }
7514 else
7515 {
7516 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69
RH
7517 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7518 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
bf71a4f8
JH
7519 if (!scratch)
7520 scratch = gen_reg_rtx (HImode);
3a3677ff 7521 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 7522
9a915772
JH
 7523 /* In the unordered case, we have to check C2 for NaNs, which
7524 doesn't happen to work out to anything nice combination-wise.
7525 So do some bit twiddling on the value we've got in AH to come
7526 up with an appropriate set of condition codes. */
e075ae69 7527
9a915772
JH
7528 intcmp_mode = CCNOmode;
7529 switch (code)
32b5b1aa 7530 {
9a915772
JH
7531 case GT:
7532 case UNGT:
7533 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 7534 {
3a3677ff 7535 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 7536 code = EQ;
9a915772
JH
7537 }
7538 else
7539 {
7540 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7541 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7542 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7543 intcmp_mode = CCmode;
7544 code = GEU;
7545 }
7546 break;
7547 case LT:
7548 case UNLT:
7549 if (code == LT && TARGET_IEEE_FP)
7550 {
3a3677ff
RH
7551 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7552 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
7553 intcmp_mode = CCmode;
7554 code = EQ;
9a915772
JH
7555 }
7556 else
7557 {
7558 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7559 code = NE;
7560 }
7561 break;
7562 case GE:
7563 case UNGE:
7564 if (code == GE || !TARGET_IEEE_FP)
7565 {
3a3677ff 7566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 7567 code = EQ;
9a915772
JH
7568 }
7569 else
7570 {
7571 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7572 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7573 GEN_INT (0x01)));
7574 code = NE;
7575 }
7576 break;
7577 case LE:
7578 case UNLE:
7579 if (code == LE && TARGET_IEEE_FP)
7580 {
3a3677ff
RH
7581 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7582 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7583 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7584 intcmp_mode = CCmode;
7585 code = LTU;
9a915772
JH
7586 }
7587 else
7588 {
7589 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7590 code = NE;
7591 }
7592 break;
7593 case EQ:
7594 case UNEQ:
7595 if (code == EQ && TARGET_IEEE_FP)
7596 {
3a3677ff
RH
7597 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7598 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
7599 intcmp_mode = CCmode;
7600 code = EQ;
9a915772
JH
7601 }
7602 else
7603 {
3a3677ff
RH
7604 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7605 code = NE;
7606 break;
9a915772
JH
7607 }
7608 break;
7609 case NE:
7610 case LTGT:
7611 if (code == NE && TARGET_IEEE_FP)
7612 {
3a3677ff 7613 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
7614 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7615 GEN_INT (0x40)));
3a3677ff 7616 code = NE;
9a915772
JH
7617 }
7618 else
7619 {
3a3677ff
RH
7620 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7621 code = EQ;
32b5b1aa 7622 }
9a915772
JH
7623 break;
7624
7625 case UNORDERED:
7626 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7627 code = NE;
7628 break;
7629 case ORDERED:
7630 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7631 code = EQ;
7632 break;
7633
7634 default:
7635 abort ();
32b5b1aa 7636 }
32b5b1aa 7637 }
e075ae69
RH
7638
7639 /* Return the test that should be put into the flags user, i.e.
7640 the bcc, scc, or cmov instruction. */
7641 return gen_rtx_fmt_ee (code, VOIDmode,
7642 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7643 const0_rtx);
7644}
7645
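
/* A standalone model of the fnstsw path above: the FPU condition bits
 land in AH as C0=0x01, C2=0x04, C3=0x40. fcom leaves them all clear
 for ">", sets C0 for "<", C3 for "=", and all three when unordered,
 so the GT test emitted above is a single test of 0x45 (function name
 ours, for illustration): */

static int
fp_gt_from_ah_model (unsigned char ah)
{
 return (ah & 0x45) == 0; /* testb $0x45,%ah ; sete */
}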
9e3e266c 7646rtx
a1b8572c 7647ix86_expand_compare (code, second_test, bypass_test)
e075ae69 7648 enum rtx_code code;
a1b8572c 7649 rtx *second_test, *bypass_test;
e075ae69
RH
7650{
7651 rtx op0, op1, ret;
7652 op0 = ix86_compare_op0;
7653 op1 = ix86_compare_op1;
7654
a1b8572c
JH
7655 if (second_test)
7656 *second_test = NULL_RTX;
7657 if (bypass_test)
7658 *bypass_test = NULL_RTX;
7659
e075ae69 7660 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 7661 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 7662 second_test, bypass_test);
32b5b1aa 7663 else
e075ae69
RH
7664 ret = ix86_expand_int_compare (code, op0, op1);
7665
7666 return ret;
7667}
7668
03598dea
JH
7669/* Return true if the CODE will result in nontrivial jump sequence. */
7670bool
7671ix86_fp_jump_nontrivial_p (code)
7672 enum rtx_code code;
7673{
7674 enum rtx_code bypass_code, first_code, second_code;
7675 if (!TARGET_CMOVE)
7676 return true;
7677 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7678 return bypass_code != NIL || second_code != NIL;
7679}
7680
e075ae69 7681void
3a3677ff 7682ix86_expand_branch (code, label)
e075ae69 7683 enum rtx_code code;
e075ae69
RH
7684 rtx label;
7685{
3a3677ff 7686 rtx tmp;
e075ae69 7687
3a3677ff 7688 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 7689 {
3a3677ff
RH
7690 case QImode:
7691 case HImode:
7692 case SImode:
0d7d98ee 7693 simple:
a1b8572c 7694 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
7695 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7696 gen_rtx_LABEL_REF (VOIDmode, label),
7697 pc_rtx);
7698 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 7699 return;
e075ae69 7700
3a3677ff
RH
7701 case SFmode:
7702 case DFmode:
0f290768 7703 case XFmode:
2b589241 7704 case TFmode:
3a3677ff
RH
7705 {
7706 rtvec vec;
7707 int use_fcomi;
03598dea 7708 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
7709
7710 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7711 &ix86_compare_op1);
03598dea
JH
7712
7713 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7714
7715 /* Check whether we will use the natural sequence with one jump. If
 7716 so, we can expand the jump early. Otherwise delay expansion by
 7717 creating a compound insn so as not to confuse the optimizers. */
7718 if (bypass_code == NIL && second_code == NIL
7719 && TARGET_CMOVE)
7720 {
7721 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7722 gen_rtx_LABEL_REF (VOIDmode, label),
7723 pc_rtx, NULL_RTX);
7724 }
7725 else
7726 {
7727 tmp = gen_rtx_fmt_ee (code, VOIDmode,
7728 ix86_compare_op0, ix86_compare_op1);
7729 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7730 gen_rtx_LABEL_REF (VOIDmode, label),
7731 pc_rtx);
7732 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7733
7734 use_fcomi = ix86_use_fcomi_compare (code);
7735 vec = rtvec_alloc (3 + !use_fcomi);
7736 RTVEC_ELT (vec, 0) = tmp;
7737 RTVEC_ELT (vec, 1)
7738 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7739 RTVEC_ELT (vec, 2)
7740 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7741 if (! use_fcomi)
7742 RTVEC_ELT (vec, 3)
7743 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7744
7745 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7746 }
3a3677ff
RH
7747 return;
7748 }
32b5b1aa 7749
3a3677ff 7750 case DImode:
0d7d98ee
JH
7751 if (TARGET_64BIT)
7752 goto simple;
3a3677ff
RH
7753 /* Expand DImode branch into multiple compare+branch. */
7754 {
7755 rtx lo[2], hi[2], label2;
7756 enum rtx_code code1, code2, code3;
32b5b1aa 7757
3a3677ff
RH
7758 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7759 {
7760 tmp = ix86_compare_op0;
7761 ix86_compare_op0 = ix86_compare_op1;
7762 ix86_compare_op1 = tmp;
7763 code = swap_condition (code);
7764 }
7765 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7766 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 7767
3a3677ff
RH
7768 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7769 avoid two branches. This costs one extra insn, so disable when
7770 optimizing for size. */
32b5b1aa 7771
3a3677ff
RH
7772 if ((code == EQ || code == NE)
7773 && (!optimize_size
7774 || hi[1] == const0_rtx || lo[1] == const0_rtx))
7775 {
7776 rtx xor0, xor1;
32b5b1aa 7777
3a3677ff
RH
7778 xor1 = hi[0];
7779 if (hi[1] != const0_rtx)
7780 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7781 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7782
3a3677ff
RH
7783 xor0 = lo[0];
7784 if (lo[1] != const0_rtx)
7785 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7786 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 7787
3a3677ff
RH
7788 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7789 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 7790
3a3677ff
RH
7791 ix86_compare_op0 = tmp;
7792 ix86_compare_op1 = const0_rtx;
7793 ix86_expand_branch (code, label);
7794 return;
7795 }
e075ae69 7796
1f9124e4
JJ
7797 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7798 op1 is a constant and the low word is zero, then we can just
7799 examine the high word. */
32b5b1aa 7800
1f9124e4
JJ
7801 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7802 switch (code)
7803 {
7804 case LT: case LTU: case GE: case GEU:
7805 ix86_compare_op0 = hi[0];
7806 ix86_compare_op1 = hi[1];
7807 ix86_expand_branch (code, label);
7808 return;
7809 default:
7810 break;
7811 }
e075ae69 7812
3a3677ff 7813 /* Otherwise, we need two or three jumps. */
e075ae69 7814
3a3677ff 7815 label2 = gen_label_rtx ();
e075ae69 7816
3a3677ff
RH
7817 code1 = code;
7818 code2 = swap_condition (code);
7819 code3 = unsigned_condition (code);
e075ae69 7820
3a3677ff
RH
7821 switch (code)
7822 {
7823 case LT: case GT: case LTU: case GTU:
7824 break;
e075ae69 7825
3a3677ff
RH
7826 case LE: code1 = LT; code2 = GT; break;
7827 case GE: code1 = GT; code2 = LT; break;
7828 case LEU: code1 = LTU; code2 = GTU; break;
7829 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 7830
3a3677ff
RH
7831 case EQ: code1 = NIL; code2 = NE; break;
7832 case NE: code2 = NIL; break;
e075ae69 7833
3a3677ff
RH
7834 default:
7835 abort ();
7836 }
e075ae69 7837
3a3677ff
RH
7838 /*
7839 * a < b =>
7840 * if (hi(a) < hi(b)) goto true;
7841 * if (hi(a) > hi(b)) goto false;
7842 * if (lo(a) < lo(b)) goto true;
7843 * false:
7844 */
7845
7846 ix86_compare_op0 = hi[0];
7847 ix86_compare_op1 = hi[1];
7848
7849 if (code1 != NIL)
7850 ix86_expand_branch (code1, label);
7851 if (code2 != NIL)
7852 ix86_expand_branch (code2, label2);
7853
7854 ix86_compare_op0 = lo[0];
7855 ix86_compare_op1 = lo[1];
7856 ix86_expand_branch (code3, label);
7857
7858 if (code2 != NIL)
7859 emit_label (label2);
7860 return;
7861 }
e075ae69 7862
3a3677ff
RH
7863 default:
7864 abort ();
7865 }
32b5b1aa 7866}
e075ae69 7867
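
/* Standalone C models of the two DImode strategies above, written on
 32-bit words (illustrative names). Equality folds both word compares
 into one test; ordering uses the code1/code2/code3 ladder. */

static int
di_eq_model (unsigned int lo0, unsigned int hi0,
 unsigned int lo1, unsigned int hi1)
{
 /* (hi0^hi1)|(lo0^lo1) is zero iff both words match: one branch
 instead of two. */
 return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}

static int
di_ltu_model (unsigned int lo0, unsigned int hi0,
 unsigned int lo1, unsigned int hi1)
{
 if (hi0 < hi1) return 1; /* code1: branch to label */
 if (hi0 > hi1) return 0; /* code2: branch to label2 */
 return lo0 < lo1; /* code3 on the low words */
}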
9e7adcb3
JH
 7868/* Split a branch based on a floating point condition. */
7869void
03598dea
JH
7870ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7871 enum rtx_code code;
7872 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
7873{
7874 rtx second, bypass;
7875 rtx label = NULL_RTX;
03598dea 7876 rtx condition;
6b24c259
JH
7877 int bypass_probability = -1, second_probability = -1, probability = -1;
7878 rtx i;
9e7adcb3
JH
7879
7880 if (target2 != pc_rtx)
7881 {
7882 rtx tmp = target2;
7883 code = reverse_condition_maybe_unordered (code);
7884 target2 = target1;
7885 target1 = tmp;
7886 }
7887
7888 condition = ix86_expand_fp_compare (code, op1, op2,
7889 tmp, &second, &bypass);
6b24c259
JH
7890
7891 if (split_branch_probability >= 0)
7892 {
7893 /* Distribute the probabilities across the jumps.
 7894 Assume that BYPASS and SECOND always test
 7895 for UNORDERED. */
7896 probability = split_branch_probability;
7897
d6a7951f 7898 /* A value of 1 is low enough that the probability does not need
6b24c259
JH
7899 to be updated. Later we may run some experiments and see
7900 if unordered values are more frequent in practice. */
7901 if (bypass)
7902 bypass_probability = 1;
7903 if (second)
7904 second_probability = 1;
7905 }
9e7adcb3
JH
7906 if (bypass != NULL_RTX)
7907 {
7908 label = gen_label_rtx ();
6b24c259
JH
7909 i = emit_jump_insn (gen_rtx_SET
7910 (VOIDmode, pc_rtx,
7911 gen_rtx_IF_THEN_ELSE (VOIDmode,
7912 bypass,
7913 gen_rtx_LABEL_REF (VOIDmode,
7914 label),
7915 pc_rtx)));
7916 if (bypass_probability >= 0)
7917 REG_NOTES (i)
7918 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7919 GEN_INT (bypass_probability),
7920 REG_NOTES (i));
7921 }
7922 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
7923 (VOIDmode, pc_rtx,
7924 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
7925 condition, target1, target2)));
7926 if (probability >= 0)
7927 REG_NOTES (i)
7928 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7929 GEN_INT (probability),
7930 REG_NOTES (i));
7931 if (second != NULL_RTX)
9e7adcb3 7932 {
6b24c259
JH
7933 i = emit_jump_insn (gen_rtx_SET
7934 (VOIDmode, pc_rtx,
7935 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7936 target2)));
7937 if (second_probability >= 0)
7938 REG_NOTES (i)
7939 = gen_rtx_EXPR_LIST (REG_BR_PROB,
7940 GEN_INT (second_probability),
7941 REG_NOTES (i));
9e7adcb3 7942 }
9e7adcb3
JH
7943 if (label != NULL_RTX)
7944 emit_label (label);
7945}
7946
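
/* A standalone C sketch of the branch shape emitted above when a
 bypass test exists, e.g. for code = LT (first_code = UNLT,
 bypass_code = UNORDERED): the bypass jump skips the main branch
 whenever either operand is a NaN, so LT correctly fails on unordered
 inputs (illustrative name): */

static int
fp_lt_branch_model (double a, double b)
{
 if (a != a || b != b) /* bypass branch: to label, the target2 side */
 return 0;
 return a < b; /* main branch: UNLT -> target1 */
}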
32b5b1aa 7947int
3a3677ff 7948ix86_expand_setcc (code, dest)
e075ae69 7949 enum rtx_code code;
e075ae69 7950 rtx dest;
32b5b1aa 7951{
a1b8572c
JH
7952 rtx ret, tmp, tmpreg;
7953 rtx second_test, bypass_test;
e075ae69 7954
885a70fd
JH
7955 if (GET_MODE (ix86_compare_op0) == DImode
7956 && !TARGET_64BIT)
e075ae69
RH
7957 return 0; /* FAIL */
7958
b932f770
JH
7959 if (GET_MODE (dest) != QImode)
7960 abort ();
e075ae69 7961
a1b8572c 7962 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
7963 PUT_MODE (ret, QImode);
7964
7965 tmp = dest;
a1b8572c 7966 tmpreg = dest;
32b5b1aa 7967
e075ae69 7968 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
7969 if (bypass_test || second_test)
7970 {
7971 rtx test = second_test;
7972 int bypass = 0;
7973 rtx tmp2 = gen_reg_rtx (QImode);
7974 if (bypass_test)
7975 {
7976 if (second_test)
b531087a 7977 abort ();
a1b8572c
JH
7978 test = bypass_test;
7979 bypass = 1;
7980 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7981 }
7982 PUT_MODE (test, QImode);
7983 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7984
7985 if (bypass)
7986 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7987 else
7988 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7989 }
e075ae69 7990
e075ae69 7991 return 1; /* DONE */
32b5b1aa 7992}
e075ae69 7993
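
/* A standalone C model of the two-setcc combination above for an IEEE
 comparison that needs a second test (NE = LTGT | UNORDERED): the two
 QImode results are merged with ior; a bypass test would be reversed
 and merged with and instead (illustrative name): */

static int
fp_setcc_ne_model (double a, double b)
{
 int first = (a < b) || (a > b); /* setcc for the main test */
 int second = (a != a) || (b != b); /* setcc for *second_test */
 return first | second; /* gen_iorqi3 */
}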
32b5b1aa 7994int
e075ae69
RH
7995ix86_expand_int_movcc (operands)
7996 rtx operands[];
32b5b1aa 7997{
e075ae69
RH
7998 enum rtx_code code = GET_CODE (operands[1]), compare_code;
7999 rtx compare_seq, compare_op;
a1b8572c 8000 rtx second_test, bypass_test;
635559ab 8001 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8002
36583fea
JH
 8003 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
 8004 When the comparison is done with an immediate, we can convert it to LTU or
 8005 GEU by altering the integer. */
8006
8007 if ((code == LEU || code == GTU)
8008 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8009 && mode != HImode
b531087a 8010 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
74411039
JH
8011 /* The operand still must be representable as sign extended value. */
8012 && (!TARGET_64BIT
8013 || GET_MODE (ix86_compare_op0) != DImode
8014 || (unsigned int) INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8015 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8016 && GET_CODE (operands[3]) == CONST_INT)
8017 {
8018 if (code == LEU)
8019 code = LTU;
8020 else
8021 code = GEU;
ce8076ad
JJ
8022 ix86_compare_op1
8023 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8024 GET_MODE (ix86_compare_op0));
36583fea 8025 }
3a3677ff 8026
e075ae69 8027 start_sequence ();
a1b8572c 8028 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8029 compare_seq = gen_sequence ();
8030 end_sequence ();
8031
8032 compare_code = GET_CODE (compare_op);
8033
8034 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8035 HImode insns, we'd be swallowed in word prefix ops. */
8036
635559ab
JH
8037 if (mode != HImode
8038 && (mode != DImode || TARGET_64BIT)
0f290768 8039 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8040 && GET_CODE (operands[3]) == CONST_INT)
8041 {
8042 rtx out = operands[0];
8043 HOST_WIDE_INT ct = INTVAL (operands[2]);
8044 HOST_WIDE_INT cf = INTVAL (operands[3]);
8045 HOST_WIDE_INT diff;
8046
a1b8572c
JH
8047 if ((compare_code == LTU || compare_code == GEU)
8048 && !second_test && !bypass_test)
e075ae69 8049 {
e075ae69
RH
8050
8051 /* Detect overlap between destination and compare sources. */
8052 rtx tmp = out;
8053
0f290768 8054 /* To simplify the rest of the code, restrict to the GEU case. */
36583fea
JH
8055 if (compare_code == LTU)
8056 {
8057 int tmp = ct;
8058 ct = cf;
8059 cf = tmp;
8060 compare_code = reverse_condition (compare_code);
8061 code = reverse_condition (code);
8062 }
8063 diff = ct - cf;
8064
e075ae69 8065 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8066 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8067 tmp = gen_reg_rtx (mode);
e075ae69
RH
8068
8069 emit_insn (compare_seq);
635559ab 8070 if (mode == DImode)
14f73b5a
JH
8071 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8072 else
8073 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8074
36583fea
JH
8075 if (diff == 1)
8076 {
8077 /*
8078 * cmpl op0,op1
8079 * sbbl dest,dest
8080 * [addl dest, ct]
8081 *
8082 * Size 5 - 8.
8083 */
8084 if (ct)
635559ab
JH
8085 tmp = expand_simple_binop (mode, PLUS,
8086 tmp, GEN_INT (ct),
8087 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8088 }
8089 else if (cf == -1)
8090 {
8091 /*
8092 * cmpl op0,op1
8093 * sbbl dest,dest
8094 * orl $ct, dest
8095 *
8096 * Size 8.
8097 */
635559ab
JH
8098 tmp = expand_simple_binop (mode, IOR,
8099 tmp, GEN_INT (ct),
8100 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8101 }
8102 else if (diff == -1 && ct)
8103 {
8104 /*
8105 * cmpl op0,op1
8106 * sbbl dest,dest
8107 * xorl $-1, dest
8108 * [addl dest, cf]
8109 *
8110 * Size 8 - 11.
8111 */
635559ab
JH
8112 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8113 if (cf)
8114 tmp = expand_simple_binop (mode, PLUS,
8115 tmp, GEN_INT (cf),
8116 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8117 }
8118 else
8119 {
8120 /*
8121 * cmpl op0,op1
8122 * sbbl dest,dest
8123 * andl cf - ct, dest
8124 * [addl dest, ct]
8125 *
8126 * Size 8 - 11.
8127 */
635559ab
JH
8128 tmp = expand_simple_binop (mode, AND,
8129 tmp,
d8bf17f9 8130 gen_int_mode (cf - ct, mode),
635559ab
JH
8131 tmp, 1, OPTAB_DIRECT);
8132 if (ct)
8133 tmp = expand_simple_binop (mode, PLUS,
8134 tmp, GEN_INT (ct),
8135 tmp, 1, OPTAB_DIRECT);
36583fea 8136 }
e075ae69
RH
8137
8138 if (tmp != out)
8139 emit_move_insn (out, tmp);
8140
8141 return 1; /* DONE */
8142 }
8143
8144 diff = ct - cf;
8145 if (diff < 0)
8146 {
8147 HOST_WIDE_INT tmp;
8148 tmp = ct, ct = cf, cf = tmp;
8149 diff = -diff;
734dba19
JH
8150 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8151 {
 8152 /* We may be reversing an unordered compare to a normal compare, which
 8153 is not valid in general (we may convert a non-trapping condition
 8154 to a trapping one); however, on i386 we currently emit all
 8155 comparisons unordered. */
8156 compare_code = reverse_condition_maybe_unordered (compare_code);
8157 code = reverse_condition_maybe_unordered (code);
8158 }
8159 else
8160 {
8161 compare_code = reverse_condition (compare_code);
8162 code = reverse_condition (code);
8163 }
e075ae69 8164 }
0f2a3457
JJ
8165
8166 compare_code = NIL;
8167 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8168 && GET_CODE (ix86_compare_op1) == CONST_INT)
8169 {
8170 if (ix86_compare_op1 == const0_rtx
8171 && (code == LT || code == GE))
8172 compare_code = code;
8173 else if (ix86_compare_op1 == constm1_rtx)
8174 {
8175 if (code == LE)
8176 compare_code = LT;
8177 else if (code == GT)
8178 compare_code = GE;
8179 }
8180 }
8181
8182 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8183 if (compare_code != NIL
8184 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8185 && (cf == -1 || ct == -1))
8186 {
 8187 /* If the lea code below could be used, only optimize
 8188 if it results in a 2-insn sequence. */
8189
8190 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8191 || diff == 3 || diff == 5 || diff == 9)
8192 || (compare_code == LT && ct == -1)
8193 || (compare_code == GE && cf == -1))
8194 {
8195 /*
8196 * notl op1 (if necessary)
8197 * sarl $31, op1
8198 * orl cf, op1
8199 */
8200 if (ct != -1)
8201 {
8202 cf = ct;
8203 ct = -1;
8204 code = reverse_condition (code);
8205 }
8206
8207 out = emit_store_flag (out, code, ix86_compare_op0,
8208 ix86_compare_op1, VOIDmode, 0, -1);
8209
8210 out = expand_simple_binop (mode, IOR,
8211 out, GEN_INT (cf),
8212 out, 1, OPTAB_DIRECT);
8213 if (out != operands[0])
8214 emit_move_insn (operands[0], out);
8215
8216 return 1; /* DONE */
8217 }
8218 }
8219
635559ab
JH
8220 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8221 || diff == 3 || diff == 5 || diff == 9)
8222 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
8223 {
8224 /*
8225 * xorl dest,dest
8226 * cmpl op1,op2
8227 * setcc dest
8228 * lea cf(dest*(ct-cf)),dest
8229 *
8230 * Size 14.
8231 *
8232 * This also catches the degenerate setcc-only case.
8233 */
8234
8235 rtx tmp;
8236 int nops;
8237
8238 out = emit_store_flag (out, code, ix86_compare_op0,
8239 ix86_compare_op1, VOIDmode, 0, 1);
8240
8241 nops = 0;
885a70fd
JH
 8242 /* On x86_64 the lea instruction operates on Pmode, so we need the
 8243 arithmetic done in the proper mode to match. */
e075ae69 8244 if (diff == 1)
14f73b5a 8245 tmp = out;
e075ae69
RH
8246 else
8247 {
885a70fd 8248 rtx out1;
14f73b5a 8249 out1 = out;
635559ab 8250 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
8251 nops++;
8252 if (diff & 1)
8253 {
635559ab 8254 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
8255 nops++;
8256 }
8257 }
8258 if (cf != 0)
8259 {
635559ab 8260 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
8261 nops++;
8262 }
885a70fd
JH
8263 if (tmp != out
8264 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8265 {
14f73b5a 8266 if (nops == 1)
e075ae69
RH
8267 {
8268 rtx clob;
8269
8270 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8271 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8272
8273 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8274 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8275 emit_insn (tmp);
8276 }
8277 else
8278 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8279 }
8280 if (out != operands[0])
8281 emit_move_insn (operands[0], out);
8282
8283 return 1; /* DONE */
8284 }
8285
8286 /*
8287 * General case: Jumpful:
8288 * xorl dest,dest cmpl op1, op2
8289 * cmpl op1, op2 movl ct, dest
8290 * setcc dest jcc 1f
8291 * decl dest movl cf, dest
8292 * andl (cf-ct),dest 1:
8293 * addl ct,dest
0f290768 8294 *
e075ae69
RH
8295 * Size 20. Size 14.
8296 *
8297 * This is reasonably steep, but branch mispredict costs are
8298 * high on modern cpus, so consider failing only if optimizing
8299 * for space.
8300 *
8301 * %%% Parameterize branch_cost on the tuning architecture, then
8302 * use that. The 80386 couldn't care less about mispredicts.
8303 */
8304
8305 if (!optimize_size && !TARGET_CMOVE)
8306 {
8307 if (ct == 0)
8308 {
8309 ct = cf;
8310 cf = 0;
734dba19 8311 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
 8312 /* We may be reversing an unordered compare to a normal compare,
 8313 which is not valid in general (we may convert a non-trapping
 8314 condition to a trapping one); however, on i386 we currently
 8315 emit all comparisons unordered. */
8316 code = reverse_condition_maybe_unordered (code);
8317 else
8318 {
8319 code = reverse_condition (code);
8320 if (compare_code != NIL)
8321 compare_code = reverse_condition (compare_code);
8322 }
8323 }
8324
8325 if (compare_code != NIL)
8326 {
8327 /* notl op1 (if needed)
8328 sarl $31, op1
8329 andl (cf-ct), op1
8330 addl ct, op1
8331
8332 For x < 0 (resp. x <= -1) there will be no notl,
8333 so if possible swap the constants to get rid of the
8334 complement.
8335 True/false will be -1/0 while code below (store flag
8336 followed by decrement) is 0/-1, so the constants need
8337 to be exchanged once more. */
8338
8339 if (compare_code == GE || !cf)
734dba19 8340 {
0f2a3457
JJ
8341 code = reverse_condition (code);
8342 compare_code = LT;
734dba19
JH
8343 }
8344 else
8345 {
0f2a3457
JJ
8346 HOST_WIDE_INT tmp = cf;
8347 cf = ct;
8348 ct = tmp;
734dba19 8349 }
0f2a3457
JJ
8350
8351 out = emit_store_flag (out, code, ix86_compare_op0,
8352 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 8353 }
0f2a3457
JJ
8354 else
8355 {
8356 out = emit_store_flag (out, code, ix86_compare_op0,
8357 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 8358
0f2a3457
JJ
8359 out = expand_simple_binop (mode, PLUS,
8360 out, constm1_rtx,
8361 out, 1, OPTAB_DIRECT);
8362 }
e075ae69 8363
635559ab
JH
8364 out = expand_simple_binop (mode, AND,
8365 out,
d8bf17f9 8366 gen_int_mode (cf - ct, mode),
635559ab
JH
8367 out, 1, OPTAB_DIRECT);
8368 out = expand_simple_binop (mode, PLUS,
8369 out, GEN_INT (ct),
8370 out, 1, OPTAB_DIRECT);
e075ae69
RH
8371 if (out != operands[0])
8372 emit_move_insn (operands[0], out);
8373
8374 return 1; /* DONE */
8375 }
8376 }
8377
8378 if (!TARGET_CMOVE)
8379 {
8380 /* Try a few things more with specific constants and a variable. */
8381
78a0d70c 8382 optab op;
e075ae69
RH
8383 rtx var, orig_out, out, tmp;
8384
8385 if (optimize_size)
8386 return 0; /* FAIL */
8387
0f290768 8388 /* If one of the two operands is an interesting constant, load a
e075ae69 8389 constant with the above and mask it in with a logical operation. */
0f290768 8390
e075ae69
RH
8391 if (GET_CODE (operands[2]) == CONST_INT)
8392 {
8393 var = operands[3];
8394 if (INTVAL (operands[2]) == 0)
8395 operands[3] = constm1_rtx, op = and_optab;
8396 else if (INTVAL (operands[2]) == -1)
8397 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
8398 else
8399 return 0; /* FAIL */
e075ae69
RH
8400 }
8401 else if (GET_CODE (operands[3]) == CONST_INT)
8402 {
8403 var = operands[2];
8404 if (INTVAL (operands[3]) == 0)
8405 operands[2] = constm1_rtx, op = and_optab;
8406 else if (INTVAL (operands[3]) == -1)
8407 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
8408 else
8409 return 0; /* FAIL */
e075ae69 8410 }
78a0d70c 8411 else
e075ae69
RH
8412 return 0; /* FAIL */
8413
8414 orig_out = operands[0];
635559ab 8415 tmp = gen_reg_rtx (mode);
e075ae69
RH
8416 operands[0] = tmp;
8417
8418 /* Recurse to get the constant loaded. */
8419 if (ix86_expand_int_movcc (operands) == 0)
8420 return 0; /* FAIL */
8421
8422 /* Mask in the interesting variable. */
635559ab 8423 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
8424 OPTAB_WIDEN);
8425 if (out != orig_out)
8426 emit_move_insn (orig_out, out);
8427
8428 return 1; /* DONE */
8429 }
8430
8431 /*
8432 * For comparison with above,
8433 *
8434 * movl cf,dest
8435 * movl ct,tmp
8436 * cmpl op1,op2
8437 * cmovcc tmp,dest
8438 *
8439 * Size 15.
8440 */
8441
635559ab
JH
8442 if (! nonimmediate_operand (operands[2], mode))
8443 operands[2] = force_reg (mode, operands[2]);
8444 if (! nonimmediate_operand (operands[3], mode))
8445 operands[3] = force_reg (mode, operands[3]);
e075ae69 8446
a1b8572c
JH
8447 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8448 {
635559ab 8449 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
8450 emit_move_insn (tmp, operands[3]);
8451 operands[3] = tmp;
8452 }
8453 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8454 {
635559ab 8455 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
8456 emit_move_insn (tmp, operands[2]);
8457 operands[2] = tmp;
8458 }
c9682caf
JH
8459 if (! register_operand (operands[2], VOIDmode)
8460 && ! register_operand (operands[3], VOIDmode))
635559ab 8461 operands[2] = force_reg (mode, operands[2]);
a1b8572c 8462
e075ae69
RH
8463 emit_insn (compare_seq);
8464 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 8465 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
8466 compare_op, operands[2],
8467 operands[3])));
a1b8572c
JH
8468 if (bypass_test)
8469 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 8470 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
8471 bypass_test,
8472 operands[3],
8473 operands[0])));
8474 if (second_test)
8475 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 8476 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
8477 second_test,
8478 operands[2],
8479 operands[0])));
e075ae69
RH
8480
8481 return 1; /* DONE */
e9a25f70 8482}
e075ae69 8483
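
/* Standalone C models of the branchless sequences emitted above,
 assuming 32-bit two's complement arithmetic with arithmetic right
 shifts, as on ia32 (all names ours, for illustration). */

/* cmp/sbb/and/add: dest = (a >= b, unsigned) ? ct : cf. */
static unsigned int
sbb_select_model (unsigned int a, unsigned int b,
 unsigned int ct, unsigned int cf)
{
 unsigned int mask = (a < b) ? ~0u : 0u; /* sbbl dest,dest */
 return (mask & (cf - ct)) + ct; /* andl / addl */
}

/* xor/cmp/setcc/lea: dest = (a < b) ? ct : cf; a single lea suffices
 when ct - cf is 1, 2, 3, 4, 5, 8 or 9. */
static int
setcc_lea_model (int a, int b, int ct, int cf)
{
 int flag = (a < b); /* setcc into a zeroed register */
 return cf + flag * (ct - cf); /* lea cf(flag*(ct-cf)) */
}

/* sarl $31/orl: dest = (x < 0) ? -1 : cf. */
static int
sar_or_model (int x, int cf)
{
 int mask = x >> 31; /* 0 or -1; implementation-defined in ISO C,
 an arithmetic shift on x86 */
 return mask | cf;
}

/* General form: setcc, decrement to 0/-1, mask, add. */
static int
setcc_dec_and_model (int a, int b, int ct, int cf)
{
 int flag = (a < b); /* setcc */
 return ((flag - 1) & (cf - ct)) + ct; /* decl / andl / addl */
}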
32b5b1aa 8484int
e075ae69
RH
8485ix86_expand_fp_movcc (operands)
8486 rtx operands[];
32b5b1aa 8487{
e075ae69 8488 enum rtx_code code;
e075ae69 8489 rtx tmp;
a1b8572c 8490 rtx compare_op, second_test, bypass_test;
32b5b1aa 8491
0073023d
JH
8492 /* For SF/DFmode conditional moves based on comparisons
 8493 in the same mode, we may want to use SSE min/max instructions. */
965f5423
JH
8494 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8495 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 8496 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
 8497 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
8498 && (!TARGET_IEEE_FP
8499 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
8500 /* We may be called from the post-reload splitter. */
8501 && (!REG_P (operands[0])
8502 || SSE_REG_P (operands[0])
52a661a6 8503 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
8504 {
8505 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8506 code = GET_CODE (operands[1]);
8507
 8508 /* See if we have a (cross) match between the comparison operands and
8509 conditional move operands. */
8510 if (rtx_equal_p (operands[2], op1))
8511 {
8512 rtx tmp = op0;
8513 op0 = op1;
8514 op1 = tmp;
8515 code = reverse_condition_maybe_unordered (code);
8516 }
8517 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8518 {
8519 /* Check for min operation. */
8520 if (code == LT)
8521 {
8522 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8523 if (memory_operand (op0, VOIDmode))
8524 op0 = force_reg (GET_MODE (operands[0]), op0);
8525 if (GET_MODE (operands[0]) == SFmode)
8526 emit_insn (gen_minsf3 (operands[0], op0, op1));
8527 else
8528 emit_insn (gen_mindf3 (operands[0], op0, op1));
8529 return 1;
8530 }
8531 /* Check for max operation. */
8532 if (code == GT)
8533 {
8534 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8535 if (memory_operand (op0, VOIDmode))
8536 op0 = force_reg (GET_MODE (operands[0]), op0);
8537 if (GET_MODE (operands[0]) == SFmode)
8538 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8539 else
8540 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8541 return 1;
8542 }
8543 }
 8544 /* Massage the condition into a sse_comparison_operator. When we are
 8545 in non-IEEE mode, try to canonicalize the destination operand
8546 to be first in the comparison - this helps reload to avoid extra
8547 moves. */
8548 if (!sse_comparison_operator (operands[1], VOIDmode)
8549 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8550 {
8551 rtx tmp = ix86_compare_op0;
8552 ix86_compare_op0 = ix86_compare_op1;
8553 ix86_compare_op1 = tmp;
8554 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8555 VOIDmode, ix86_compare_op0,
8556 ix86_compare_op1);
8557 }
 8558 /* Similarly, try to make the result the first operand of the conditional
fa9f36a1
JH
8559 move. We also don't support the NE comparison on SSE, so try to
8560 avoid it. */
037f20f1
JH
8561 if ((rtx_equal_p (operands[0], operands[3])
8562 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8563 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
8564 {
8565 rtx tmp = operands[2];
8566 operands[2] = operands[3];
92d0fb09 8567 operands[3] = tmp;
0073023d
JH
8568 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8569 (GET_CODE (operands[1])),
8570 VOIDmode, ix86_compare_op0,
8571 ix86_compare_op1);
8572 }
8573 if (GET_MODE (operands[0]) == SFmode)
8574 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8575 operands[2], operands[3],
8576 ix86_compare_op0, ix86_compare_op1));
8577 else
8578 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8579 operands[2], operands[3],
8580 ix86_compare_op0, ix86_compare_op1));
8581 return 1;
8582 }
8583
e075ae69 8584 /* The floating point conditional move instructions don't directly
0f290768 8585 support conditions resulting from a signed integer comparison. */
32b5b1aa 8586
e075ae69 8587 code = GET_CODE (operands[1]);
a1b8572c 8588 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
8589
8592
a1b8572c 8593 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 8594 {
a1b8572c 8595 if (second_test != NULL || bypass_test != NULL)
b531087a 8596 abort ();
e075ae69 8597 tmp = gen_reg_rtx (QImode);
3a3677ff 8598 ix86_expand_setcc (code, tmp);
e075ae69
RH
8599 code = NE;
8600 ix86_compare_op0 = tmp;
8601 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
8602 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8603 }
8604 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8605 {
8606 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8607 emit_move_insn (tmp, operands[3]);
8608 operands[3] = tmp;
8609 }
8610 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8611 {
8612 tmp = gen_reg_rtx (GET_MODE (operands[0]));
8613 emit_move_insn (tmp, operands[2]);
8614 operands[2] = tmp;
e075ae69 8615 }
e9a25f70 8616
e075ae69
RH
8617 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8618 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 8619 compare_op,
e075ae69
RH
8620 operands[2],
8621 operands[3])));
a1b8572c
JH
8622 if (bypass_test)
8623 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8624 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8625 bypass_test,
8626 operands[3],
8627 operands[0])));
8628 if (second_test)
8629 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8630 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8631 second_test,
8632 operands[2],
8633 operands[0])));
32b5b1aa 8634
e075ae69 8635 return 1;
32b5b1aa
SC
8636}
8637
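
/* A standalone C view of the min/max recognition above: when the
 conditional move's operands are exactly the compare's operands, the
 whole pattern collapses to a single minss/minsd (or maxss/maxsd)
 when SSE math is enabled (illustrative name): */

static double
fp_min_model (double a, double b)
{
 return a < b ? a : b; /* matched as the min operation above */
}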
2450a057
JH
8638/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
 8639 works for floating point parameters and non-offsettable memories.
 8640 For pushes, it returns just stack offsets; the values will be saved
 8641 in the right order. At most three parts are generated. */
8642
2b589241 8643static int
2450a057
JH
8644ix86_split_to_parts (operand, parts, mode)
8645 rtx operand;
8646 rtx *parts;
8647 enum machine_mode mode;
32b5b1aa 8648{
26e5b205
JH
8649 int size;
8650
8651 if (!TARGET_64BIT)
8652 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8653 else
8654 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 8655
a7180f70
BS
8656 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8657 abort ();
2450a057
JH
8658 if (size < 2 || size > 3)
8659 abort ();
8660
d7a29404
JH
 8661 /* Optimize constant pool references to immediates. This is used by fp moves,
 8662 which force all constants to memory to allow combining. */
8663
8664 if (GET_CODE (operand) == MEM
8665 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8666 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8667 operand = get_pool_constant (XEXP (operand, 0));
8668
2450a057 8669 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 8670 {
2450a057
JH
 8671 /* The only non-offsettable memories we handle are pushes. */
8672 if (! push_operand (operand, VOIDmode))
8673 abort ();
8674
26e5b205
JH
8675 operand = copy_rtx (operand);
8676 PUT_MODE (operand, Pmode);
2450a057
JH
8677 parts[0] = parts[1] = parts[2] = operand;
8678 }
26e5b205 8679 else if (!TARGET_64BIT)
2450a057
JH
8680 {
8681 if (mode == DImode)
8682 split_di (&operand, 1, &parts[0], &parts[1]);
8683 else
e075ae69 8684 {
2450a057
JH
8685 if (REG_P (operand))
8686 {
8687 if (!reload_completed)
8688 abort ();
8689 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8690 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8691 if (size == 3)
8692 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8693 }
8694 else if (offsettable_memref_p (operand))
8695 {
f4ef873c 8696 operand = adjust_address (operand, SImode, 0);
2450a057 8697 parts[0] = operand;
b72f00af 8698 parts[1] = adjust_address (operand, SImode, 4);
2450a057 8699 if (size == 3)
b72f00af 8700 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
8701 }
8702 else if (GET_CODE (operand) == CONST_DOUBLE)
8703 {
8704 REAL_VALUE_TYPE r;
2b589241 8705 long l[4];
2450a057
JH
8706
8707 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8708 switch (mode)
8709 {
8710 case XFmode:
2b589241 8711 case TFmode:
2450a057 8712 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 8713 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
8714 break;
8715 case DFmode:
8716 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8717 break;
8718 default:
8719 abort ();
8720 }
d8bf17f9
LB
8721 parts[1] = gen_int_mode (l[1], SImode);
8722 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
8723 }
8724 else
8725 abort ();
e075ae69 8726 }
2450a057 8727 }
26e5b205
JH
8728 else
8729 {
44cf5b6a
JH
8730 if (mode == TImode)
8731 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
8732 if (mode == XFmode || mode == TFmode)
8733 {
8734 if (REG_P (operand))
8735 {
8736 if (!reload_completed)
8737 abort ();
8738 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8739 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8740 }
8741 else if (offsettable_memref_p (operand))
8742 {
b72f00af 8743 operand = adjust_address (operand, DImode, 0);
26e5b205 8744 parts[0] = operand;
b72f00af 8745 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
8746 }
8747 else if (GET_CODE (operand) == CONST_DOUBLE)
8748 {
8749 REAL_VALUE_TYPE r;
8750 long l[3];
8751
8752 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8753 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8754 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8755 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 8756 parts[0]
d8bf17f9 8757 = gen_int_mode
44cf5b6a 8758 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 8759 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 8760 DImode);
26e5b205
JH
8761 else
8762 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 8763 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
8764 }
8765 else
8766 abort ();
8767 }
8768 }
2450a057 8769
2b589241 8770 return size;
2450a057
JH
8771}
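
/* Illustrative note (not part of the original sources): on a 32bit
   little-endian target, splitting the DFmode constant 1.0 (IEEE image
   0x3FF0000000000000) through the CONST_DOUBLE case above yields

     parts[0] = 0x00000000   (low word)
     parts[1] = 0x3FF00000   (high word)

   and the function returns 2, since GET_MODE_SIZE (DFmode) / 4 == 2.  */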

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For the 64bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting a push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* A collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle them by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only the first 12 bytes of a TFmode value, but for
		 pushing we are required to adjust the stack as if we were
		 pushing a real 16byte value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have a 32bit push available.  In case
	     this is a register, it is OK - we will just use the larger
	     counterpart.  We also retype memory - this comes from an
	     attempt to avoid a REX prefix on moving the second half of a
	     TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose the correct order so as not to overwrite the source before it
     is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
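
/* Illustrative note (not part of the original sources): a DImode move
   such as (set (reg:DI eax) (mem:DI (plus (reg:SI eax) (const_int 8))))
   collides, because the first word written (eax) is also the address
   register of the source.  The ordering code above detects this and
   emits the high-word copy first, so the address survives long enough
   for both halves to be read.  */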

/* Generate code for a DImode shift left of operands[1] by operands[2],
   leaving the result in operands[0].  SCRATCH may be used as a temporary
   when new pseudos cannot be created.  */
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
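
/* Illustrative note (not part of the original sources): for a constant
   shift such as edx:eax << 12 the code above emits

     shld $12, %eax, %edx	; high = high << 12 | low >> 20
     sall $12, %eax		; low <<= 12

   while a shift by 40 (>= 32) degenerates into

     movl %eax, %edx
     xorl %eax, %eax
     sall $8, %edx  */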

/* Generate code for a DImode arithmetic shift right of operands[1] by
   operands[2].  SCRATCH may be used as a temporary when new pseudos
   cannot be created.  */
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Generate code for a DImode logical shift right of operands[1] by
   operands[2].  SCRATCH may be used as a temporary when new pseudos
   cannot be created.  */
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes; if so, emit a jump to the returned label
   (i.e. the label is reached when the low bits selected by VALUE are
   zero).  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
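
/* Usage sketch (not part of the original sources): the string expanders
   below call this as

     rtx label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   so the single-byte copy executes only when the destination is odd;
   the emitted test jumps straight to LABEL when (destreg & 1) == 0.  */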

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand a string move (memcpy) operation.  Use i386 string operations
   when profitable.  ix86_expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out the proper mode for the counter.  For 32bit it is always
     SImode; for 64bit use SImode when possible, otherwise DImode.
     Set count to the number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size, emit a simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
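  /* Worked example for the constant-count branch above (not part of the
     original sources): for count == 23 with size == 4 it emits

	movl $5, %ecx		; 23 >> 2 longwords, i.e. 20 bytes
	rep movsl
	movsw			; count & 2 copies 2 more bytes
	movsb			; count & 1 copies the last byte

     which is how the 0x02 and 0x01 tail tests reassemble any
     remainder.  */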
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for
       PentiumPro, allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and results
	 in shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is
	 often able to predict the branches) and also it is friendlier to
	 the hardware branch prediction.

	 Using loops is beneficial for the generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}

/* Expand a string clear operation (bzero).  Use i386 string operations
   when profitable.  ix86_expand_movstr contains similar code.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out the proper mode for the counter.  For 32bit it is always
     SImode; for 64bit use SImode when possible, otherwise DImode.
     Set count to the number of bytes cleared when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size, emit a simple rep ; stosb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute the desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and results
	 in shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  /* The test must guard the SImode store, so test the 4 bit of the
	     remaining count (the original 2 here looks like a pasto from
	     the HImode case below).  */
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Some optimizers fail to combine a pair of calls such as
	   foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  The length is
	 still computed just once inside output_strlen_unroll(), but since
	 &bar[strlen (bar)] is often used and this uses one fewer register
	 for the lifetime of output_strlen_unroll(), doing it here is
	 better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
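
/* Arithmetic sketch for the rep-scan path above (not part of the
   original sources): scratch4 preloads the count register with -1, and
   the repnz scasb inside the strlenqi pattern decrements it once per
   byte scanned, including the terminator.  After scanning a string of
   length n the counter holds -(n + 2), so

     out = ~counter + (-1) = (n + 1) - 1 = n

   which is exactly what the one_cmpl and add insns above compute.  */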

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether we are already aligned to a 4-byte boundary.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only makes the program larger and does not
     speed anything up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
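  /* Worked example (not part of the original sources): the insns below
     compute  tmpreg = (x - 0x01010101) & ~x & 0x80808080  where x is the
     word in SCRATCH.  For x = 0x41410041 we get x - 0x01010101 =
     0x403FFF40 and ~x = 0xBEBEFFBE, so tmpreg = 0x00008000: the high
     bit of the byte holding zero is set.  For x = 0x41414141 every
     byte's borrow is absorbed and tmpreg = 0, so the loop continues.  */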

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}

/* Mark machine specific bits of P for GC.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  if (! machine)
    return;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}

/* Free the machine specific bits of P.  */
static void
ix86_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
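
/* Worked examples (not part of the original sources):

     (reg %ebx)                              -> 0  (modrm only)
     (reg %ebp) as the hard frame pointer    -> 1  (needs the disp8 form)
     (plus (reg %ebx) (const_int 8))         -> 1  (disp8)
     (symbol_ref)                            -> 4  (disp32, no base)
     (plus (plus (reg %ebx)
		 (mult (reg %esi) (const_int 4)))
	   (const_int 200))                  -> 5  (disp32 plus SIB byte)  */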

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len += 1;
		break;
	      case MODE_HI:
		len += 2;
		break;
	      case MODE_SI:
		len += 4;
		break;
	      /* Immediates for DImode instructions are encoded as
		 32bit sign extended values.  */
	      case MODE_DI:
		len += 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
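
/* Worked example (not part of the original sources): for
   (set (reg:SI eax) (plus:SI (reg:SI eax) (const_int 4))) the add
   pattern has an imm8 alternative, so with SHORTFORM the immediate
   contributes 1 byte ('K' accepts signed 8bit constants); the same add
   of (const_int 1000) falls back to the full SImode immediate and
   contributes 4 bytes.  */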
/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and nothing else set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
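
/* Illustrative note (not part of the original sources): on Pentium,

     addl $4, %ebx
     movl (%ebx), %eax

   is an address-generation interlock: the load's address is computed in
   the same pipeline stage in which the add completes, so
   ix86_adjust_cost below charges one extra cycle for this dependence.  */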

static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 0;
	  else if (cost >= 3)
	    cost -= 3;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
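
/* Illustrative note (not part of the original sources): the Pentium case
   above sets the cost of a cmp -> jcc dependence to 0 because the two
   instructions pair; conversely, a load feeding a non-move ALU op on K6
   is charged two extra cycles, since the delayed load latency cannot be
   expressed in the function unit tables.  */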
0a726ef1 10299
e075ae69
RH
10300static union
10301{
10302 struct ppro_sched_data
10303 {
10304 rtx decode[3];
10305 int issued_this_cycle;
10306 } ppro;
10307} ix86_sched_data;
0a726ef1 10308
e075ae69
RH
10309static int
10310ix86_safe_length (insn)
10311 rtx insn;
10312{
10313 if (recog_memoized (insn) >= 0)
b531087a 10314 return get_attr_length (insn);
e075ae69
RH
10315 else
10316 return 128;
10317}
0a726ef1 10318
e075ae69
RH
10319static int
10320ix86_safe_length_prefix (insn)
10321 rtx insn;
10322{
10323 if (recog_memoized (insn) >= 0)
b531087a 10324 return get_attr_length (insn);
e075ae69
RH
10325 else
10326 return 0;
10327}
10328
10329static enum attr_memory
10330ix86_safe_memory (insn)
10331 rtx insn;
10332{
10333 if (recog_memoized (insn) >= 0)
b531087a 10334 return get_attr_memory (insn);
e075ae69
RH
10335 else
10336 return MEMORY_UNKNOWN;
10337}
0a726ef1 10338
e075ae69
RH
10339static enum attr_ppro_uops
10340ix86_safe_ppro_uops (insn)
10341 rtx insn;
10342{
10343 if (recog_memoized (insn) >= 0)
10344 return get_attr_ppro_uops (insn);
10345 else
10346 return PPRO_UOPS_MANY;
10347}
0a726ef1 10348
e075ae69
RH
10349static void
10350ix86_dump_ppro_packet (dump)
10351 FILE *dump;
0a726ef1 10352{
e075ae69 10353 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10354 {
e075ae69
RH
10355 fprintf (dump, "PPRO packet: %d",
10356 INSN_UID (ix86_sched_data.ppro.decode[0]));
10357 if (ix86_sched_data.ppro.decode[1])
10358 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10359 if (ix86_sched_data.ppro.decode[2])
10360 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10361 fputc ('\n', dump);
10362 }
10363}
0a726ef1 10364
e075ae69 10365/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 10366
c237e94a
ZW
10367static void
10368ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
10369 FILE *dump ATTRIBUTE_UNUSED;
10370 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 10371 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
10372{
10373 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10374}
10375
10376/* Shift INSN to SLOT, and shift everything else down. */
10377
10378static void
10379ix86_reorder_insn (insnp, slot)
10380 rtx *insnp, *slot;
10381{
10382 if (insnp != slot)
10383 {
10384 rtx insn = *insnp;
0f290768 10385 do
e075ae69
RH
10386 insnp[0] = insnp[1];
10387 while (++insnp != slot);
10388 *insnp = insn;
0a726ef1 10389 }
e075ae69
RH
10390}

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from the last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
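
/* Thus a DFmode constant that would ordinarily get only 32-bit
   alignment is placed on a 64-bit boundary, and string constants of
   31 bytes or more are aligned to 256 bits, presumably so block
   operations on them can use wide aligned accesses.  */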

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
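
/* For example, a static aggregate of 32 bytes or more (the first test
   above, 256 bits) is boosted to 256-bit alignment, while on x86-64 a
   16-byte aggregate already receives the ABI-mandated 128 bits.  */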

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
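
/* For reference, the byte sequences stored above decode as follows
   (little-endian immediates; this is a sketch derived from the stores
   in x86_initialize_trampoline, not emitted anywhere verbatim):

     32-bit:  b9 <cxt:4>		movl   $cxt, %ecx
	      e9 <disp:4>		jmp    fnaddr  (pc-relative)

     64-bit:  41 bb <fnaddr:4>		movl   $fnaddr, %r11d  (short form)
	 or   49 bb <fnaddr:8>		movabs $fnaddr, %r11
	      49 ba <cxt:8>		movabs $cxt, %r10
	      49 ff e3			jmp    *%r11  */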

#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
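
/* E.g. def_builtin (MASK_SSE1, "__builtin_ia32_andps",
   v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS) registers the builtin only
   when one of the mask's bits is set in target_flags; otherwise it
   expands to nothing.  */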

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
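
/* In the descriptor tables below, COMPARISON is the rtx code used when
   expanding the builtin, and a nonzero FLAG requests the reversed
   form: e.g. "comigt" is listed as LT with FLAG set, i.e. GT expanded
   as LT with the operands swapped.  */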

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
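
/* Table entries with a zero NAME are skipped by the generic
   registration loop in ix86_init_mmx_sse_builtins below; those
   builtins are instead registered by hand with more precise types.  */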

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  tree void_ftype_pv2di_v2di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2di_type_node,
				      tree_cons (NULL_TREE,
						 V2DI_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree int_ftype_v2df_v2df
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node, endlink)));

  tree ti_ftype_void
    = build_function_type (intTI_type_node, endlink);
  tree ti_ftype_ti_ti
    = build_function_type (intTI_type_node,
			   tree_cons (NULL_TREE, intTI_type_node,
				      tree_cons (NULL_TREE, intTI_type_node,
						 endlink)));
  tree void_ftype_pvoid
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, ptr_type_node, endlink));
  tree v2di_ftype_di
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      endlink));
  tree v4sf_ftype_v4si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree v2df_ftype_v4si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node, endlink));
  tree v4si_ftype_v2df
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2si_ftype_v2df
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v4sf_ftype_v2df
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2df_ftype_v4sf
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node, endlink));
  tree int_ftype_v2df
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node, endlink));
  tree v2df_ftype_v2df_int
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2df
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v2df_ftype_v2df_v4sf
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v2df_ftype_v2df_v2df_int
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2df_ftype_v2df_pv2si
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v2df
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree void_ftype_pdouble_v2df
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdouble_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree void_ftype_pint_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pint_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree maskmovdqu_args = tree_cons (NULL_TREE, V16QI_type_node,
				    tree_cons (NULL_TREE, V16QI_type_node,
					       tree_cons (NULL_TREE,
							  pchar_type_node,
							  endlink)));
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type (void_type_node, maskmovdqu_args);
  tree v2df_ftype_pdouble
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, pdouble_type_node,
				      endlink));
  tree v2df_ftype_v2df_v2df
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type (V16QI_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node,
				      tree_cons (NULL_TREE, V16QI_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V8HI_type_node,
						 endlink)));
  tree v4si_ftype_v4si_v4si
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, V4SI_type_node,
						 endlink)));
  tree v2di_ftype_v2di_v2di
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v2di_ftype_v2df_v2df
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      tree_cons (NULL_TREE, V2DF_type_node,
						 endlink)));
  tree v2df_ftype_v2df
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, V2DF_type_node,
				      endlink));
  tree v2df_ftype_double
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, double_type_node,
				      endlink));
  tree v2df_ftype_double_double
    = build_function_type (V2DF_type_node,
			   tree_cons (NULL_TREE, double_type_node,
				      tree_cons (NULL_TREE, double_type_node,
						 endlink)));
  tree int_ftype_v8hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_int_int
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2di_ftype_v2di_int
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V2DI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4si_ftype_v4si_int
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_int
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8hi_ftype_v8hi_v2di
    = build_function_type (V8HI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v4si_ftype_v4si_v2di
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SI_type_node,
				      tree_cons (NULL_TREE, V2DI_type_node,
						 endlink)));
  tree v4si_ftype_v8hi_v8hi
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V8HI_type_node,
				      tree_cons (NULL_TREE, V8HI_type_node,
						 endlink)));
  tree di_ftype_v8qi_v8qi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2di_ftype_v16qi_v16qi
    = build_function_type (V2DI_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node,
				      tree_cons (NULL_TREE, V16QI_type_node,
						 endlink)));
  tree int_ftype_v16qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V16QI_type_node, endlink));

bd793c65
BS
11594 /* Add all builtins that are more or less simple operations on two
11595 operands. */
ca7558fc 11596 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
11597 {
11598 /* Use one of the operands; the target can have a different mode for
11599 mask-generating compares. */
11600 enum machine_mode mode;
11601 tree type;
11602
11603 if (d->name == 0)
11604 continue;
11605 mode = insn_data[d->icode].operand[1].mode;
11606
bd793c65
BS
11607 switch (mode)
11608 {
fbe5eb6d
BS
11609 case V16QImode:
11610 type = v16qi_ftype_v16qi_v16qi;
11611 break;
11612 case V8HImode:
11613 type = v8hi_ftype_v8hi_v8hi;
11614 break;
11615 case V4SImode:
11616 type = v4si_ftype_v4si_v4si;
11617 break;
11618 case V2DImode:
11619 type = v2di_ftype_v2di_v2di;
11620 break;
11621 case V2DFmode:
11622 type = v2df_ftype_v2df_v2df;
11623 break;
11624 case TImode:
11625 type = ti_ftype_ti_ti;
11626 break;
bd793c65
BS
11627 case V4SFmode:
11628 type = v4sf_ftype_v4sf_v4sf;
11629 break;
11630 case V8QImode:
11631 type = v8qi_ftype_v8qi_v8qi;
11632 break;
11633 case V4HImode:
11634 type = v4hi_ftype_v4hi_v4hi;
11635 break;
11636 case V2SImode:
11637 type = v2si_ftype_v2si_v2si;
11638 break;
bd793c65
BS
11639 case DImode:
11640 type = di_ftype_di_di;
11641 break;
11642
11643 default:
11644 abort ();
11645 }
0f290768 11646
bd793c65
BS
11647 /* Override for comparisons. */
11648 if (d->icode == CODE_FOR_maskcmpv4sf3
11649 || d->icode == CODE_FOR_maskncmpv4sf3
11650 || d->icode == CODE_FOR_vmmaskcmpv4sf3
11651 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11652 type = v4si_ftype_v4sf_v4sf;
11653
fbe5eb6d
BS
11654 if (d->icode == CODE_FOR_maskcmpv2df3
11655 || d->icode == CODE_FOR_maskncmpv2df3
11656 || d->icode == CODE_FOR_vmmaskcmpv2df3
11657 || d->icode == CODE_FOR_vmmaskncmpv2df3)
11658 type = v2di_ftype_v2df_v2df;
11659
eeb06b1b 11660 def_builtin (d->mask, d->name, type, d->code);
11661 }
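/* For orientation, a sketch (not the authoritative table, which is
   defined earlier in this file): each bdesc_2arg entry pairs a CPU
   option mask and insn code with the builtin's name and enum value,
   roughly

     { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb",
       IX86_BUILTIN_PADDB, 0, 0 },

   so the loop above turns each such row into one def_builtin call,
   picking the function type from the insn's operand mode. */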
11662
11663 /* Add the remaining MMX insns with somewhat more complicated types. */
11664 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11665 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11666 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11667 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11668 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11669 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11670 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11671
11672 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11673 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11674 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11675
11676 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11677 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11678
11679 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11680 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 11681
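/* Illustrative user-level sketch (not part of this file): with -mmmx
   the shift builtins above are directly callable.  The typedef via the
   vector mode attribute is an assumption of this sketch, mirroring the
   style of the intrinsic headers. */
#if 0
typedef int example_v4hi __attribute__ ((__mode__ (__V4HI__)));

static example_v4hi
example_psllw (example_v4hi x)
{
  /* Shift each 16-bit element left by 3; the count is a DImode value.  */
  return __builtin_ia32_psllw (x, 3LL);
}
#endif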
bd793c65 11682 /* comi/ucomi insns. */
ca7558fc 11683 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
11684 if (d->mask == MASK_SSE2)
11685 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
11686 else
11687 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 11688
11689 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11690 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11691 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 11692
11693 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11694 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11695 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11696 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11697 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11698 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 11699
11700 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11701 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11702 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11703 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 11704
11705 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11706 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 11707
fbe5eb6d 11708 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 11709
11710 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11711 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11712 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11713 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11714 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11715 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 11716
11717 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11718 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11719 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11720 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 11721
11722 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11723 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11724 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11725 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 11726
fbe5eb6d 11727 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 11728
916b60b7 11729 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 11730
11731 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11732 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11733 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11734 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11735 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11736 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 11737
fbe5eb6d 11738 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 11739
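/* Hedged user-level sketch: the mask operand of __builtin_ia32_shufps
   must be an integer literal (a variable mask is rejected during
   expansion below with "mask must be an immediate").  The typedef is
   an assumption of this sketch. */
#if 0
typedef float example_v4sf __attribute__ ((__mode__ (__V4SF__)));

static example_v4sf
example_shufps (example_v4sf a, example_v4sf b)
{
  return __builtin_ia32_shufps (a, b, 0x1b);  /* r = { a[3], a[2], b[1], b[0] } */
}
#endif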
11740 /* Original 3DNow! */
11741 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11742 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11743 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11744 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11745 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11746 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11747 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11748 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11749 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11750 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11751 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11752 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11753 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11754 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11755 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11756 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11757 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11758 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11759 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11760 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11761
11762 /* 3DNow! extension as used in the Athlon CPU. */
11763 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11764 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11765 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11766 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11767 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11768 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11769
11770 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11771
11772 /* SSE2 */
11773 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
11774 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
11775
11776 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
11777 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
11778
11779 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
11780 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
11781 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
11782 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
11783 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
11784 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
11785
11786 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
11787 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
11788 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
11789 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
11790
11791 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 11792 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
11793 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
11794 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 11795 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
11796
11797 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
11798 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
11799 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 11800 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
11801
11802 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
11803 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
11804
11805 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
11806
11807 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 11808 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
11809
11810 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
11811 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
11812 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
11813 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
11814 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
11815
11816 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
11817
11818 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
11819 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
11820
11821 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
11822 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
11823 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
11824
11825 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
11826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
11827 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
11828
11829 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
11830 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
11831 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
11832 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
11833 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
11834 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
11835 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
11836
11837 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
11838 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
11839 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
11840
11841 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
11842 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
11843 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
11844
11845 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
11846 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
11847 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
11848
11849 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
11850 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
11851
11852 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
11853 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
11854 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
11855
11856 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
11857 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
11858 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
11859
11860 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
11861 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
11862
11863 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
11864}
11865
11866/* Errors in the source file can cause expand_expr to return const0_rtx
11867 where we expect a vector. To avoid crashing, use one of the vector
11868 clear instructions. */
11869static rtx
11870safe_vector_operand (x, mode)
11871 rtx x;
11872 enum machine_mode mode;
11873{
11874 if (x != const0_rtx)
11875 return x;
11876 x = gen_reg_rtx (mode);
11877
47f339cf 11878 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11879 emit_insn (gen_mmx_clrdi (mode == DImode ? x
11880 : gen_rtx_SUBREG (DImode, x, 0)));
11881 else
11882 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11883 : gen_rtx_SUBREG (V4SFmode, x, 0)));
11884 return x;
11885}
11886
11887/* Subroutine of ix86_expand_builtin to take care of binop insns. */
11888
11889static rtx
11890ix86_expand_binop_builtin (icode, arglist, target)
11891 enum insn_code icode;
11892 tree arglist;
11893 rtx target;
11894{
11895 rtx pat;
11896 tree arg0 = TREE_VALUE (arglist);
11897 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11898 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11899 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11900 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11901 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11902 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11903
11904 if (VECTOR_MODE_P (mode0))
11905 op0 = safe_vector_operand (op0, mode0);
11906 if (VECTOR_MODE_P (mode1))
11907 op1 = safe_vector_operand (op1, mode1);
11908
11909 if (! target
11910 || GET_MODE (target) != tmode
11911 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11912 target = gen_reg_rtx (tmode);
11913
11914 /* In case the insn wants input operands in modes different from
11915 the result, abort. */
11916 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11917 abort ();
11918
11919 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11920 op0 = copy_to_mode_reg (mode0, op0);
11921 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11922 op1 = copy_to_mode_reg (mode1, op1);
11923
11924 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11925 yet one of the two must not be a memory operand. This is normally enforced
11926 by expanders, but we didn't bother to create one here. */
11927 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11928 op0 = copy_to_mode_reg (mode0, op0);
11929
11930 pat = GEN_FCN (icode) (target, op0, op1);
11931 if (! pat)
11932 return 0;
11933 emit_insn (pat);
11934 return target;
11935}
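/* Sketch of how this helper is reached: the dispatch loop near the end
   of ix86_expand_builtin matches FCODE against bdesc_2arg and then
   calls, for example,

     ix86_expand_binop_builtin (d->icode, arglist, target);

   where d->icode would be something like CODE_FOR_addv4sf3 for
   __builtin_ia32_addps (the exact pairing lives in the table, not
   here). */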
11936
11937/* In type_for_mode we restrict the ability to create TImode types
11938 to hosts with a 64-bit HOST_WIDE_INT. So we've defined the SSE logicals
11939 to have a V4SFmode signature. Convert them in-place to TImode. */
11940
11941static rtx
11942ix86_expand_timode_binop_builtin (icode, arglist, target)
11943 enum insn_code icode;
11944 tree arglist;
11945 rtx target;
11946{
11947 rtx pat;
11948 tree arg0 = TREE_VALUE (arglist);
11949 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11950 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11951 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11952
11953 op0 = gen_lowpart (TImode, op0);
11954 op1 = gen_lowpart (TImode, op1);
11955 target = gen_reg_rtx (TImode);
11956
11957 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11958 op0 = copy_to_mode_reg (TImode, op0);
11959 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11960 op1 = copy_to_mode_reg (TImode, op1);
11961
11962 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11963 yet one of the two must not be a memory operand. This is normally enforced
11964 by expanders, but we didn't bother to create one here. */
11965 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11966 op0 = copy_to_mode_reg (TImode, op0);
11967
11968 pat = GEN_FCN (icode) (target, op0, op1);
11969 if (! pat)
11970 return 0;
11971 emit_insn (pat);
11972
11973 return gen_lowpart (V4SFmode, target);
11974}
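/* User-level view (sketch): despite the TImode round trip above, the
   SSE logical builtins keep their V4SF signature, so a caller simply
   writes something like the following (typedef assumed, as in the
   intrinsic headers): */
#if 0
typedef float example2_v4sf __attribute__ ((__mode__ (__V4SF__)));

static example2_v4sf
example_andps (example2_v4sf a, example2_v4sf b)
{
  return __builtin_ia32_andps (a, b);  /* bitwise AND of all 128 bits */
}
#endif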
11975
11976/* Subroutine of ix86_expand_builtin to take care of stores. */
11977
11978static rtx
e37af218 11979ix86_expand_store_builtin (icode, arglist)
11980 enum insn_code icode;
11981 tree arglist;
11982{
11983 rtx pat;
11984 tree arg0 = TREE_VALUE (arglist);
11985 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11986 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11987 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11988 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11989 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11990
11991 if (VECTOR_MODE_P (mode1))
11992 op1 = safe_vector_operand (op1, mode1);
11993
11994 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11995
11996 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11997 op1 = copy_to_mode_reg (mode1, op1);
11998
11999 pat = GEN_FCN (icode) (op0, op1);
12000 if (pat)
12001 emit_insn (pat);
12002 return 0;
12003}
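/* Note on the argument order handled above: the store builtins take
   the memory operand first, e.g. __builtin_ia32_storeups (ptr, vec),
   and the expander wraps PTR in a MEM of the insn's mode before
   emitting the store. */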
12004
12005/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12006
12007static rtx
12008ix86_expand_unop_builtin (icode, arglist, target, do_load)
12009 enum insn_code icode;
12010 tree arglist;
12011 rtx target;
12012 int do_load;
12013{
12014 rtx pat;
12015 tree arg0 = TREE_VALUE (arglist);
12016 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12017 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12018 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12019
12020 if (! target
12021 || GET_MODE (target) != tmode
12022 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12023 target = gen_reg_rtx (tmode);
12024 if (do_load)
12025 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12026 else
12027 {
12028 if (VECTOR_MODE_P (mode0))
12029 op0 = safe_vector_operand (op0, mode0);
12030
12031 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12032 op0 = copy_to_mode_reg (mode0, op0);
12033 }
12034
12035 pat = GEN_FCN (icode) (target, op0);
12036 if (! pat)
12037 return 0;
12038 emit_insn (pat);
12039 return target;
12040}
12041
12042/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12043 sqrtss, rsqrtss, rcpss. */
12044
12045static rtx
12046ix86_expand_unop1_builtin (icode, arglist, target)
12047 enum insn_code icode;
12048 tree arglist;
12049 rtx target;
12050{
12051 rtx pat;
12052 tree arg0 = TREE_VALUE (arglist);
59bef189 12053 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12054 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12055 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12056
12057 if (! target
12058 || GET_MODE (target) != tmode
12059 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12060 target = gen_reg_rtx (tmode);
12061
12062 if (VECTOR_MODE_P (mode0))
12063 op0 = safe_vector_operand (op0, mode0);
12064
12065 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12066 op0 = copy_to_mode_reg (mode0, op0);
12067
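  /* The vm* insns compute element 0 from the first input and copy the
     remaining elements from the second, so pass the same register for
     both to get the usual scalar-insn semantics.  */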
12068 op1 = op0;
12069 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12070 op1 = copy_to_mode_reg (mode0, op1);
12071
12072 pat = GEN_FCN (icode) (target, op0, op1);
12073 if (! pat)
12074 return 0;
12075 emit_insn (pat);
12076 return target;
12077}
12078
12079/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12080
12081static rtx
12082ix86_expand_sse_compare (d, arglist, target)
8b60264b 12083 const struct builtin_description *d;
12084 tree arglist;
12085 rtx target;
12086{
12087 rtx pat;
12088 tree arg0 = TREE_VALUE (arglist);
12089 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12090 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12091 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12092 rtx op2;
12093 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12094 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12095 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12096 enum rtx_code comparison = d->comparison;
12097
12098 if (VECTOR_MODE_P (mode0))
12099 op0 = safe_vector_operand (op0, mode0);
12100 if (VECTOR_MODE_P (mode1))
12101 op1 = safe_vector_operand (op1, mode1);
12102
12103 /* Swap operands if we have a comparison that isn't available in
12104 hardware. */
12105 if (d->flag)
12106 {
12107 rtx tmp = gen_reg_rtx (mode1);
12108 emit_move_insn (tmp, op1);
bd793c65 12109 op1 = op0;
21e1b5f1 12110 op0 = tmp;
bd793c65 12111 }
12112
12113 if (! target
12114 || GET_MODE (target) != tmode
12115 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12116 target = gen_reg_rtx (tmode);
12117
12118 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12119 op0 = copy_to_mode_reg (mode0, op0);
12120 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12121 op1 = copy_to_mode_reg (mode1, op1);
12122
12123 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12124 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12125 if (! pat)
12126 return 0;
12127 emit_insn (pat);
12128 return target;
12129}
12130
12131/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12132
12133static rtx
12134ix86_expand_sse_comi (d, arglist, target)
8b60264b 12135 const struct builtin_description *d;
12136 tree arglist;
12137 rtx target;
12138{
12139 rtx pat;
12140 tree arg0 = TREE_VALUE (arglist);
12141 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12142 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12143 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12144 rtx op2;
12145 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12146 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12147 enum rtx_code comparison = d->comparison;
12148
12149 if (VECTOR_MODE_P (mode0))
12150 op0 = safe_vector_operand (op0, mode0);
12151 if (VECTOR_MODE_P (mode1))
12152 op1 = safe_vector_operand (op1, mode1);
12153
12154 /* Swap operands if we have a comparison that isn't available in
12155 hardware. */
12156 if (d->flag)
12157 {
12158 rtx tmp = op1;
12159 op1 = op0;
12160 op0 = tmp;
12161 }
12162
12163 target = gen_reg_rtx (SImode);
12164 emit_move_insn (target, const0_rtx);
12165 target = gen_rtx_SUBREG (QImode, target, 0);
12166
12167 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12168 op0 = copy_to_mode_reg (mode0, op0);
12169 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12170 op1 = copy_to_mode_reg (mode1, op1);
12171
12172 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12173 pat = GEN_FCN (d->icode) (op0, op1, op2);
12174 if (! pat)
12175 return 0;
12176 emit_insn (pat);
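  /* Set only the low byte of TARGET from the flags comparison; the
     full SImode register was zeroed above, so reading back SUBREG_REG
     below yields a properly zero-extended 0/1 value.  */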
12177 emit_insn (gen_rtx_SET (VOIDmode,
12178 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12179 gen_rtx_fmt_ee (comparison, QImode,
12180 gen_rtx_REG (CCmode, FLAGS_REG),
12181 const0_rtx)));
bd793c65 12182
6f1a6c5b 12183 return SUBREG_REG (target);
12184}
12185
12186/* Expand an expression EXP that calls a built-in function,
12187 with result going to TARGET if that's convenient
12188 (and in mode MODE if that's convenient).
12189 SUBTARGET may be used as the target for computing one of EXP's operands.
12190 IGNORE is nonzero if the value is to be ignored. */
12191
12192rtx
12193ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12194 tree exp;
12195 rtx target;
12196 rtx subtarget ATTRIBUTE_UNUSED;
12197 enum machine_mode mode ATTRIBUTE_UNUSED;
12198 int ignore ATTRIBUTE_UNUSED;
12199{
8b60264b 12200 const struct builtin_description *d;
77ebd435 12201 size_t i;
12202 enum insn_code icode;
12203 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12204 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12205 tree arg0, arg1, arg2;
12206 rtx op0, op1, op2, pat;
12207 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12208 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12209
12210 switch (fcode)
12211 {
12212 case IX86_BUILTIN_EMMS:
12213 emit_insn (gen_emms ());
12214 return 0;
12215
12216 case IX86_BUILTIN_SFENCE:
12217 emit_insn (gen_sfence ());
12218 return 0;
12219
bd793c65 12220 case IX86_BUILTIN_PEXTRW:
12221 case IX86_BUILTIN_PEXTRW128:
12222 icode = (fcode == IX86_BUILTIN_PEXTRW
12223 ? CODE_FOR_mmx_pextrw
12224 : CODE_FOR_sse2_pextrw);
12225 arg0 = TREE_VALUE (arglist);
12226 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12227 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12228 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12229 tmode = insn_data[icode].operand[0].mode;
12230 mode0 = insn_data[icode].operand[1].mode;
12231 mode1 = insn_data[icode].operand[2].mode;
12232
12233 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12234 op0 = copy_to_mode_reg (mode0, op0);
12235 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12236 {
12237 /* @@@ better error message */
12238 error ("selector must be an immediate");
6f1a6c5b 12239 return gen_reg_rtx (tmode);
12240 }
12241 if (target == 0
12242 || GET_MODE (target) != tmode
12243 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12244 target = gen_reg_rtx (tmode);
12245 pat = GEN_FCN (icode) (target, op0, op1);
12246 if (! pat)
12247 return 0;
12248 emit_insn (pat);
12249 return target;
12250
12251 case IX86_BUILTIN_PINSRW:
12252 case IX86_BUILTIN_PINSRW128:
12253 icode = (fcode == IX86_BUILTIN_PINSRW
12254 ? CODE_FOR_mmx_pinsrw
12255 : CODE_FOR_sse2_pinsrw);
12256 arg0 = TREE_VALUE (arglist);
12257 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12258 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12259 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12260 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12261 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12262 tmode = insn_data[icode].operand[0].mode;
12263 mode0 = insn_data[icode].operand[1].mode;
12264 mode1 = insn_data[icode].operand[2].mode;
12265 mode2 = insn_data[icode].operand[3].mode;
12266
12267 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12268 op0 = copy_to_mode_reg (mode0, op0);
12269 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12270 op1 = copy_to_mode_reg (mode1, op1);
12271 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12272 {
12273 /* @@@ better error message */
12274 error ("selector must be an immediate");
12275 return const0_rtx;
12276 }
12277 if (target == 0
12278 || GET_MODE (target) != tmode
12279 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12280 target = gen_reg_rtx (tmode);
12281 pat = GEN_FCN (icode) (target, op0, op1, op2);
12282 if (! pat)
12283 return 0;
12284 emit_insn (pat);
12285 return target;
12286
12287 case IX86_BUILTIN_MASKMOVQ:
12288 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12289 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12290 : CODE_FOR_sse2_maskmovdqu);
12291 /* Note the arg order is different from the operand order. */
12292 arg1 = TREE_VALUE (arglist);
12293 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12294 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12295 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12296 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12297 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12298 mode0 = insn_data[icode].operand[0].mode;
12299 mode1 = insn_data[icode].operand[1].mode;
12300 mode2 = insn_data[icode].operand[2].mode;
12301
5c464583 12302 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12303 op0 = copy_to_mode_reg (mode0, op0);
12304 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12305 op1 = copy_to_mode_reg (mode1, op1);
12306 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12307 op2 = copy_to_mode_reg (mode2, op2);
12308 pat = GEN_FCN (icode) (op0, op1, op2);
12309 if (! pat)
12310 return 0;
12311 emit_insn (pat);
12312 return 0;
12313
12314 case IX86_BUILTIN_SQRTSS:
12315 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12316 case IX86_BUILTIN_RSQRTSS:
12317 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12318 case IX86_BUILTIN_RCPSS:
12319 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12320
12321 case IX86_BUILTIN_ANDPS:
12322 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12323 arglist, target);
12324 case IX86_BUILTIN_ANDNPS:
12325 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12326 arglist, target);
12327 case IX86_BUILTIN_ORPS:
12328 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12329 arglist, target);
12330 case IX86_BUILTIN_XORPS:
12331 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12332 arglist, target);
12333
12334 case IX86_BUILTIN_LOADAPS:
12335 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12336
12337 case IX86_BUILTIN_LOADUPS:
12338 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12339
12340 case IX86_BUILTIN_STOREAPS:
e37af218 12341 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 12342 case IX86_BUILTIN_STOREUPS:
e37af218 12343 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12344
12345 case IX86_BUILTIN_LOADSS:
12346 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12347
12348 case IX86_BUILTIN_STORESS:
e37af218 12349 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 12350
0f290768 12351 case IX86_BUILTIN_LOADHPS:
bd793c65 12352 case IX86_BUILTIN_LOADLPS:
12353 case IX86_BUILTIN_LOADHPD:
12354 case IX86_BUILTIN_LOADLPD:
12355 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12356 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12357 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12358 : CODE_FOR_sse2_movlpd);
12359 arg0 = TREE_VALUE (arglist);
12360 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12361 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12362 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12363 tmode = insn_data[icode].operand[0].mode;
12364 mode0 = insn_data[icode].operand[1].mode;
12365 mode1 = insn_data[icode].operand[2].mode;
12366
12367 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12368 op0 = copy_to_mode_reg (mode0, op0);
12369 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12370 if (target == 0
12371 || GET_MODE (target) != tmode
12372 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12373 target = gen_reg_rtx (tmode);
12374 pat = GEN_FCN (icode) (target, op0, op1);
12375 if (! pat)
12376 return 0;
12377 emit_insn (pat);
12378 return target;
0f290768 12379
12380 case IX86_BUILTIN_STOREHPS:
12381 case IX86_BUILTIN_STORELPS:
12382 case IX86_BUILTIN_STOREHPD:
12383 case IX86_BUILTIN_STORELPD:
12384 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12385 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12386 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12387 : CODE_FOR_sse2_movlpd);
12388 arg0 = TREE_VALUE (arglist);
12389 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12391 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12392 mode0 = insn_data[icode].operand[1].mode;
12393 mode1 = insn_data[icode].operand[2].mode;
12394
12395 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12396 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12397 op1 = copy_to_mode_reg (mode1, op1);
12398
12399 pat = GEN_FCN (icode) (op0, op0, op1);
12400 if (! pat)
12401 return 0;
12402 emit_insn (pat);
12403 return 0;
12404
12405 case IX86_BUILTIN_MOVNTPS:
e37af218 12406 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 12407 case IX86_BUILTIN_MOVNTQ:
e37af218 12408 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12409
12410 case IX86_BUILTIN_LDMXCSR:
12411 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12412 target = assign_386_stack_local (SImode, 0);
12413 emit_move_insn (target, op0);
12414 emit_insn (gen_ldmxcsr (target));
12415 return 0;
12416
12417 case IX86_BUILTIN_STMXCSR:
12418 target = assign_386_stack_local (SImode, 0);
12419 emit_insn (gen_stmxcsr (target));
12420 return copy_to_mode_reg (SImode, target);
12421
bd793c65 12422 case IX86_BUILTIN_SHUFPS:
12423 case IX86_BUILTIN_SHUFPD:
12424 icode = (fcode == IX86_BUILTIN_SHUFPS
12425 ? CODE_FOR_sse_shufps
12426 : CODE_FOR_sse2_shufpd);
12427 arg0 = TREE_VALUE (arglist);
12428 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12429 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12430 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12431 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12432 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12433 tmode = insn_data[icode].operand[0].mode;
12434 mode0 = insn_data[icode].operand[1].mode;
12435 mode1 = insn_data[icode].operand[2].mode;
12436 mode2 = insn_data[icode].operand[3].mode;
12437
12438 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12439 op0 = copy_to_mode_reg (mode0, op0);
12440 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12441 op1 = copy_to_mode_reg (mode1, op1);
12442 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12443 {
12444 /* @@@ better error message */
12445 error ("mask must be an immediate");
6f1a6c5b 12446 return gen_reg_rtx (tmode);
12447 }
12448 if (target == 0
12449 || GET_MODE (target) != tmode
12450 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12451 target = gen_reg_rtx (tmode);
12452 pat = GEN_FCN (icode) (target, op0, op1, op2);
12453 if (! pat)
12454 return 0;
12455 emit_insn (pat);
12456 return target;
12457
12458 case IX86_BUILTIN_PSHUFW:
12459 case IX86_BUILTIN_PSHUFD:
12460 case IX86_BUILTIN_PSHUFHW:
12461 case IX86_BUILTIN_PSHUFLW:
12462 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12463 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12464 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12465 : CODE_FOR_mmx_pshufw);
12466 arg0 = TREE_VALUE (arglist);
12467 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12468 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12469 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12470 tmode = insn_data[icode].operand[0].mode;
12471 mode1 = insn_data[icode].operand[1].mode;
12472 mode2 = insn_data[icode].operand[2].mode;
bd793c65 12473
12474 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12475 op0 = copy_to_mode_reg (mode1, op0);
12476 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12477 {
12478 /* @@@ better error message */
12479 error ("mask must be an immediate");
12480 return const0_rtx;
12481 }
12482 if (target == 0
12483 || GET_MODE (target) != tmode
12484 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12485 target = gen_reg_rtx (tmode);
29628f27 12486 pat = GEN_FCN (icode) (target, op0, op1);
12487 if (! pat)
12488 return 0;
12489 emit_insn (pat);
12490 return target;
12491
12492 case IX86_BUILTIN_FEMMS:
12493 emit_insn (gen_femms ());
12494 return NULL_RTX;
12495
12496 case IX86_BUILTIN_PAVGUSB:
12497 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12498
12499 case IX86_BUILTIN_PF2ID:
12500 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
12501
12502 case IX86_BUILTIN_PFACC:
12503 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
12504
12505 case IX86_BUILTIN_PFADD:
12506 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12507
12508 case IX86_BUILTIN_PFCMPEQ:
12509 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12510
12511 case IX86_BUILTIN_PFCMPGE:
12512 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
12513
12514 case IX86_BUILTIN_PFCMPGT:
12515 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12516
12517 case IX86_BUILTIN_PFMAX:
12518 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12519
12520 case IX86_BUILTIN_PFMIN:
12521 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12522
12523 case IX86_BUILTIN_PFMUL:
12524 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12525
12526 case IX86_BUILTIN_PFRCP:
12527 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12528
12529 case IX86_BUILTIN_PFRCPIT1:
12530 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12531
12532 case IX86_BUILTIN_PFRCPIT2:
12533 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12534
12535 case IX86_BUILTIN_PFRSQIT1:
12536 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12537
12538 case IX86_BUILTIN_PFRSQRT:
12539 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12540
12541 case IX86_BUILTIN_PFSUB:
12542 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12543
12544 case IX86_BUILTIN_PFSUBR:
12545 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12546
12547 case IX86_BUILTIN_PI2FD:
12548 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12549
12550 case IX86_BUILTIN_PMULHRW:
12551 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12552
12553 case IX86_BUILTIN_PF2IW:
12554 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12555
12556 case IX86_BUILTIN_PFNACC:
12557 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12558
12559 case IX86_BUILTIN_PFPNACC:
12560 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12561
12562 case IX86_BUILTIN_PI2FW:
12563 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12564
12565 case IX86_BUILTIN_PSWAPDSI:
12566 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12567
12568 case IX86_BUILTIN_PSWAPDSF:
12569 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12570
12571 case IX86_BUILTIN_SSE_ZERO:
12572 target = gen_reg_rtx (V4SFmode);
12573 emit_insn (gen_sse_clrv4sf (target));
12574 return target;
12575
12576 case IX86_BUILTIN_MMX_ZERO:
12577 target = gen_reg_rtx (DImode);
12578 emit_insn (gen_mmx_clrdi (target));
12579 return target;
12580
12581 case IX86_BUILTIN_SQRTSD:
12582 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
12583 case IX86_BUILTIN_LOADAPD:
12584 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
12585 case IX86_BUILTIN_LOADUPD:
12586 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
12587
12588 case IX86_BUILTIN_STOREAPD:
12589 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12590 case IX86_BUILTIN_STOREUPD:
12591 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
12592
12593 case IX86_BUILTIN_LOADSD:
12594 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
12595
12596 case IX86_BUILTIN_STORESD:
12597 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
12598
12599 case IX86_BUILTIN_SETPD1:
12600 target = assign_386_stack_local (DFmode, 0);
12601 arg0 = TREE_VALUE (arglist);
12602 emit_move_insn (adjust_address (target, DFmode, 0),
12603 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12604 op0 = gen_reg_rtx (V2DFmode);
12605 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
12606 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
12607 return op0;
12608
12609 case IX86_BUILTIN_SETPD:
12610 target = assign_386_stack_local (V2DFmode, 0);
12611 arg0 = TREE_VALUE (arglist);
12612 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12613 emit_move_insn (adjust_address (target, DFmode, 0),
12614 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12615 emit_move_insn (adjust_address (target, DFmode, 8),
12616 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
12617 op0 = gen_reg_rtx (V2DFmode);
12618 emit_insn (gen_sse2_movapd (op0, target));
12619 return op0;
12620
12621 case IX86_BUILTIN_LOADRPD:
12622 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
12623 gen_reg_rtx (V2DFmode), 1);
12624 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
12625 return target;
12626
12627 case IX86_BUILTIN_LOADPD1:
12628 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
12629 gen_reg_rtx (V2DFmode), 1);
12630 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
12631 return target;
12632
12633 case IX86_BUILTIN_STOREPD1:
12634 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12635 case IX86_BUILTIN_STORERPD:
12636 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
12637
12638 case IX86_BUILTIN_MFENCE:
12639 emit_insn (gen_sse2_mfence ());
12640 return 0;
12641 case IX86_BUILTIN_LFENCE:
12642 emit_insn (gen_sse2_lfence ());
12643 return 0;
12644
12645 case IX86_BUILTIN_CLFLUSH:
12646 arg0 = TREE_VALUE (arglist);
12647 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12648 icode = CODE_FOR_sse2_clflush;
12649 mode0 = insn_data[icode].operand[0].mode;
12650 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12651 op0 = copy_to_mode_reg (mode0, op0);
12652
12653 emit_insn (gen_sse2_clflush (op0));
12654 return 0;
12655
12656 case IX86_BUILTIN_MOVNTPD:
12657 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
12658 case IX86_BUILTIN_MOVNTDQ:
916b60b7 12659 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
12660 case IX86_BUILTIN_MOVNTI:
12661 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
12662
12663 default:
12664 break;
12665 }
12666
ca7558fc 12667 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12668 if (d->code == fcode)
12669 {
12670 /* Compares are treated specially. */
12671 if (d->icode == CODE_FOR_maskcmpv4sf3
12672 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12673 || d->icode == CODE_FOR_maskncmpv4sf3
12674 || d->icode == CODE_FOR_vmmaskncmpv4sf3
12675 || d->icode == CODE_FOR_maskcmpv2df3
12676 || d->icode == CODE_FOR_vmmaskcmpv2df3
12677 || d->icode == CODE_FOR_maskncmpv2df3
12678 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12679 return ix86_expand_sse_compare (d, arglist, target);
12680
12681 return ix86_expand_binop_builtin (d->icode, arglist, target);
12682 }
12683
ca7558fc 12684 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12685 if (d->code == fcode)
12686 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 12687
ca7558fc 12688 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12689 if (d->code == fcode)
12690 return ix86_expand_sse_comi (d, arglist, target);
0f290768 12691
12692 /* @@@ Should really do something sensible here. */
12693 return 0;
bd793c65 12694}
12695
12696/* Store OPERAND to memory after reload has completed. This means
f710504c 12697 that we can't easily use assign_stack_local. */
12698rtx
12699ix86_force_to_memory (mode, operand)
12700 enum machine_mode mode;
12701 rtx operand;
12702{
898d374d 12703 rtx result;
12704 if (!reload_completed)
12705 abort ();
12706 if (TARGET_64BIT && TARGET_RED_ZONE)
12707 {
12708 result = gen_rtx_MEM (mode,
12709 gen_rtx_PLUS (Pmode,
12710 stack_pointer_rtx,
12711 GEN_INT (-RED_ZONE_SIZE)));
12712 emit_move_insn (result, operand);
12713 }
12714 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 12715 {
898d374d 12716 switch (mode)
4211a8fb 12717 {
12718 case HImode:
12719 case SImode:
12720 operand = gen_lowpart (DImode, operand);
12721 /* FALLTHRU */
12722 case DImode:
4211a8fb 12723 emit_insn (
12724 gen_rtx_SET (VOIDmode,
12725 gen_rtx_MEM (DImode,
12726 gen_rtx_PRE_DEC (DImode,
12727 stack_pointer_rtx)),
12728 operand));
12729 break;
12730 default:
12731 abort ();
12732 }
12733 result = gen_rtx_MEM (mode, stack_pointer_rtx);
12734 }
12735 else
12736 {
12737 switch (mode)
12738 {
12739 case DImode:
12740 {
12741 rtx operands[2];
12742 split_di (&operand, 1, operands, operands + 1);
12743 emit_insn (
12744 gen_rtx_SET (VOIDmode,
12745 gen_rtx_MEM (SImode,
12746 gen_rtx_PRE_DEC (Pmode,
12747 stack_pointer_rtx)),
12748 operands[1]));
12749 emit_insn (
12750 gen_rtx_SET (VOIDmode,
12751 gen_rtx_MEM (SImode,
12752 gen_rtx_PRE_DEC (Pmode,
12753 stack_pointer_rtx)),
12754 operands[0]));
12755 }
12756 break;
12757 case HImode:
12758 /* It is better to store HImodes as SImodes. */
12759 if (!TARGET_PARTIAL_REG_STALL)
12760 operand = gen_lowpart (SImode, operand);
12761 /* FALLTHRU */
12762 case SImode:
4211a8fb 12763 emit_insn (
12764 gen_rtx_SET (VOIDmode,
12765 gen_rtx_MEM (GET_MODE (operand),
12766 gen_rtx_PRE_DEC (SImode,
12767 stack_pointer_rtx)),
12768 operand));
12769 break;
12770 default:
12771 abort ();
4211a8fb 12772 }
898d374d 12773 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 12774 }
898d374d 12775 return result;
12776}
12777
12778/* Free the operand from memory. */
12779void
12780ix86_free_from_memory (mode)
12781 enum machine_mode mode;
12782{
12783 if (!TARGET_64BIT || !TARGET_RED_ZONE)
12784 {
12785 int size;
12786
12787 if (mode == DImode || TARGET_64BIT)
12788 size = 8;
12789 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12790 size = 2;
12791 else
12792 size = 4;
12793 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12794 to a pop or add instruction if registers are available. */
12795 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12796 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12797 GEN_INT (size))));
12798 }
4211a8fb 12799}
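/* Typical pairing of the two helpers above (sketch): a post-reload
   splitter that needs a scratch slot does

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... emit insns that use MEM ...
     ix86_free_from_memory (DImode);

   which releases the stack space again (or relies on the red zone in
   64-bit mode, where no adjustment was made). */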
a946dd00 12800
12801/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12802 QImode must go into class Q_REGS.
12803 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
892a2d68 12804 movdf to do mem-to-mem moves through integer regs. */
12805enum reg_class
12806ix86_preferred_reload_class (x, class)
12807 rtx x;
12808 enum reg_class class;
12809{
12810 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12811 {
12812 /* SSE can't load any constant directly yet. */
12813 if (SSE_CLASS_P (class))
12814 return NO_REGS;
12815 /* Floats can load 0 and 1. */
12816 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12817 {
12818 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12819 if (MAYBE_SSE_CLASS_P (class))
12820 return (reg_class_subset_p (class, GENERAL_REGS)
12821 ? GENERAL_REGS : FLOAT_REGS);
12822 else
12823 return class;
12824 }
12825 /* General regs can load everything. */
12826 if (reg_class_subset_p (class, GENERAL_REGS))
12827 return GENERAL_REGS;
12828 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12829 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12830 return NO_REGS;
12831 }
12832 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12833 return NO_REGS;
12834 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12835 return Q_REGS;
12836 return class;
12837}
12838
12839/* If we are copying between general and FP registers, we need a memory
12840 location. The same is true for SSE and MMX registers.
12841
12842 The macro can't work reliably when one of the CLASSES is a class containing
12843 registers from multiple units (SSE, MMX, integer). We avoid this by never
12844 combining those units in a single alternative in the machine description.
12845 Ensure that this constraint holds to avoid unexpected surprises.
12846
12847 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12848 enforce these sanity checks. */
12849int
12850ix86_secondary_memory_needed (class1, class2, mode, strict)
12851 enum reg_class class1, class2;
12852 enum machine_mode mode;
12853 int strict;
12854{
12855 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12856 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12857 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12858 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12859 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12860 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12861 {
12862 if (strict)
12863 abort ();
12864 else
12865 return 1;
12866 }
12867 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12868 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12869 && (mode) != SImode)
12870 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12871 && (mode) != SImode));
12872}
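/* Example: an SFmode copy between FLOAT_REGS and SSE_REGS makes the
   FLOAT_CLASS_P test above differ, so a memory intermediate is
   required, while an SImode copy between SSE_REGS and GENERAL_REGS is
   exempted by the (mode) != SImode conditions. */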
12873/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 12874 one in class CLASS2.
12875
12876 It is not required that the cost always equal 2 when FROM is the same as TO;
12877 on some machines it is expensive to move between registers if they are not
12878 general registers. */
12879int
12880ix86_register_move_cost (mode, class1, class2)
12881 enum machine_mode mode;
12882 enum reg_class class1, class2;
12883{
12884 /* In case we require secondary memory, compute the cost of the store
12885 followed by the load. When copying from a general-purpose register we
12886 may emit multiple stores followed by a single load, causing a memory
12887 size mismatch stall. Count this as an arbitrarily high cost of 20. */
12888 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12889 {
92d0fb09 12890 int add_cost = 0;
62415523 12891 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 12892 add_cost = 20;
62415523 12893 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 12894 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 12895 }
92d0fb09 12896 /* Moves between SSE/MMX and integer unit are expensive. */
12897 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12898 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12899 return ix86_cost->mmxsse_to_integer;
12900 if (MAYBE_FLOAT_CLASS_P (class1))
12901 return ix86_cost->fp_move;
12902 if (MAYBE_SSE_CLASS_P (class1))
12903 return ix86_cost->sse_move;
12904 if (MAYBE_MMX_CLASS_P (class1))
12905 return ix86_cost->mmx_move;
12906 return 2;
12907}
12908
12909/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
12910int
12911ix86_hard_regno_mode_ok (regno, mode)
12912 int regno;
12913 enum machine_mode mode;
12914{
12915 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
12916 if (CC_REGNO_P (regno))
12917 return GET_MODE_CLASS (mode) == MODE_CC;
12918 if (GET_MODE_CLASS (mode) == MODE_CC
12919 || GET_MODE_CLASS (mode) == MODE_RANDOM
12920 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12921 return 0;
12922 if (FP_REGNO_P (regno))
12923 return VALID_FP_MODE_P (mode);
12924 if (SSE_REGNO_P (regno))
12925 return VALID_SSE_REG_MODE (mode);
12926 if (MMX_REGNO_P (regno))
47f339cf 12927 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12928 /* We handle both integer and floats in the general purpose registers.
12929 In future we should be able to handle vector modes as well. */
12930 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12931 return 0;
12932 /* Take care with QImode values - they can live in non-QI regs, but then
12933 they cause partial register stalls. */
d2836273 12934 if (regno < 4 || mode != QImode || TARGET_64BIT)
12935 return 1;
12936 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12937}
12938
12939/* Return the cost of moving data of mode M between a
12940 register and memory. A value of 2 is the default; this cost is
12941 relative to those in `REGISTER_MOVE_COST'.
12942
12943 If moving between registers and memory is more expensive than
12944 between two registers, you should define this macro to express the
12945 relative cost.
12946
12947 Also model the increased cost of moving QImode registers in
12948 non-Q_REGS classes.
12949 */
12950int
12951ix86_memory_move_cost (mode, class, in)
12952 enum machine_mode mode;
12953 enum reg_class class;
12954 int in;
12955{
12956 if (FLOAT_CLASS_P (class))
12957 {
12958 int index;
12959 switch (mode)
12960 {
12961 case SFmode:
12962 index = 0;
12963 break;
12964 case DFmode:
12965 index = 1;
12966 break;
12967 case XFmode:
12968 case TFmode:
12969 index = 2;
12970 break;
12971 default:
12972 return 100;
12973 }
12974 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12975 }
12976 if (SSE_CLASS_P (class))
12977 {
12978 int index;
12979 switch (GET_MODE_SIZE (mode))
12980 {
12981 case 4:
12982 index = 0;
12983 break;
12984 case 8:
12985 index = 1;
12986 break;
12987 case 16:
12988 index = 2;
12989 break;
12990 default:
12991 return 100;
12992 }
12993 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12994 }
12995 if (MMX_CLASS_P (class))
12996 {
12997 int index;
12998 switch (GET_MODE_SIZE (mode))
12999 {
13000 case 4:
13001 index = 0;
13002 break;
13003 case 8:
13004 index = 1;
13005 break;
13006 default:
13007 return 100;
13008 }
13009 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13010 }
13011 switch (GET_MODE_SIZE (mode))
13012 {
13013 case 1:
13014 if (in)
13015 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13016 : ix86_cost->movzbl_load);
13017 else
13018 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13019 : ix86_cost->int_store[0] + 4);
13020 break;
13021 case 2:
13022 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13023 default:
13024 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13025 if (mode == TFmode)
13026 mode = XFmode;
3bb7e126 13027 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13028 * (int) GET_MODE_SIZE (mode) / 4);
13029 }
13030}
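/* Worked example of the default case above: a DImode load into
   GENERAL_REGS needs two 32-bit moves, so it costs
   ix86_cost->int_load[2] * 8 / 4, i.e. twice the SImode load cost. */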
0ecf09f9 13031
13032#ifdef DO_GLOBAL_CTORS_BODY
13033static void
13034ix86_svr3_asm_out_constructor (symbol, priority)
13035 rtx symbol;
13036 int priority ATTRIBUTE_UNUSED;
13037{
13038 init_section ();
13039 fputs ("\tpushl $", asm_out_file);
13040 assemble_name (asm_out_file, XSTR (symbol, 0));
13041 fputc ('\n', asm_out_file);
13042}
13043#endif
13044
13045/* Order the registers for the register allocator. */
13046
13047void
13048x86_order_regs_for_local_alloc ()
13049{
13050 int pos = 0;
13051 int i;
13052
13053 /* First allocate the local general purpose registers. */
13054 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13055 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13056 reg_alloc_order [pos++] = i;
13057
13058 /* Global general purpose registers. */
13059 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13060 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13061 reg_alloc_order [pos++] = i;
13062
13063 /* x87 registers come first in case we are doing FP math
13064 using them. */
13065 if (!TARGET_SSE_MATH)
13066 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13067 reg_alloc_order [pos++] = i;
13068
13069 /* SSE registers. */
13070 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13071 reg_alloc_order [pos++] = i;
13072 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13073 reg_alloc_order [pos++] = i;
13074
13075 /* x87 registers. */
13076 if (TARGET_SSE_MATH)
13077 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13078 reg_alloc_order [pos++] = i;
13079
13080 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13081 reg_alloc_order [pos++] = i;
13082
13083 /* Initialize the rest of the array, as some registers are never
13084 allocated at all. */
13085 while (pos < FIRST_PSEUDO_REGISTER)
13086 reg_alloc_order [pos++] = 0;
13087}