/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

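/* For example, MODE_INDEX (SImode) is 2, so the third element of each
   five-element array in the cost tables below ({QI, HI, SI, DI, other})
   gives the SImode cost of starting a multiply or of a divide/mod.  */
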
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
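/* The pentium_cost default above is only a placeholder: override_options ()
   below repoints ix86_cost at the table matching -mtune, or at size_cost
   when optimizing for size.  */
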
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

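/* Each x86_* tuning knob below is a bitmask over the processor enum; a
   feature is active for the selected CPU when a test of the form
   (x86_use_leave & (1 << ix86_tune)) is nonzero, or (1 << ix86_arch) for
   architecture facts such as x86_arch_always_fancy_math_387, which is
   tested that way in override_options () below.  */
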
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* The Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;

/* If the average insn count for a single function invocation is lower
   than this constant, emit fast (but longer) prologue and epilogue
   code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
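
/* For instance, gcc regno 1 is %edx (see the register maps below), so
   REGNO_REG_CLASS (1) is DREG.  */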

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

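/* In 64-bit mode REGPARM_MAX is 6 and SSE_REGPARM_MAX is 8 (see i386.h),
   so with UNITS_PER_WORD == 8 the X86_64_VARARGS_SIZE above works out to
   6*8 + 8*16 = 176 bytes -- the register save area the x86-64 psABI
   prescribes for varargs functions.  */
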
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	        (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

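/* ix86_compute_frame_layout (declared below) fills this in; to_allocate
   covers the bracketed region in the diagram above (padding1, va_arg
   registers, frame and padding2) -- roughly, the bytes the prologue still
   has to allocate after saving registers.  */
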
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, int, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

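/* ix86_decompose_address (below) fills this structure in.  For example,
   an address like (plus (mult (reg A) (const_int 4)) (plus (reg B)
   (const_int 12))) decomposes into base B, index A, scale 4 and disp 12,
   matching the base + index*scale + disp form of x86 addressing modes.  */
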
static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif

/* Register classes used for passing the given 64-bit part of an argument.
   These represent the classes documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   just uses an SFmode or DFmode move instead of a DImode one to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
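/* A plausible reading of the five slots: the x87 has exactly five
   non-trivial load-constant insns beyond fld1/fldz -- fldl2t, fldl2e,
   fldpi, fldlg2 and fldln2.  init_ext_80387_constants () fills the table
   lazily, with ext_80387_constants_init guarding reinitialization.  */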
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

struct gcc_target targetm = TARGET_INITIALIZER;
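/* TARGET_INITIALIZER expands to a struct gcc_target literal built from the
   TARGET_* macros above, so the middle end reaches these hooks through
   targetm; e.g. targetm.rtx_costs here is ix86_rtx_costs.  */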
\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

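  /* Note the -malign-* values are log2 exponents: e.g. -malign-loops=4
     yields align_loops = 1 << 4, i.e. 16-byte alignment, whereas the
     -falign-* replacements take the byte alignment directly.  */
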
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

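  /* For instance, -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 128 bits, the 16-byte alignment that
     SSE's __m128 wants.  */
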
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

f996902d
RH
1344 if (ix86_tls_dialect_string)
1345 {
1346 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1347 ix86_tls_dialect = TLS_DIALECT_GNU;
1348 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1349 ix86_tls_dialect = TLS_DIALECT_SUN;
1350 else
1351 error ("bad value (%s) for -mtls-dialect= switch",
1352 ix86_tls_dialect_string);
1353 }
1354
e9a25f70
JL
1355 /* Keep nonleaf frame pointers. */
1356 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1357 flag_omit_frame_pointer = 1;
e075ae69
RH
1358
1359 /* If we're doing fast math, we don't care about comparison order
1360 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1361 if (flag_unsafe_math_optimizations)
e075ae69
RH
1362 target_flags &= ~MASK_IEEE_FP;
1363
30c99a84
RH
1364 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1365 since the insns won't need emulation. */
1366 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1367 target_flags &= ~MASK_NO_FANCY_MATH_387;
1368
22c7c85e
L
1369 /* Turn on SSE2 builtins for -mpni. */
1370 if (TARGET_PNI)
1371 target_flags |= MASK_SSE2;
1372
1373 /* Turn on SSE builtins for -msse2. */
1374 if (TARGET_SSE2)
1375 target_flags |= MASK_SSE;
1376
14f73b5a
JH
1377 if (TARGET_64BIT)
1378 {
1379 if (TARGET_ALIGN_DOUBLE)
c725bd79 1380 error ("-malign-double makes no sense in 64-bit mode");
14f73b5a 1381 if (TARGET_RTD)
c725bd79 1382 error ("-mrtd calling convention not supported in 64-bit mode");
14f73b5a 1383 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1384 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1385 ix86_fpmath = FPMATH_SSE;
14f73b5a 1386 }
965f5423 1387 else
a5b378d6
JH
1388 {
1389 ix86_fpmath = FPMATH_387;
1390 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1391 when the programmer takes care to keep the stack from being destroyed. */
1392 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1393 target_flags |= MASK_NO_RED_ZONE;
1394 }
965f5423
JH
1395
1396 if (ix86_fpmath_string != 0)
1397 {
1398 if (! strcmp (ix86_fpmath_string, "387"))
1399 ix86_fpmath = FPMATH_387;
1400 else if (! strcmp (ix86_fpmath_string, "sse"))
1401 {
1402 if (!TARGET_SSE)
1403 {
1404 warning ("SSE instruction set disabled, using 387 arithmetic");
1405 ix86_fpmath = FPMATH_387;
1406 }
1407 else
1408 ix86_fpmath = FPMATH_SSE;
1409 }
1410 else if (! strcmp (ix86_fpmath_string, "387,sse")
1411 || ! strcmp (ix86_fpmath_string, "sse,387"))
1412 {
1413 if (!TARGET_SSE)
1414 {
1415 warning ("SSE instruction set disabled, using 387 arithmetic");
1416 ix86_fpmath = FPMATH_387;
1417 }
1418 else if (!TARGET_80387)
1419 {
1420 warning ("387 instruction set disabled, using SSE arithmetic");
1421 ix86_fpmath = FPMATH_SSE;
1422 }
1423 else
1424 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1425 }
fce5a9f2 1426 else
965f5423
JH
1427 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1428 }
14f73b5a 1429
a7180f70
BS
1430 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1431 on by -msse. */
1432 if (TARGET_SSE)
e37af218
RH
1433 {
1434 target_flags |= MASK_MMX;
1435 x86_prefetch_sse = true;
1436 }
c6036a37 1437
47f339cf
BS
1438 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1439 if (TARGET_3DNOW)
1440 {
1441 target_flags |= MASK_MMX;
d1f87653 1442 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1443 extensions it adds. */
1444 if (x86_3dnow_a & (1 << ix86_arch))
1445 target_flags |= MASK_3DNOW_A;
1446 }
9e555526 1447 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1448 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1449 && !optimize_size)
1450 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1451
1452 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1453 {
1454 char *p;
1455 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1456 p = strchr (internal_label_prefix, 'X');
1457 internal_label_prefix_len = p - internal_label_prefix;
1458 *p = '\0';
1459 }
f5316dfe
MM
1460}
1461\f
32b5b1aa 1462void
b96a374d 1463optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1464{
e9a25f70
JL
1465 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1466 make the problem with not enough registers even worse. */
32b5b1aa
SC
1467#ifdef INSN_SCHEDULING
1468 if (level > 1)
1469 flag_schedule_insns = 0;
1470#endif
55ba61f3
JH
1471
1472 /* The default values of these switches depend on TARGET_64BIT, which is
1473 not known at this moment. Mark these values with 2 and let the user
1474 override them. In case there is no command line option specifying
1475 them, we will set the defaults in override_options. */
1476 if (optimize >= 1)
1477 flag_omit_frame_pointer = 2;
1478 flag_pcc_struct_return = 2;
1479 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1480}
b08de47e 1481\f
91d231cb
JM
1482/* Table of valid machine attributes. */
1483const struct attribute_spec ix86_attribute_table[] =
b08de47e 1484{
91d231cb 1485 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1486 /* Stdcall attribute says callee is responsible for popping arguments
1487 if they are not variable. */
91d231cb 1488 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1489 /* Fastcall attribute says callee is responsible for popping arguments
1490 if they are not variable. */
1491 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1492 /* Cdecl attribute says the callee is a normal C declaration */
1493 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1494 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1495 passed in registers. */
91d231cb
JM
1496 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1497#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1498 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1499 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1501#endif
fe77449a
DR
1502 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1503 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
91d231cb
JM
1504 { NULL, 0, 0, false, false, false, NULL }
1505};
1506
5fbf0217
EB
1507/* Decide whether we can make a sibling call to a function. DECL is the
1508 declaration of the function being targeted by the call and EXP is the
1509 CALL_EXPR representing the call. */
4977bab6
ZW
1510
1511static bool
b96a374d 1512ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1513{
1514 /* If we are generating position-independent code, we cannot sibcall
1515 optimize any indirect call, or a direct call to a global function,
1516 as the PLT requires %ebx be live. */
1517 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1518 return false;
1519
1520 /* If we are returning floats on the 80387 register stack, we cannot
1521 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1522 function that does or, conversely, from a function that does return
1523 a float to a function that doesn't; the necessary stack adjustment
1524 would not be executed. */
4977bab6 1525 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1526 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1527 return false;
1528
1529 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1530 register for the address of the target function. Make sure that all
4977bab6
ZW
1531 such registers are not used for passing parameters. */
1532 if (!decl && !TARGET_64BIT)
1533 {
e767b5be 1534 tree type;
4977bab6
ZW
1535
1536 /* We're looking at the CALL_EXPR, we need the type of the function. */
1537 type = TREE_OPERAND (exp, 0); /* pointer expression */
1538 type = TREE_TYPE (type); /* pointer type */
1539 type = TREE_TYPE (type); /* function type */
1540
e767b5be 1541 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1542 {
1543 /* ??? Need to count the actual number of registers to be used,
1544 not the possible number of registers. Fix later. */
1545 return false;
1546 }
1547 }
1548
1549 /* Otherwise okay. That also includes certain types of indirect calls. */
1550 return true;
1551}
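
/* An illustration of the PIC restriction above (hypothetical declarations):

     extern int g (int);
     int f (int x) { return g (x); }

   On ia32 with -fpic the call to the global function g goes through the
   PLT, which requires %ebx to hold the GOT pointer, so the sibcall
   optimization is rejected here.  */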
1552
e91f04de 1553/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1554 arguments as in struct attribute_spec.handler. */
1555static tree
b96a374d
AJ
1556ix86_handle_cdecl_attribute (tree *node, tree name,
1557 tree args ATTRIBUTE_UNUSED,
1558 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1559{
1560 if (TREE_CODE (*node) != FUNCTION_TYPE
1561 && TREE_CODE (*node) != METHOD_TYPE
1562 && TREE_CODE (*node) != FIELD_DECL
1563 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1564 {
91d231cb
JM
1565 warning ("`%s' attribute only applies to functions",
1566 IDENTIFIER_POINTER (name));
1567 *no_add_attrs = true;
1568 }
e91f04de
CH
1569 else
1570 {
1571 if (is_attribute_p ("fastcall", name))
1572 {
1573 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1574 {
1575 error ("fastcall and stdcall attributes are not compatible");
1576 }
1577 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1578 {
1579 error ("fastcall and regparm attributes are not compatible");
1580 }
1581 }
1582 else if (is_attribute_p ("stdcall", name))
1583 {
1584 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1585 {
1586 error ("fastcall and stdcall attributes are not compatible");
1587 }
1588 }
1589 }
b08de47e 1590
91d231cb
JM
1591 if (TARGET_64BIT)
1592 {
1593 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1595 }
b08de47e 1596
91d231cb
JM
1597 return NULL_TREE;
1598}
b08de47e 1599
91d231cb
JM
1600/* Handle a "regparm" attribute;
1601 arguments as in struct attribute_spec.handler. */
1602static tree
b96a374d
AJ
1603ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1604 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1605{
1606 if (TREE_CODE (*node) != FUNCTION_TYPE
1607 && TREE_CODE (*node) != METHOD_TYPE
1608 && TREE_CODE (*node) != FIELD_DECL
1609 && TREE_CODE (*node) != TYPE_DECL)
1610 {
1611 warning ("`%s' attribute only applies to functions",
1612 IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1614 }
1615 else
1616 {
1617 tree cst;
b08de47e 1618
91d231cb
JM
1619 cst = TREE_VALUE (args);
1620 if (TREE_CODE (cst) != INTEGER_CST)
1621 {
1622 warning ("`%s' attribute requires an integer constant argument",
1623 IDENTIFIER_POINTER (name));
1624 *no_add_attrs = true;
1625 }
1626 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1627 {
1628 warning ("argument to `%s' attribute larger than %d",
1629 IDENTIFIER_POINTER (name), REGPARM_MAX);
1630 *no_add_attrs = true;
1631 }
e91f04de
CH
1632
1633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1634 {
1635 error ("fastcall and regparm attributes are not compatible");
1636 }
b08de47e
MM
1637 }
1638
91d231cb 1639 return NULL_TREE;
b08de47e
MM
1640}
1641
1642/* Return 0 if the attributes for two types are incompatible, 1 if they
1643 are compatible, and 2 if they are nearly compatible (which causes a
1644 warning to be generated). */
1645
8d8e52be 1646static int
b96a374d 1647ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1648{
0f290768 1649 /* Check for mismatch of non-default calling convention. */
27c38fbe 1650 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1651
1652 if (TREE_CODE (type1) != FUNCTION_TYPE)
1653 return 1;
1654
b96a374d 1655 /* Check for mismatched fastcall types. */
e91f04de
CH
1656 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1657 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1658 return 0;
e91f04de 1659
afcfe58c 1660 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1661 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1662 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1663 return 0;
b08de47e
MM
1664 return 1;
1665}
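
/* For example, under -mrtd a plain function type (implicitly callee-pops)
   and an otherwise identical type carrying the "cdecl" attribute differ
   in rtdstr above, so they compare as 0 (incompatible): callee-pops and
   caller-pops conventions must not be mixed through one pointer.  */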
b08de47e 1666\f
e767b5be
JH
1667/* Return the regparm value for a function with the indicated TYPE and DECL.
1668 DECL may be NULL when calling the function indirectly
1669 or considering a libcall. */
483ab821
MM
1670
1671static int
e767b5be 1672ix86_function_regparm (tree type, tree decl)
483ab821
MM
1673{
1674 tree attr;
e767b5be
JH
1675 int regparm = ix86_regparm;
1676 bool user_convention = false;
483ab821 1677
e767b5be
JH
1678 if (!TARGET_64BIT)
1679 {
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1681 if (attr)
1682 {
1683 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1684 user_convention = true;
1685 }
1686
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1688 {
1689 regparm = 2;
1690 user_convention = true;
1691 }
1692
1693 /* Use register calling convention for local functions when possible. */
1694 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1695 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1696 {
1697 struct cgraph_local_info *i = cgraph_local_info (decl);
1698 if (i && i->local)
1699 {
1700 /* We can't use regparm(3) for nested functions as these use
1701 static chain pointer in third argument. */
1702 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1703 regparm = 2;
1704 else
1705 regparm = 3;
1706 }
1707 }
1708 }
1709 return regparm;
483ab821
MM
1710}
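
/* For example (hypothetical declaration):

     int __attribute__((regparm(3))) f (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx.  A fastcall type forces
   regparm 2 (%ecx, %edx), and with -funit-at-a-time a function local to
   the unit may be promoted to regparm 3 (or 2 if it needs the static
   chain) automatically.  */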
1711
fe9f516f
RH
1712/* Return true if EAX is live at the start of the function. Used by
1713 ix86_expand_prologue to determine if we need special help before
1714 calling allocate_stack_worker. */
1715
1716static bool
1717ix86_eax_live_at_start_p (void)
1718{
1719 /* Cheat. Don't bother working forward from ix86_function_regparm
1720 to the function type to whether an actual argument is located in
1721 eax. Instead just look at cfg info, which is still close enough
1722 to correct at this point. This gives false positives for broken
1723 functions that might use uninitialized data that happens to be
1724 allocated in eax, but who cares? */
1725 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1726}
1727
b08de47e
MM
1728/* Value is the number of bytes of arguments automatically
1729 popped when returning from a subroutine call.
1730 FUNDECL is the declaration node of the function (as a tree),
1731 FUNTYPE is the data type of the function (as a tree),
1732 or for a library call it is an identifier node for the subroutine name.
1733 SIZE is the number of bytes of arguments passed on the stack.
1734
1735 On the 80386, the RTD insn may be used to pop them if the number
1736 of args is fixed, but if the number is variable then the caller
1737 must pop them all. RTD can't be used for library calls now
1738 because the library is compiled with the Unix compiler.
1739 Use of RTD is a selectable option, since it is incompatible with
1740 standard Unix calling sequences. If the option is not selected,
1741 the caller must always pop the args.
1742
1743 The attribute stdcall is equivalent to RTD on a per module basis. */
1744
1745int
b96a374d 1746ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1747{
3345ee7d 1748 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1749
0f290768 1750 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1751 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1752
e91f04de
CH
1753 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1754 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1755 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1756 rtd = 1;
79325812 1757
698cdd84
SC
1758 if (rtd
1759 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1760 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1761 == void_type_node)))
698cdd84
SC
1762 return size;
1763 }
79325812 1764
232b8f52 1765 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1766 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
0d7d98ee 1767 && !TARGET_64BIT)
232b8f52 1768 {
e767b5be 1769 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1770
1771 if (!nregs)
1772 return GET_MODE_SIZE (Pmode);
1773 }
1774
1775 return 0;
b08de47e 1776}
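
/* For example, a stdcall function taking two ints is called with 8 bytes
   of stack arguments and returns with "ret $8", so this function returns
   8; the equivalent cdecl function returns 0 and the caller pops.  */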
b08de47e
MM
1777\f
1778/* Argument support functions. */
1779
53c17031
JH
1780/* Return true when register may be used to pass function parameters. */
1781bool
b96a374d 1782ix86_function_arg_regno_p (int regno)
53c17031
JH
1783{
1784 int i;
1785 if (!TARGET_64BIT)
0333394e
JJ
1786 return (regno < REGPARM_MAX
1787 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1788 if (SSE_REGNO_P (regno) && TARGET_SSE)
1789 return true;
1790 /* RAX is used as hidden argument to va_arg functions. */
1791 if (!regno)
1792 return true;
1793 for (i = 0; i < REGPARM_MAX; i++)
1794 if (regno == x86_64_int_parameter_registers[i])
1795 return true;
1796 return false;
1797}
1798
b08de47e
MM
1799/* Initialize a variable CUM of type CUMULATIVE_ARGS
1800 for a call to a function whose data type is FNTYPE.
1801 For a library call, FNTYPE is 0. */
1802
1803void
b96a374d
AJ
1804init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1805 tree fntype, /* tree ptr for function decl */
1806 rtx libname, /* SYMBOL_REF of library name or 0 */
1807 tree fndecl)
b08de47e
MM
1808{
1809 static CUMULATIVE_ARGS zero_cum;
1810 tree param, next_param;
1811
1812 if (TARGET_DEBUG_ARG)
1813 {
1814 fprintf (stderr, "\ninit_cumulative_args (");
1815 if (fntype)
e9a25f70
JL
1816 fprintf (stderr, "fntype code = %s, ret code = %s",
1817 tree_code_name[(int) TREE_CODE (fntype)],
1818 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1819 else
1820 fprintf (stderr, "no fntype");
1821
1822 if (libname)
1823 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1824 }
1825
1826 *cum = zero_cum;
1827
1828 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1829 if (fntype)
1830 cum->nregs = ix86_function_regparm (fntype, fndecl);
1831 else
1832 cum->nregs = ix86_regparm;
53c17031 1833 cum->sse_nregs = SSE_REGPARM_MAX;
53c17031 1834 cum->maybe_vaarg = false;
b08de47e 1835
e91f04de
CH
1836 /* Use ecx and edx registers if function has fastcall attribute */
1837 if (fntype && !TARGET_64BIT)
1838 {
1839 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1840 {
1841 cum->nregs = 2;
1842 cum->fastcall = 1;
1843 }
1844 }
1845
1846
b08de47e
MM
1847 /* Determine if this function has variable arguments. This is
1848 indicated by the last argument being 'void_type_node' if there
1849 are no variable arguments. If there are variable arguments, then
1850 we won't pass anything in registers. */
1851
1852 if (cum->nregs)
1853 {
1854 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1855 param != 0; param = next_param)
b08de47e
MM
1856 {
1857 next_param = TREE_CHAIN (param);
e9a25f70 1858 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1859 {
1860 if (!TARGET_64BIT)
e91f04de
CH
1861 {
1862 cum->nregs = 0;
1863 cum->fastcall = 0;
1864 }
53c17031
JH
1865 cum->maybe_vaarg = true;
1866 }
b08de47e
MM
1867 }
1868 }
53c17031
JH
1869 if ((!fntype && !libname)
1870 || (fntype && !TYPE_ARG_TYPES (fntype)))
1871 cum->maybe_vaarg = 1;
b08de47e
MM
1872
1873 if (TARGET_DEBUG_ARG)
1874 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1875
1876 return;
1877}
1878
d1f87653 1879/* x86-64 register passing implementation. See x86-64 ABI for details. The goal
f710504c 1880 of this code is to classify each 8-byte chunk of an incoming argument by register
53c17031
JH
1881 class and assign registers accordingly. */
1882
1883/* Return the union class of CLASS1 and CLASS2.
1884 See the x86-64 PS ABI for details. */
1885
1886static enum x86_64_reg_class
b96a374d 1887merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1888{
1889 /* Rule #1: If both classes are equal, this is the resulting class. */
1890 if (class1 == class2)
1891 return class1;
1892
1893 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1894 the other class. */
1895 if (class1 == X86_64_NO_CLASS)
1896 return class2;
1897 if (class2 == X86_64_NO_CLASS)
1898 return class1;
1899
1900 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1901 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1902 return X86_64_MEMORY_CLASS;
1903
1904 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1905 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1906 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1907 return X86_64_INTEGERSI_CLASS;
1908 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1909 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1910 return X86_64_INTEGER_CLASS;
1911
1912 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1913 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1914 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1915 return X86_64_MEMORY_CLASS;
1916
1917 /* Rule #6: Otherwise class SSE is used. */
1918 return X86_64_SSE_CLASS;
1919}
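
/* A worked example of the rules above: for a union of int and float the
   two fields classify as X86_64_INTEGERSI_CLASS and X86_64_SSESF_CLASS;
   rule #4 merges them to X86_64_INTEGERSI_CLASS, so the union is passed
   in a general purpose register.  */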
1920
1921/* Classify the argument of type TYPE and mode MODE.
1922 CLASSES will be filled by the register class used to pass each word
1923 of the operand. The number of words is returned. In case the parameter
1924 should be passed in memory, 0 is returned. As a special case for zero
1925 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1926
1927 BIT_OFFSET is used internally for handling records and specifies the
1928 offset, in bits modulo 256, to avoid overflow cases.
1929
1930 See the x86-64 PS ABI for details.
1931*/
1932
1933static int
b96a374d
AJ
1934classify_argument (enum machine_mode mode, tree type,
1935 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 1936{
296e4ae8 1937 HOST_WIDE_INT bytes =
53c17031 1938 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1939 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1940
c60ee6f5
JH
1941 /* Variable sized entities are always passed/returned in memory. */
1942 if (bytes < 0)
1943 return 0;
1944
dafc5b82
JH
1945 if (mode != VOIDmode
1946 && MUST_PASS_IN_STACK (mode, type))
1947 return 0;
1948
53c17031
JH
1949 if (type && AGGREGATE_TYPE_P (type))
1950 {
1951 int i;
1952 tree field;
1953 enum x86_64_reg_class subclasses[MAX_CLASSES];
1954
1955 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1956 if (bytes > 16)
1957 return 0;
1958
1959 for (i = 0; i < words; i++)
1960 classes[i] = X86_64_NO_CLASS;
1961
1962 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
1963 signal the memory class, so handle it as a special case. */
1964 if (!words)
1965 {
1966 classes[0] = X86_64_NO_CLASS;
1967 return 1;
1968 }
1969
1970 /* Classify each field of record and merge classes. */
1971 if (TREE_CODE (type) == RECORD_TYPE)
1972 {
91ea38f9
JH
1973 /* For classes first merge in the field of the subclasses. */
1974 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1975 {
1976 tree bases = TYPE_BINFO_BASETYPES (type);
1977 int n_bases = TREE_VEC_LENGTH (bases);
1978 int i;
1979
1980 for (i = 0; i < n_bases; ++i)
1981 {
1982 tree binfo = TREE_VEC_ELT (bases, i);
1983 int num;
1984 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1985 tree type = BINFO_TYPE (binfo);
1986
1987 num = classify_argument (TYPE_MODE (type),
1988 type, subclasses,
1989 (offset + bit_offset) % 256);
1990 if (!num)
1991 return 0;
1992 for (i = 0; i < num; i++)
1993 {
db01f480 1994 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1995 classes[i + pos] =
1996 merge_classes (subclasses[i], classes[i + pos]);
1997 }
1998 }
1999 }
2000 /* And now merge the fields of structure. */
53c17031
JH
2001 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2002 {
2003 if (TREE_CODE (field) == FIELD_DECL)
2004 {
2005 int num;
2006
2007 /* Bitfields are always classified as integer. Handle them
2008 early, since later code would consider them to be
2009 misaligned integers. */
2010 if (DECL_BIT_FIELD (field))
2011 {
2012 for (i = int_bit_position (field) / 8 / 8;
2013 i < (int_bit_position (field)
2014 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2015 + 63) / 8 / 8; i++)
53c17031
JH
2016 classes[i] =
2017 merge_classes (X86_64_INTEGER_CLASS,
2018 classes[i]);
2019 }
2020 else
2021 {
2022 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2023 TREE_TYPE (field), subclasses,
2024 (int_bit_position (field)
2025 + bit_offset) % 256);
2026 if (!num)
2027 return 0;
2028 for (i = 0; i < num; i++)
2029 {
2030 int pos =
db01f480 2031 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2032 classes[i + pos] =
2033 merge_classes (subclasses[i], classes[i + pos]);
2034 }
2035 }
2036 }
2037 }
2038 }
2039 /* Arrays are handled as small records. */
2040 else if (TREE_CODE (type) == ARRAY_TYPE)
2041 {
2042 int num;
2043 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2044 TREE_TYPE (type), subclasses, bit_offset);
2045 if (!num)
2046 return 0;
2047
2048 /* The partial classes are now full classes. */
2049 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2050 subclasses[0] = X86_64_SSE_CLASS;
2051 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2052 subclasses[0] = X86_64_INTEGER_CLASS;
2053
2054 for (i = 0; i < words; i++)
2055 classes[i] = subclasses[i % num];
2056 }
2057 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2058 else if (TREE_CODE (type) == UNION_TYPE
2059 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2060 {
91ea38f9
JH
2061 /* For classes first merge in the field of the subclasses. */
2062 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2063 {
2064 tree bases = TYPE_BINFO_BASETYPES (type);
2065 int n_bases = TREE_VEC_LENGTH (bases);
2066 int i;
2067
2068 for (i = 0; i < n_bases; ++i)
2069 {
2070 tree binfo = TREE_VEC_ELT (bases, i);
2071 int num;
2072 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2073 tree type = BINFO_TYPE (binfo);
2074
2075 num = classify_argument (TYPE_MODE (type),
2076 type, subclasses,
db01f480 2077 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2078 if (!num)
2079 return 0;
2080 for (i = 0; i < num; i++)
2081 {
c16576e6 2082 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2083 classes[i + pos] =
2084 merge_classes (subclasses[i], classes[i + pos]);
2085 }
2086 }
2087 }
53c17031
JH
2088 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2089 {
2090 if (TREE_CODE (field) == FIELD_DECL)
2091 {
2092 int num;
2093 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2094 TREE_TYPE (field), subclasses,
2095 bit_offset);
2096 if (!num)
2097 return 0;
2098 for (i = 0; i < num; i++)
2099 classes[i] = merge_classes (subclasses[i], classes[i]);
2100 }
2101 }
2102 }
448ec26c
WH
2103 else if (TREE_CODE (type) == SET_TYPE)
2104 {
2105 if (bytes <= 4)
2106 {
2107 classes[0] = X86_64_INTEGERSI_CLASS;
2108 return 1;
2109 }
2110 else if (bytes <= 8)
2111 {
2112 classes[0] = X86_64_INTEGER_CLASS;
2113 return 1;
2114 }
2115 else if (bytes <= 12)
2116 {
2117 classes[0] = X86_64_INTEGER_CLASS;
2118 classes[1] = X86_64_INTEGERSI_CLASS;
2119 return 2;
2120 }
2121 else
2122 {
2123 classes[0] = X86_64_INTEGER_CLASS;
2124 classes[1] = X86_64_INTEGER_CLASS;
2125 return 2;
2126 }
2127 }
53c17031
JH
2128 else
2129 abort ();
2130
2131 /* Final merger cleanup. */
2132 for (i = 0; i < words; i++)
2133 {
2134 /* If one class is MEMORY, everything should be passed in
2135 memory. */
2136 if (classes[i] == X86_64_MEMORY_CLASS)
2137 return 0;
2138
d6a7951f 2139 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2140 X86_64_SSE_CLASS. */
2141 if (classes[i] == X86_64_SSEUP_CLASS
2142 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2143 classes[i] = X86_64_SSE_CLASS;
2144
d6a7951f 2145 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2146 if (classes[i] == X86_64_X87UP_CLASS
2147 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2148 classes[i] = X86_64_SSE_CLASS;
2149 }
2150 return words;
2151 }
2152
2153 /* Compute alignment needed. We align all types to natural boundaries with
2154 exception of XFmode that is aligned to 64bits. */
2155 if (mode != VOIDmode && mode != BLKmode)
2156 {
2157 int mode_alignment = GET_MODE_BITSIZE (mode);
2158
2159 if (mode == XFmode)
2160 mode_alignment = 128;
2161 else if (mode == XCmode)
2162 mode_alignment = 256;
f5143c46 2163 /* Misaligned fields are always returned in memory. */
53c17031
JH
2164 if (bit_offset % mode_alignment)
2165 return 0;
2166 }
2167
2168 /* Classification of atomic types. */
2169 switch (mode)
2170 {
2171 case DImode:
2172 case SImode:
2173 case HImode:
2174 case QImode:
2175 case CSImode:
2176 case CHImode:
2177 case CQImode:
2178 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2179 classes[0] = X86_64_INTEGERSI_CLASS;
2180 else
2181 classes[0] = X86_64_INTEGER_CLASS;
2182 return 1;
2183 case CDImode:
2184 case TImode:
2185 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2186 return 2;
2187 case CTImode:
2188 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2189 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2190 return 4;
2191 case SFmode:
2192 if (!(bit_offset % 64))
2193 classes[0] = X86_64_SSESF_CLASS;
2194 else
2195 classes[0] = X86_64_SSE_CLASS;
2196 return 1;
2197 case DFmode:
2198 classes[0] = X86_64_SSEDF_CLASS;
2199 return 1;
f8a1ebc6 2200 case XFmode:
53c17031
JH
2201 classes[0] = X86_64_X87_CLASS;
2202 classes[1] = X86_64_X87UP_CLASS;
2203 return 2;
f8a1ebc6 2204 case TFmode:
cf2348cb
JH
2205 case TCmode:
2206 return 0;
f8a1ebc6 2207 case XCmode:
53c17031
JH
2208 classes[0] = X86_64_X87_CLASS;
2209 classes[1] = X86_64_X87UP_CLASS;
2210 classes[2] = X86_64_X87_CLASS;
2211 classes[3] = X86_64_X87UP_CLASS;
2212 return 4;
2213 case DCmode:
2214 classes[0] = X86_64_SSEDF_CLASS;
2215 classes[1] = X86_64_SSEDF_CLASS;
2216 return 2;
2217 case SCmode:
2218 classes[0] = X86_64_SSE_CLASS;
2219 return 1;
e95d6b23
JH
2220 case V4SFmode:
2221 case V4SImode:
495333a6
JH
2222 case V16QImode:
2223 case V8HImode:
2224 case V2DFmode:
2225 case V2DImode:
e95d6b23
JH
2226 classes[0] = X86_64_SSE_CLASS;
2227 classes[1] = X86_64_SSEUP_CLASS;
2228 return 2;
2229 case V2SFmode:
2230 case V2SImode:
2231 case V4HImode:
2232 case V8QImode:
1194ca05 2233 return 0;
53c17031 2234 case BLKmode:
e95d6b23 2235 case VOIDmode:
53c17031
JH
2236 return 0;
2237 default:
2238 abort ();
2239 }
2240}
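
/* A worked example of the classification above (hypothetical type):

     struct s { double d; long l; };

   is 16 bytes, i.e. two 8-byte words, and classifies as
   { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }: when passed by value,
   word 0 goes in an SSE register and word 1 in a general purpose
   register.  */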
2241
2242/* Examine the argument and set the number of registers required in each
f5143c46 2243 class. Return 0 iff the parameter should be passed in memory. */
53c17031 2244static int
b96a374d
AJ
2245examine_argument (enum machine_mode mode, tree type, int in_return,
2246 int *int_nregs, int *sse_nregs)
53c17031
JH
2247{
2248 enum x86_64_reg_class class[MAX_CLASSES];
2249 int n = classify_argument (mode, type, class, 0);
2250
2251 *int_nregs = 0;
2252 *sse_nregs = 0;
2253 if (!n)
2254 return 0;
2255 for (n--; n >= 0; n--)
2256 switch (class[n])
2257 {
2258 case X86_64_INTEGER_CLASS:
2259 case X86_64_INTEGERSI_CLASS:
2260 (*int_nregs)++;
2261 break;
2262 case X86_64_SSE_CLASS:
2263 case X86_64_SSESF_CLASS:
2264 case X86_64_SSEDF_CLASS:
2265 (*sse_nregs)++;
2266 break;
2267 case X86_64_NO_CLASS:
2268 case X86_64_SSEUP_CLASS:
2269 break;
2270 case X86_64_X87_CLASS:
2271 case X86_64_X87UP_CLASS:
2272 if (!in_return)
2273 return 0;
2274 break;
2275 case X86_64_MEMORY_CLASS:
2276 abort ();
2277 }
2278 return 1;
2279}
2280/* Construct container for the argument used by GCC interface. See
2281 FUNCTION_ARG for the detailed description. */
2282static rtx
b96a374d
AJ
2283construct_container (enum machine_mode mode, tree type, int in_return,
2284 int nintregs, int nsseregs, const int * intreg,
2285 int sse_regno)
53c17031
JH
2286{
2287 enum machine_mode tmpmode;
2288 int bytes =
2289 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2290 enum x86_64_reg_class class[MAX_CLASSES];
2291 int n;
2292 int i;
2293 int nexps = 0;
2294 int needed_sseregs, needed_intregs;
2295 rtx exp[MAX_CLASSES];
2296 rtx ret;
2297
2298 n = classify_argument (mode, type, class, 0);
2299 if (TARGET_DEBUG_ARG)
2300 {
2301 if (!n)
2302 fprintf (stderr, "Memory class\n");
2303 else
2304 {
2305 fprintf (stderr, "Classes:");
2306 for (i = 0; i < n; i++)
2307 {
2308 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2309 }
2310 fprintf (stderr, "\n");
2311 }
2312 }
2313 if (!n)
2314 return NULL;
2315 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2316 return NULL;
2317 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2318 return NULL;
2319
2320 /* First construct simple cases. Avoid SCmode, since we want to use
2321 single register to pass this type. */
2322 if (n == 1 && mode != SCmode)
2323 switch (class[0])
2324 {
2325 case X86_64_INTEGER_CLASS:
2326 case X86_64_INTEGERSI_CLASS:
2327 return gen_rtx_REG (mode, intreg[0]);
2328 case X86_64_SSE_CLASS:
2329 case X86_64_SSESF_CLASS:
2330 case X86_64_SSEDF_CLASS:
2331 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2332 case X86_64_X87_CLASS:
2333 return gen_rtx_REG (mode, FIRST_STACK_REG);
2334 case X86_64_NO_CLASS:
2335 /* Zero sized array, struct or class. */
2336 return NULL;
2337 default:
2338 abort ();
2339 }
2340 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2341 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2342 if (n == 2
2343 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
f8a1ebc6 2344 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
53c17031
JH
2345 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2346 && class[1] == X86_64_INTEGER_CLASS
f8a1ebc6 2347 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
2348 && intreg[0] + 1 == intreg[1])
2349 return gen_rtx_REG (mode, intreg[0]);
2350 if (n == 4
2351 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2352 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
f8a1ebc6 2353 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
53c17031
JH
2354
2355 /* Otherwise figure out the entries of the PARALLEL. */
2356 for (i = 0; i < n; i++)
2357 {
2358 switch (class[i])
2359 {
2360 case X86_64_NO_CLASS:
2361 break;
2362 case X86_64_INTEGER_CLASS:
2363 case X86_64_INTEGERSI_CLASS:
d1f87653 2364 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2365 if (i * 8 + 8 > bytes)
2366 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2367 else if (class[i] == X86_64_INTEGERSI_CLASS)
2368 tmpmode = SImode;
2369 else
2370 tmpmode = DImode;
2371 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2372 if (tmpmode == BLKmode)
2373 tmpmode = DImode;
2374 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2375 gen_rtx_REG (tmpmode, *intreg),
2376 GEN_INT (i*8));
2377 intreg++;
2378 break;
2379 case X86_64_SSESF_CLASS:
2380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2381 gen_rtx_REG (SFmode,
2382 SSE_REGNO (sse_regno)),
2383 GEN_INT (i*8));
2384 sse_regno++;
2385 break;
2386 case X86_64_SSEDF_CLASS:
2387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2388 gen_rtx_REG (DFmode,
2389 SSE_REGNO (sse_regno)),
2390 GEN_INT (i*8));
2391 sse_regno++;
2392 break;
2393 case X86_64_SSE_CLASS:
12f5c45e
JH
2394 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2395 tmpmode = TImode;
53c17031
JH
2396 else
2397 tmpmode = DImode;
2398 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2399 gen_rtx_REG (tmpmode,
2400 SSE_REGNO (sse_regno)),
2401 GEN_INT (i*8));
12f5c45e
JH
2402 if (tmpmode == TImode)
2403 i++;
53c17031
JH
2404 sse_regno++;
2405 break;
2406 default:
2407 abort ();
2408 }
2409 }
2410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2411 for (i = 0; i < nexps; i++)
2412 XVECEXP (ret, 0, i) = exp [i];
2413 return ret;
2414}
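
/* Continuing the struct s { double d; long l; } example, the container
   built above is a PARALLEL of two EXPR_LISTs, roughly
   (reg:DF xmm0) at byte offset 0 and (reg:DI di) at byte offset 8,
   assuming no argument registers are in use yet.  */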
2415
b08de47e
MM
2416/* Update the data in CUM to advance over an argument
2417 of mode MODE and data type TYPE.
2418 (TYPE is null for libcalls where that information may not be available.) */
2419
2420void
b96a374d
AJ
2421function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2422 enum machine_mode mode, /* current arg mode */
2423 tree type, /* type of the argument or 0 if lib support */
2424 int named) /* whether or not the argument was named */
b08de47e 2425{
5ac9118e
KG
2426 int bytes =
2427 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2428 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2429
2430 if (TARGET_DEBUG_ARG)
2431 fprintf (stderr,
e9a25f70 2432 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2433 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2434 if (TARGET_64BIT)
b08de47e 2435 {
53c17031
JH
2436 int int_nregs, sse_nregs;
2437 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2438 cum->words += words;
2439 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2440 {
53c17031
JH
2441 cum->nregs -= int_nregs;
2442 cum->sse_nregs -= sse_nregs;
2443 cum->regno += int_nregs;
2444 cum->sse_regno += sse_nregs;
82a127a9 2445 }
53c17031
JH
2446 else
2447 cum->words += words;
b08de47e 2448 }
a4f31c00 2449 else
82a127a9 2450 {
53c17031
JH
2451 if (TARGET_SSE && mode == TImode)
2452 {
2453 cum->sse_words += words;
2454 cum->sse_nregs -= 1;
2455 cum->sse_regno += 1;
2456 if (cum->sse_nregs <= 0)
2457 {
2458 cum->sse_nregs = 0;
2459 cum->sse_regno = 0;
2460 }
2461 }
2462 else
82a127a9 2463 {
53c17031
JH
2464 cum->words += words;
2465 cum->nregs -= words;
2466 cum->regno += words;
2467
2468 if (cum->nregs <= 0)
2469 {
2470 cum->nregs = 0;
2471 cum->regno = 0;
2472 }
82a127a9
CM
2473 }
2474 }
b08de47e
MM
2475 return;
2476}
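
/* For example, on ia32 with regparm 3 in effect, advancing over two int
   arguments moves cum->regno from 0 to 2 and cum->nregs from 3 to 1, so
   a third int argument would still be passed in a register.  */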
2477
2478/* Define where to put the arguments to a function.
2479 Value is zero to push the argument on the stack,
2480 or a hard register in which to store the argument.
2481
2482 MODE is the argument's machine mode.
2483 TYPE is the data type of the argument (as a tree).
2484 This is null for libcalls where that information may
2485 not be available.
2486 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2487 the preceding args and about the function being called.
2488 NAMED is nonzero if this argument is a named parameter
2489 (otherwise it is an extra parameter matching an ellipsis). */
2490
07933f72 2491rtx
b96a374d
AJ
2492function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2493 enum machine_mode mode, /* current arg mode */
2494 tree type, /* type of the argument or 0 if lib support */
2495 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2496{
2497 rtx ret = NULL_RTX;
5ac9118e
KG
2498 int bytes =
2499 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2500 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2501
5bdc5878 2502 /* Handle a hidden AL argument containing the number of registers for varargs
53c17031
JH
2503 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2504 any AL settings. */
32ee7d1d 2505 if (mode == VOIDmode)
b08de47e 2506 {
53c17031
JH
2507 if (TARGET_64BIT)
2508 return GEN_INT (cum->maybe_vaarg
2509 ? (cum->sse_nregs < 0
2510 ? SSE_REGPARM_MAX
2511 : cum->sse_regno)
2512 : -1);
2513 else
2514 return constm1_rtx;
b08de47e 2515 }
53c17031
JH
2516 if (TARGET_64BIT)
2517 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2518 &x86_64_int_parameter_registers [cum->regno],
2519 cum->sse_regno);
2520 else
2521 switch (mode)
2522 {
2523 /* For now, pass fp/complex values on the stack. */
2524 default:
2525 break;
2526
2527 case BLKmode:
8d454008
RH
2528 if (bytes < 0)
2529 break;
2530 /* FALLTHRU */
53c17031
JH
2531 case DImode:
2532 case SImode:
2533 case HImode:
2534 case QImode:
2535 if (words <= cum->nregs)
b96a374d
AJ
2536 {
2537 int regno = cum->regno;
2538
2539 /* Fastcall allocates the first two DWORD (SImode) or
2540 smaller arguments to ECX and EDX. */
2541 if (cum->fastcall)
2542 {
2543 if (mode == BLKmode || mode == DImode)
2544 break;
2545
2546 /* ECX not EAX is the first allocated register. */
2547 if (regno == 0)
e767b5be 2548 regno = 2;
b96a374d
AJ
2549 }
2550 ret = gen_rtx_REG (mode, regno);
2551 }
53c17031
JH
2552 break;
2553 case TImode:
2554 if (cum->sse_nregs)
2555 ret = gen_rtx_REG (mode, cum->sse_regno);
2556 break;
2557 }
b08de47e
MM
2558
2559 if (TARGET_DEBUG_ARG)
2560 {
2561 fprintf (stderr,
91ea38f9 2562 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2563 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2564
2565 if (ret)
91ea38f9 2566 print_simple_rtl (stderr, ret);
b08de47e
MM
2567 else
2568 fprintf (stderr, ", stack");
2569
2570 fprintf (stderr, " )\n");
2571 }
2572
2573 return ret;
2574}
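
/* For example, for a fastcall function the first SImode (or smaller)
   argument is remapped from regno 0 to regno 2 (%ecx) by the code above,
   the second lands in regno 1 (%edx), and DImode or BLKmode arguments
   fall through to the stack.  */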
53c17031 2575
09b2e78d
ZD
2576/* A C expression that indicates when an argument must be passed by
2577 reference. If nonzero for an argument, a copy of that argument is
2578 made in memory and a pointer to the argument is passed instead of
2579 the argument itself. The pointer is passed in whatever way is
2580 appropriate for passing a pointer to that type. */
2581
2582int
b96a374d
AJ
2583function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2584 enum machine_mode mode ATTRIBUTE_UNUSED,
2585 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2586{
2587 if (!TARGET_64BIT)
2588 return 0;
2589
2590 if (type && int_size_in_bytes (type) == -1)
2591 {
2592 if (TARGET_DEBUG_ARG)
2593 fprintf (stderr, "function_arg_pass_by_reference\n");
2594 return 1;
2595 }
2596
2597 return 0;
2598}
2599
8b978a57
JH
2600/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2601 passing ABI. */
2602static bool
b96a374d 2603contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2604{
2605 enum machine_mode mode = TYPE_MODE (type);
2606 if (SSE_REG_MODE_P (mode)
2607 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2608 return true;
2609 if (TYPE_ALIGN (type) < 128)
2610 return false;
2611
2612 if (AGGREGATE_TYPE_P (type))
2613 {
2a43945f 2614 /* Walk the aggregates recursively. */
8b978a57
JH
2615 if (TREE_CODE (type) == RECORD_TYPE
2616 || TREE_CODE (type) == UNION_TYPE
2617 || TREE_CODE (type) == QUAL_UNION_TYPE)
2618 {
2619 tree field;
2620
2621 if (TYPE_BINFO (type) != NULL
2622 && TYPE_BINFO_BASETYPES (type) != NULL)
2623 {
2624 tree bases = TYPE_BINFO_BASETYPES (type);
2625 int n_bases = TREE_VEC_LENGTH (bases);
2626 int i;
2627
2628 for (i = 0; i < n_bases; ++i)
2629 {
2630 tree binfo = TREE_VEC_ELT (bases, i);
2631 tree type = BINFO_TYPE (binfo);
2632
2633 if (contains_128bit_aligned_vector_p (type))
2634 return true;
2635 }
2636 }
2637 /* And now merge the fields of structure. */
2638 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2639 {
2640 if (TREE_CODE (field) == FIELD_DECL
2641 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2642 return true;
2643 }
2644 }
2645 /* Just for use if some languages pass arrays by value. */
2646 else if (TREE_CODE (type) == ARRAY_TYPE)
2647 {
2648 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2649 return true;
2650 }
2651 else
2652 abort ();
2653 }
2654 return false;
2655}
2656
bb498ea3
AH
2657/* Gives the alignment boundary, in bits, of an argument with the
2658 specified mode and type. */
53c17031
JH
2659
2660int
b96a374d 2661ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2662{
2663 int align;
53c17031
JH
2664 if (type)
2665 align = TYPE_ALIGN (type);
2666 else
2667 align = GET_MODE_ALIGNMENT (mode);
2668 if (align < PARM_BOUNDARY)
2669 align = PARM_BOUNDARY;
8b978a57
JH
2670 if (!TARGET_64BIT)
2671 {
2672 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2673 make an exception for SSE modes since these require 128bit
b96a374d 2674 alignment.
8b978a57
JH
2675
2676 The handling here differs from field_alignment. ICC aligns MMX
2677 arguments to 4 byte boundaries, while structure fields are aligned
2678 to 8 byte boundaries. */
2679 if (!type)
2680 {
2681 if (!SSE_REG_MODE_P (mode))
2682 align = PARM_BOUNDARY;
2683 }
2684 else
2685 {
2686 if (!contains_128bit_aligned_vector_p (type))
2687 align = PARM_BOUNDARY;
2688 }
8b978a57 2689 }
53c17031
JH
2690 if (align > 128)
2691 align = 128;
2692 return align;
2693}
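
/* For example, on ia32 a double argument gets PARM_BOUNDARY (32 bits)
   despite its 64-bit natural alignment, while a __m128 argument, or a
   struct containing one, keeps a 128-bit boundary.  */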
2694
2695/* Return true if N is a possible register number of function value. */
2696bool
b96a374d 2697ix86_function_value_regno_p (int regno)
53c17031
JH
2698{
2699 if (!TARGET_64BIT)
2700 {
2701 return ((regno) == 0
2702 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2703 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2704 }
2705 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2706 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2707 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2708}
2709
2710/* Define how to find the value returned by a function.
2711 VALTYPE is the data type of the value (as a tree).
2712 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2713 otherwise, FUNC is 0. */
2714rtx
b96a374d 2715ix86_function_value (tree valtype)
53c17031
JH
2716{
2717 if (TARGET_64BIT)
2718 {
2719 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2720 REGPARM_MAX, SSE_REGPARM_MAX,
2721 x86_64_int_return_registers, 0);
d1f87653
KH
2722 /* For zero-sized structures, construct_container returns NULL, but we need
2723 to keep the rest of the compiler happy by returning a meaningful value. */
53c17031
JH
2724 if (!ret)
2725 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2726 return ret;
2727 }
2728 else
b069de3b
SS
2729 return gen_rtx_REG (TYPE_MODE (valtype),
2730 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2731}
2732
f5143c46 2733/* Return true iff TYPE is returned in memory. */
53c17031 2734int
b96a374d 2735ix86_return_in_memory (tree type)
53c17031 2736{
a30b6839
RH
2737 int needed_intregs, needed_sseregs, size;
2738 enum machine_mode mode = TYPE_MODE (type);
2739
53c17031 2740 if (TARGET_64BIT)
a30b6839
RH
2741 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2742
2743 if (mode == BLKmode)
2744 return 1;
2745
2746 size = int_size_in_bytes (type);
2747
2748 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2749 return 0;
2750
2751 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 2752 {
a30b6839
RH
2753 /* User-created vectors small enough to fit in EAX. */
2754 if (size < 8)
5e062767 2755 return 0;
a30b6839
RH
2756
2757 /* MMX/3dNow values are returned on the stack, since we've
2758 got to EMMS/FEMMS before returning. */
2759 if (size == 8)
53c17031 2760 return 1;
a30b6839
RH
2761
2762 /* SSE values are returned in XMM0. */
2763 /* ??? Except when it doesn't exist? We have a choice of
2764 either (1) being abi incompatible with a -march switch,
2765 or (2) generating an error here. Given no good solution,
2766 I think the safest thing is one warning. The user won't
2767 be able to use -Werror, but... */
2768 if (size == 16)
2769 {
2770 static bool warned;
2771
2772 if (TARGET_SSE)
2773 return 0;
2774
2775 if (!warned)
2776 {
2777 warned = true;
2778 warning ("SSE vector return without SSE enabled "
2779 "changes the ABI");
2780 }
2781 return 1;
2782 }
53c17031 2783 }
a30b6839 2784
cf2348cb 2785 if (mode == XFmode)
a30b6839 2786 return 0;
f8a1ebc6 2787
a30b6839
RH
2788 if (size > 12)
2789 return 1;
2790 return 0;
53c17031
JH
2791}
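
/* For example, on ia32 a 16-byte vector such as __m128 is returned in
   %xmm0 when SSE is enabled (with the one-time ABI warning above when it
   is not), an 8-byte MMX vector is always returned in memory, and
   anything else larger than 12 bytes goes to memory as well.  */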
2792
2793/* Define how to find the value returned by a library function
2794 assuming the value has mode MODE. */
2795rtx
b96a374d 2796ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2797{
2798 if (TARGET_64BIT)
2799 {
2800 switch (mode)
2801 {
f8a1ebc6
JH
2802 case SFmode:
2803 case SCmode:
2804 case DFmode:
2805 case DCmode:
2806 return gen_rtx_REG (mode, FIRST_SSE_REG);
2807 case XFmode:
2808 case XCmode:
2809 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2810 case TFmode:
f8a1ebc6
JH
2811 case TCmode:
2812 return NULL;
2813 default:
2814 return gen_rtx_REG (mode, 0);
53c17031
JH
2815 }
2816 }
2817 else
f8a1ebc6 2818 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
2819}
2820
2821/* Given a mode, return the register to use for a return value. */
2822
2823static int
b96a374d 2824ix86_value_regno (enum machine_mode mode)
b069de3b 2825{
a30b6839 2826 /* Floating point return values in %st(0). */
b069de3b
SS
2827 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2828 return FIRST_FLOAT_REG;
a30b6839
RH
2829 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2830 we prevent this case when SSE is not available. */
2831 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 2832 return FIRST_SSE_REG;
a30b6839 2833 /* Everything else in %eax. */
b069de3b 2834 return 0;
53c17031 2835}
ad919812
JH
2836\f
2837/* Create the va_list data type. */
53c17031 2838
c35d187f
RH
2839static tree
2840ix86_build_builtin_va_list (void)
ad919812
JH
2841{
2842 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2843
ad919812
JH
2844 /* For i386 we use plain pointer to argument area. */
2845 if (!TARGET_64BIT)
2846 return build_pointer_type (char_type_node);
2847
f1e639b1 2848 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2849 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2850
fce5a9f2 2851 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2852 unsigned_type_node);
fce5a9f2 2853 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2854 unsigned_type_node);
2855 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2856 ptr_type_node);
2857 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2858 ptr_type_node);
2859
2860 DECL_FIELD_CONTEXT (f_gpr) = record;
2861 DECL_FIELD_CONTEXT (f_fpr) = record;
2862 DECL_FIELD_CONTEXT (f_ovf) = record;
2863 DECL_FIELD_CONTEXT (f_sav) = record;
2864
2865 TREE_CHAIN (record) = type_decl;
2866 TYPE_NAME (record) = type_decl;
2867 TYPE_FIELDS (record) = f_gpr;
2868 TREE_CHAIN (f_gpr) = f_fpr;
2869 TREE_CHAIN (f_fpr) = f_ovf;
2870 TREE_CHAIN (f_ovf) = f_sav;
2871
2872 layout_type (record);
2873
2874 /* The correct type is an array type of one element. */
2875 return build_array_type (record, build_index_type (size_zero_node));
2876}
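
/* The record built above corresponds to the x86-64 ABI's

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   where the one-element array type gives va_list its pass-by-reference
   behavior.  */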
2877
2878/* Perform any actions needed for a function that is receiving a
fce5a9f2 2879 variable number of arguments.
ad919812
JH
2880
2881 CUM is as above.
2882
2883 MODE and TYPE are the mode and type of the current parameter.
2884
2885 PRETEND_SIZE is a variable that should be set to the amount of stack
2886 that must be pushed by the prolog to pretend that our caller pushed
2887 it.
2888
2889 Normally, this macro will push all remaining incoming registers on the
2890 stack and set PRETEND_SIZE to the length of the registers pushed. */
2891
2892void
b96a374d
AJ
2893ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2894 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2895 int no_rtl)
ad919812
JH
2896{
2897 CUMULATIVE_ARGS next_cum;
2898 rtx save_area = NULL_RTX, mem;
2899 rtx label;
2900 rtx label_ref;
2901 rtx tmp_reg;
2902 rtx nsse_reg;
2903 int set;
2904 tree fntype;
2905 int stdarg_p;
2906 int i;
2907
2908 if (!TARGET_64BIT)
2909 return;
2910
2911 /* Indicate to allocate space on the stack for varargs save area. */
2912 ix86_save_varrargs_registers = 1;
2913
5474eed5
JH
2914 cfun->stack_alignment_needed = 128;
2915
ad919812
JH
2916 fntype = TREE_TYPE (current_function_decl);
2917 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2918 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2919 != void_type_node));
2920
2921 /* For varargs, we do not want to skip the dummy va_dcl argument.
2922 For stdargs, we do want to skip the last named argument. */
2923 next_cum = *cum;
2924 if (stdarg_p)
2925 function_arg_advance (&next_cum, mode, type, 1);
2926
2927 if (!no_rtl)
2928 save_area = frame_pointer_rtx;
2929
2930 set = get_varargs_alias_set ();
2931
2932 for (i = next_cum.regno; i < ix86_regparm; i++)
2933 {
2934 mem = gen_rtx_MEM (Pmode,
2935 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2936 set_mem_alias_set (mem, set);
ad919812
JH
2937 emit_move_insn (mem, gen_rtx_REG (Pmode,
2938 x86_64_int_parameter_registers[i]));
2939 }
2940
2941 if (next_cum.sse_nregs)
2942 {
2943 /* Now emit code to save SSE registers. The AX parameter contains the number
d1f87653 2944 of SSE parameter registers used to call this function. We use
ad919812
JH
2945 sse_prologue_save insn template that produces computed jump across
2946 SSE saves. We need some preparation work to get this working. */
2947
2948 label = gen_label_rtx ();
2949 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2950
2951 /* Compute address to jump to :
2952 label - 5*eax + nnamed_sse_arguments*5 */
2953 tmp_reg = gen_reg_rtx (Pmode);
2954 nsse_reg = gen_reg_rtx (Pmode);
2955 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2956 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2957 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2958 GEN_INT (4))));
2959 if (next_cum.sse_regno)
2960 emit_move_insn
2961 (nsse_reg,
2962 gen_rtx_CONST (DImode,
2963 gen_rtx_PLUS (DImode,
2964 label_ref,
2965 GEN_INT (next_cum.sse_regno * 4))));
2966 else
2967 emit_move_insn (nsse_reg, label_ref);
2968 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2969
2970 /* Compute the address of the memory block we save into. We always use a
2971 pointer pointing 127 bytes past the first byte to store; this is needed
2972 to keep the instruction size limited to 4 bytes. */
2973 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2974 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2975 plus_constant (save_area,
2976 8 * REGPARM_MAX + 127)));
ad919812 2977 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2978 set_mem_alias_set (mem, set);
8ac61af7 2979 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2980
2981 /* And finally do the dirty job! */
8ac61af7
RK
2982 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2983 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2984 }
2985
2986}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count the number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8 * REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     The function prologue saves it right above the stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
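/* Illustrative note (not part of the original sources): the four fields
   initialized above match the x86-64 ABI definition of va_list,

       typedef struct {
	 unsigned int gp_offset;       -- n_gpr * 8
	 unsigned int fp_offset;       -- 8 * REGPARM_MAX + n_fpr * 16
	 void *overflow_arg_area;      -- first stack-passed argument
	 void *reg_save_area;          -- block saved by the prologue above
       } va_list[1];

   gp_offset and fp_offset index into reg_save_area: the integer
   registers occupy the first 8 * REGPARM_MAX bytes and each SSE
   register takes 16 bytes after them.  */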

/* Implement va_arg.  */
rtx
ix86_va_arg (tree valist, tree type)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Passed by reference.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);

      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* If we are passing a structure, verify that it occupies a
	 consecutive block of the register save area.  If not, we need
	 to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute the index to the start of the area used for integer
	 regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;
	  rtx x;

	  /* Never use the memory itself, as it has the alias set.  */
	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, x);
	  force_operand (x, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		     build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		     build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  t = build (PLUS_EXPR, TREE_TYPE (t), t,
	     build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  if (indirect_p)
    {
      r = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (r, get_varargs_alias_set ());
      emit_move_insn (addr_rtx, r);
    }

  return addr_rtx;
}
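/* Illustrative sketch (not part of the original sources) of the code
   emitted above for a value needing N integer and M SSE registers:

       if (ap->gp_offset >= (REGPARM_MAX - N + 1) * 8)  goto stack;
       if (ap->fp_offset >= (SSE_REGPARM_MAX - M + 1) * 16
			    + REGPARM_MAX * 8)           goto stack;
       addr = ap->reg_save_area + offset;   (copied through a temporary
       ap->gp_offset += N * 8;               when the pieces are not a
       ap->fp_offset += M * 16;              consecutive block)
       goto done;
     stack:
       addr = align (ap->overflow_arg_area);
       ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     done:
       return addr;  */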

/* Return nonzero if OP is either an i387 or an SSE fp register.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}

/* Return nonzero if OP is an i387 fp register.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}

/* Return nonzero if OP is a non-fp register_operand.  */
int
register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !ANY_FP_REG_P (op);
}

/* Return nonzero if OP is a register operand other than an
   i387 fp register.  */
int
register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !FP_REG_P (op);
}

/* Return nonzero if OP is a general operand representable on x86_64.  */

int
x86_64_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a general operand representable on x86_64
   as either a sign-extended or a zero-extended constant.  */

int
x86_64_szext_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */

int
x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand acceptable to the movabs
   patterns.  */

int
x86_64_movabs_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  if (GET_CODE (mem) != MEM)
    abort ();
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64
   as either a sign-extended or a zero-extended constant.  */

int
x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64.  */

int
x86_64_immediate_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero-extended constant.  */

int
x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}

/* Return nonzero if OP is a CONST_INT >= 1 and <= 31 (a valid operand
   for shift & compare patterns, as shifting by 0 does not change flags),
   else return zero.  */

int
const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (op) == UNSPEC
	  && XINT (op, 1) == UNSPEC_GOTPCREL)
	return 1;
      if (GET_CODE (op) == PLUS
	  && GET_CODE (XEXP (op, 0)) == UNSPEC
	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
    }
  else
    {
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for various thread-local symbols.  */

int
tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

static inline int
tls_symbolic_operand_1 (rtx op, enum tls_model kind)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op) == kind;
}

int
global_dynamic_symbolic_operand (register rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (register rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (register rtx op,
			       enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (register rtx op,
			     enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Test for a valid operand for a sibling call instruction.  Don't allow
   the arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  */
  return register_operand (op, Pmode);
}

int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (register rtx op, enum machine_mode mode)
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
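/* Illustrative note (not part of the original sources): 2, 4 and 8 are
   the scale factors the x86 addressing hardware accepts, as in

       leal (%eax,%ebx,4), %ecx      # ecx = eax + ebx * 4

   so a multiplication by one of these constants can be matched into a
   single lea.  */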

int
const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
}

int
const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
}

int
const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
}

int
const_0_to_255_operand (register rtx op,
			enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
}


/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as an operand of the DImode shift
   expander.  */

int
shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return false if this is any eliminable register or the stack register,
   otherwise work like register_operand.  */

int
index_register_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  return general_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return ANY_QI_REG_P (op);
}

/* Return true if op is the flags register.  */

int
flags_reg_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}

/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
int
vector_move_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  return (op == CONST0_RTX (GET_MODE (op)));
}

/* Return true if OP is a valid address that does not contain a
   segment override.  */

int
no_seg_address_operand (register rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
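/* Illustrative note (not part of the original sources): CMPSS/CMPPS
   provide exactly eight predicates in their immediate byte -- eq, lt,
   le, unord, neq, nlt, nle and ord.  The first group above maps onto
   them directly (NE -> neq, UNGE -> nlt, UNGT -> nle); the second
   group is accepted only when -mno-ieee-fp permits treating each code
   as its non-IEEE equivalent in that list.  */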
/* Return 1 if OP is a valid comparison operator in a valid mode.  */
int
ix86_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a valid comparison operator testing whether the
   carry flag is set.  */
int
ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (GET_CODE (XEXP (op, 0)) != REG
      || REGNO (XEXP (op, 0)) != FLAGS_REG
      || XEXP (op, 1) != const0_rtx)
    return 0;

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  else if (inmode != CCmode)
    return 0;
  return code == LTU;
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited set of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to a wider
   mode.  */

int
promotable_binary_operator (register rtx op,
			    enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiply, but the 386 and 486 do HImode multiply faster.  */
      return ix86_tune > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}

int
div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (rtx op, enum machine_mode mode)
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (register rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (register rtx op, enum machine_mode mode)
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
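/* Illustrative note (not part of the original sources): these strings
   spell out the values loaded by the special i387 opcodes --
       fldlg2 = log10(2), fldln2 = ln(2), fldl2e = log2(e),
       fldl2t = log2(10), fldpi = pi
   -- rounded here to XFmode so that real_identical can match user
   constants against them exactly.  */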

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction on
     those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && x86_ext_80387_constants & TUNEMASK)
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    }
  abort ();
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      abort ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32768 bytes of popped arguments, since
     that's all we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
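/* Illustrative note (not part of the original sources): the 32768 cap
   above appears to reflect the `ret $imm16' encoding -- a single
   return instruction can pop at most a 16-bit count of bytes, and
   keeping the count within the signed 16-bit range stays safely inside
   that limit.  */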

/* Return 1 if VALUE can be stored in the sign-extended immediate field.  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
       to be at least 32, and thus all acceptable constants are
       represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

    /* For certain code models, the symbolic references are known to fit.
       In the CM_SMALL_PIC model we know it fits if it is local to the
       shared library.  Don't count TLS SYMBOL_REFs here, since they
       should fit only if inside of the UNSPEC handled below.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
	      || ix86_cmodel == CM_KERNEL);

    /* We also may accept the offsetted memory references in certain
       special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	switch (XINT (XEXP (value, 0), 1))
	  {
	  case UNSPEC_GOTPCREL:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_NTPOFF:
	    return 1;
	  default:
	    break;
	  }
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that the latest object is 16MB before
		 the end of the 31-bit boundary.  We may also accept pretty
		 large negative constants, knowing that all objects are
		 in the positive half of the address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all objects reside in the
		 negative half of the 32-bit address space.  We may not
		 accept negative offsets, since they may be just off,
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case UNSPEC:
	      switch (XINT (op1, 1))
		{
		case UNSPEC_DTPOFF:
		case UNSPEC_NTPOFF:
		  if (offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		}
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if VALUE can be stored in the zero-extended immediate field.  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return ix86_cmodel == CM_SMALL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

    /* We also may accept the offsetted memory references in certain
       special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* For the small code model we may accept pretty large
		 positive offsets, since one bit is available for free.
		 Negative offsets are limited by the size of the NULL
		 pointer area specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
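/* Illustrative examples (not part of the original sources):
   -0x80000000 and 0x7fffffff are sign-extended values -- a movq with a
   32-bit immediate reproduces them -- while 0x80000000 is not, since
   it would sign-extend to 0xffffffff80000000.  It is, however, a
   zero-extended value: movl $0x80000000, %eax clears the upper 32 bits
   of %rax.  */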

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* Emit, at the end of the compilation unit, the pc thunks used for
   -fpic.  Each one loads its register with the return address of the
   caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
e9a25f70 4787}
8dfe5673 4788
0d7d98ee 4789/* Generate an "push" pattern for input ARG. */
e9a25f70 4790
e075ae69 4791static rtx
b96a374d 4792gen_push (rtx arg)
e9a25f70 4793{
c5c76735 4794 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4795 gen_rtx_MEM (Pmode,
4796 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4797 stack_pointer_rtx)),
4798 arg);
e9a25f70
JL
4799}
4800
bd09bdeb
RH
4801/* Return >= 0 if there is an unused call-clobbered register available
4802 for the entire function. */
4803
4804static unsigned int
b96a374d 4805ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4806{
4807 if (current_function_is_leaf && !current_function_profile)
4808 {
4809 int i;
4810 for (i = 2; i >= 0; --i)
4811 if (!regs_ever_live[i])
4812 return i;
4813 }
4814
4815 return INVALID_REGNUM;
4816}
fce5a9f2 4817
/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return
          || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  HOST_WIDE_INT offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when the amount of
     registers didn't change, as reload does multiple calls to the function
     and does not expect the decision to change within a single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features, and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
                       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached the end of the stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

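/* A worked example of the layout computed above (illustrative numbers):
   a 32-bit function with a frame pointer, two saved registers, 20 bytes
   of locals, 8 bytes of outgoing arguments, 4-byte local alignment and
   a 16-byte preferred boundary.  The return address and saved %ebp give
   hard_frame_pointer_offset = 8; the register save area brings the
   running offset to 16; padding1 = 0, so frame_pointer_offset = 16;
   locals bring it to 36 and outgoing arguments to 44; padding2 = 4
   rounds stack_pointer_offset up to 48.  to_allocate is then
   20 + 0 + 4 + 8 = 32, i.e. 48 minus the 8 bytes of %ebp/return address
   and the 8 bytes of register pushes.  */
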
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if the %r11 register is live and cannot be freely used, and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */
      if (style == 0)
        abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
                                                               offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

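/* Typical use (illustrative): the 32-bit prologue below calls

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1);

   which emits a single insn adjusting %esp and, because STYLE is
   negative, marks it RTX_FRAME_RELATED_P for unwind information.  */
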
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
        abort ();

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= 4;
        }

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
        {
          rtx t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
        }
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

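/* For a small 32-bit function with a frame pointer and one
   call-saved register, the insns emitted above typically assemble
   to something like (illustrative):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$20, %esp

   with moves into the allocated frame replacing the register push
   when the fast, move-based prologue is selected.  */
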
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (gen_rtx_MEM (Pmode, pointer),
                                        Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  Only exception is esp pointing directly to the
         end of block of saved registers, where we may simplify addressing
         mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style);
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
                                   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style);
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          if (!frame_pointer_needed)
            abort ();
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);
        }
      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in the 64bit ABI.  */
          if (TARGET_64BIT)
            abort ();

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
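
/* The common 32-bit epilogues produced above look roughly like
   (illustrative):

	addl	$20, %esp
	popl	%ebx
	popl	%ebp
	ret

   or, on the move-based path with a frame pointer,

	movl	-4(%ebp), %ebx
	leave
	ret  */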

/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

static int
ix86_decompose_address (register rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case REG:
            case SUBREG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
          || index == frame_pointer_rtx
          || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
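
/* Example (illustrative): for the address foo(%ebx,%esi,4), i.e. the RTL
   (plus (plus (mult (reg esi) (const_int 4)) (reg ebx)) (symbol_ref "foo")),
   the code above fills OUT with base = %ebx, index = %esi, scale = 4 and
   disp = the symbol_ref, and returns 1.  */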

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
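
/* Example (illustrative): on the K6, the address (%ebx,%esi) decomposes
   to base + index with scale 1 and no displacement, so it matches the
   third pattern above and costs 1 + 10 = 11, strongly discouraging it.  */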

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
        return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
         Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
          && tls_symbolic_operand (XEXP (inner, 0), Pmode))
        return false;

      if (GET_CODE (inner) == PLUS)
        {
          if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
            return false;
          inner = XEXP (inner, 0);
        }

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
          case UNSPEC_DTPOFF:
            return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
          default:
            return false;
          }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_TPOFF:
            return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (register rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
        return 0;
      if (GET_CODE (disp) == SYMBOL_REF
          && ix86_cmodel == CM_SMALL_PIC
          && SYMBOL_REF_LOCAL_P (disp))
        return 1;
      if (GET_CODE (disp) == LABEL_REF)
        return 1;
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS)
        {
          rtx op0 = XEXP (XEXP (disp, 0), 0);
          rtx op1 = XEXP (XEXP (disp, 0), 1);

          /* TLS references should always be enclosed in UNSPEC.  */
          if (tls_symbolic_operand (op0, GET_MODE (op0)))
            return 0;
          if (((GET_CODE (op0) == SYMBOL_REF
                && ix86_cmodel == CM_SMALL_PIC
                && SYMBOL_REF_LOCAL_P (op0))
               || GET_CODE (op0) == LABEL_REF)
              && GET_CODE (op1) == CONST_INT
              && INTVAL (op1) < 16*1024*1024
              && INTVAL (op1) >= -16*1024*1024)
            return 1;
        }
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here, as that limits the
         allowed distance of GOT table references.  We should not need
         these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XINT (disp, 1) != UNSPEC_GOTPCREL)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
              return 1;
          }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
          || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

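/* For instance (illustrative), a valid 32-bit PIC displacement for a
   local symbol is (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)),
   optionally with a CONST_INT addend folded in, whereas a global symbol
   must use UNSPEC_GOT, where no addend is accepted.  */
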
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; they can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
        reg = SUBREG_REG (base);
      else
        reg = base;

      if (GET_CODE (reg) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs here; they can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
        reg = SUBREG_REG (index);
      else
        reg = index;

      if (GET_CODE (reg) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_GOTPCREL:
            if (!flag_pic)
              abort ();
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in case
             the output register differs from the input.  While this
             can be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to
             be the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
        {
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway...  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
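
/* Example (illustrative): with -fpic on ia32, a global symbol "x"
   legitimizes to a load from the GOT,

	(mem:SI (plus:SI (reg:SI ebx)
			 (const:SI (unspec:SI [(symbol_ref "x")] UNSPEC_GOT))))

   copied into a fresh pseudo, while a local symbol becomes the cheaper
   pic-register-relative form
   (plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   with no memory reference at all.  */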

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          emit_libcall_block (insns, dest, rax, x);
        }
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      abort ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
                    enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
2a2ab3f9
JVA
6640\f
6641/* Print an integer constant expression in assembler syntax. Addition
6642 and subtraction are the only arithmetic that may appear in these
6643 expressions. FILE is the stdio stream to write to, X is the rtx, and
6644 CODE is the operand print code from the output string. */
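
/* For instance (an illustration, not from the original file): the UNSPEC
 case below prints (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
 as "foo@GOTOFF", and UNSPEC_GOTPCREL as "foo@GOTPCREL(%rip)". */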
6645
6646static void
b96a374d 6647output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6648{
6649 char buf[256];
6650
6651 switch (GET_CODE (x))
6652 {
6653 case PC:
6654 if (flag_pic)
6655 putc ('.', file);
6656 else
6657 abort ();
6658 break;
6659
6660 case SYMBOL_REF:
91bb873f 6661 assemble_name (file, XSTR (x, 0));
12969f45 6662 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 6663 fputs ("@PLT", file);
2a2ab3f9
JVA
6664 break;
6665
91bb873f
RH
6666 case LABEL_REF:
6667 x = XEXP (x, 0);
6668 /* FALLTHRU */
2a2ab3f9
JVA
6669 case CODE_LABEL:
6670 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6671 assemble_name (asm_out_file, buf);
6672 break;
6673
6674 case CONST_INT:
f64cecad 6675 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
6676 break;
6677
6678 case CONST:
6679 /* This used to output parentheses around the expression,
6680 but that does not work on the 386 (either ATT or BSD assembler). */
6681 output_pic_addr_const (file, XEXP (x, 0), code);
6682 break;
6683
6684 case CONST_DOUBLE:
6685 if (GET_MODE (x) == VOIDmode)
6686 {
6687 /* We can use %d if the number is <32 bits and positive. */
6688 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
6689 fprintf (file, "0x%lx%08lx",
6690 (unsigned long) CONST_DOUBLE_HIGH (x),
6691 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6692 else
f64cecad 6693 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
6694 }
6695 else
6696 /* We can't handle floating point constants;
6697 PRINT_OPERAND must handle them. */
6698 output_operand_lossage ("floating constant misused");
6699 break;
6700
6701 case PLUS:
e9a25f70 6702 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6703 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6704 {
2a2ab3f9 6705 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6706 putc ('+', file);
e9a25f70 6707 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6708 }
91bb873f 6709 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6710 {
2a2ab3f9 6711 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6712 putc ('+', file);
e9a25f70 6713 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6714 }
91bb873f
RH
6715 else
6716 abort ();
2a2ab3f9
JVA
6717 break;
6718
6719 case MINUS:
b069de3b
SS
6720 if (!TARGET_MACHO)
6721 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6722 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6723 putc ('-', file);
2a2ab3f9 6724 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6725 if (!TARGET_MACHO)
6726 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6727 break;
6728
91bb873f
RH
6729 case UNSPEC:
6730 if (XVECLEN (x, 0) != 1)
5bf0ebab 6731 abort ();
91bb873f
RH
6732 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6733 switch (XINT (x, 1))
77ebd435 6734 {
8ee41eaf 6735 case UNSPEC_GOT:
77ebd435
AJ
6736 fputs ("@GOT", file);
6737 break;
8ee41eaf 6738 case UNSPEC_GOTOFF:
77ebd435
AJ
6739 fputs ("@GOTOFF", file);
6740 break;
8ee41eaf 6741 case UNSPEC_GOTPCREL:
edfe8595 6742 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6743 break;
f996902d 6744 case UNSPEC_GOTTPOFF:
dea73790 6745 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6746 fputs ("@GOTTPOFF", file);
6747 break;
6748 case UNSPEC_TPOFF:
6749 fputs ("@TPOFF", file);
6750 break;
6751 case UNSPEC_NTPOFF:
75d38379
JJ
6752 if (TARGET_64BIT)
6753 fputs ("@TPOFF", file);
6754 else
6755 fputs ("@NTPOFF", file);
f996902d
RH
6756 break;
6757 case UNSPEC_DTPOFF:
6758 fputs ("@DTPOFF", file);
6759 break;
dea73790 6760 case UNSPEC_GOTNTPOFF:
75d38379
JJ
6761 if (TARGET_64BIT)
6762 fputs ("@GOTTPOFF(%rip)", file);
6763 else
6764 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6765 break;
6766 case UNSPEC_INDNTPOFF:
6767 fputs ("@INDNTPOFF", file);
6768 break;
77ebd435
AJ
6769 default:
6770 output_operand_lossage ("invalid UNSPEC as operand");
6771 break;
6772 }
91bb873f
RH
6773 break;
6774
2a2ab3f9
JVA
6775 default:
6776 output_operand_lossage ("invalid expression as operand");
6777 }
6778}
1865dbb5 6779
0f290768 6780/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6781 We need to handle our special PIC relocations. */
6782
0f290768 6783void
b96a374d 6784i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6785{
14f73b5a 6786#ifdef ASM_QUAD
18b5b8d6 6787 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6788#else
6789 if (TARGET_64BIT)
6790 abort ();
18b5b8d6 6791 fprintf (file, "%s", ASM_LONG);
14f73b5a 6792#endif
1865dbb5
JM
6793 if (flag_pic)
6794 output_pic_addr_const (file, x, '\0');
6795 else
6796 output_addr_const (file, x);
6797 fputc ('\n', file);
6798}
6799
b9203463
RH
6800/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6801 We need to emit DTP-relative relocations. */
6802
6803void
b96a374d 6804i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6805{
75d38379
JJ
6806 fputs (ASM_LONG, file);
6807 output_addr_const (file, x);
6808 fputs ("@DTPOFF", file);
b9203463
RH
6809 switch (size)
6810 {
6811 case 4:
b9203463
RH
6812 break;
6813 case 8:
75d38379 6814 fputs (", 0", file);
b9203463 6815 break;
b9203463
RH
6816 default:
6817 abort ();
6818 }
b9203463
RH
6819}
6820
1865dbb5
JM
6821/* In the name of slightly smaller debug output, and to cater to
 6822 general assembler lossage, recognize PIC+GOTOFF and turn it back
6823 into a direct symbol reference. */
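
/* Illustrative sketch (hypothetical RTL): a GOT load left looking like
 (mem (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
 is collapsed back to plain (symbol_ref "x") by the code below. */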
6824
69bd9368 6825static rtx
b96a374d 6826ix86_delegitimize_address (rtx orig_x)
1865dbb5 6827{
ec65b2e3 6828 rtx x = orig_x, y;
1865dbb5 6829
4c8c0dec
JJ
6830 if (GET_CODE (x) == MEM)
6831 x = XEXP (x, 0);
6832
6eb791fc
JH
6833 if (TARGET_64BIT)
6834 {
6835 if (GET_CODE (x) != CONST
6836 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6837 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6838 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6839 return orig_x;
6840 return XVECEXP (XEXP (x, 0), 0, 0);
6841 }
6842
1865dbb5 6843 if (GET_CODE (x) != PLUS
1865dbb5
JM
6844 || GET_CODE (XEXP (x, 1)) != CONST)
6845 return orig_x;
6846
ec65b2e3
JJ
6847 if (GET_CODE (XEXP (x, 0)) == REG
6848 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6849 /* %ebx + GOT/GOTOFF */
6850 y = NULL;
6851 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6852 {
6853 /* %ebx + %reg * scale + GOT/GOTOFF */
6854 y = XEXP (x, 0);
6855 if (GET_CODE (XEXP (y, 0)) == REG
6856 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6857 y = XEXP (y, 1);
6858 else if (GET_CODE (XEXP (y, 1)) == REG
6859 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6860 y = XEXP (y, 0);
6861 else
6862 return orig_x;
6863 if (GET_CODE (y) != REG
6864 && GET_CODE (y) != MULT
6865 && GET_CODE (y) != ASHIFT)
6866 return orig_x;
6867 }
6868 else
6869 return orig_x;
6870
1865dbb5
JM
6871 x = XEXP (XEXP (x, 1), 0);
6872 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6873 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6874 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6875 {
6876 if (y)
6877 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6878 return XVECEXP (x, 0, 0);
6879 }
1865dbb5
JM
6880
6881 if (GET_CODE (x) == PLUS
6882 && GET_CODE (XEXP (x, 0)) == UNSPEC
6883 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6884 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6885 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6886 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6887 {
6888 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6889 if (y)
6890 return gen_rtx_PLUS (Pmode, y, x);
6891 return x;
6892 }
1865dbb5
JM
6893
6894 return orig_x;
6895}
2a2ab3f9 6896\f
a269a03c 6897static void
b96a374d
AJ
6898put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6899 int fp, FILE *file)
a269a03c 6900{
a269a03c
JC
6901 const char *suffix;
6902
9a915772
JH
6903 if (mode == CCFPmode || mode == CCFPUmode)
6904 {
6905 enum rtx_code second_code, bypass_code;
6906 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6907 if (bypass_code != NIL || second_code != NIL)
b531087a 6908 abort ();
9a915772
JH
6909 code = ix86_fp_compare_code_to_integer (code);
6910 mode = CCmode;
6911 }
a269a03c
JC
6912 if (reverse)
6913 code = reverse_condition (code);
e075ae69 6914
a269a03c
JC
6915 switch (code)
6916 {
6917 case EQ:
6918 suffix = "e";
6919 break;
a269a03c
JC
6920 case NE:
6921 suffix = "ne";
6922 break;
a269a03c 6923 case GT:
7e08e190 6924 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6925 abort ();
6926 suffix = "g";
a269a03c 6927 break;
a269a03c 6928 case GTU:
e075ae69
RH
 6929 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
 6930 Those same assemblers have the same but opposite lossage on cmov. */
7e08e190 6931 if (mode != CCmode)
0f290768 6932 abort ();
e075ae69 6933 suffix = fp ? "nbe" : "a";
a269a03c 6934 break;
a269a03c 6935 case LT:
9076b9c1 6936 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6937 suffix = "s";
7e08e190 6938 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6939 suffix = "l";
9076b9c1 6940 else
0f290768 6941 abort ();
a269a03c 6942 break;
a269a03c 6943 case LTU:
9076b9c1 6944 if (mode != CCmode)
0f290768 6945 abort ();
a269a03c
JC
6946 suffix = "b";
6947 break;
a269a03c 6948 case GE:
9076b9c1 6949 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6950 suffix = "ns";
7e08e190 6951 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6952 suffix = "ge";
9076b9c1 6953 else
0f290768 6954 abort ();
a269a03c 6955 break;
a269a03c 6956 case GEU:
e075ae69 6957 /* ??? As above. */
7e08e190 6958 if (mode != CCmode)
0f290768 6959 abort ();
7e08e190 6960 suffix = fp ? "nb" : "ae";
a269a03c 6961 break;
a269a03c 6962 case LE:
7e08e190 6963 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6964 abort ();
6965 suffix = "le";
a269a03c 6966 break;
a269a03c 6967 case LEU:
9076b9c1
JH
6968 if (mode != CCmode)
6969 abort ();
7e08e190 6970 suffix = "be";
a269a03c 6971 break;
3a3677ff 6972 case UNORDERED:
9e7adcb3 6973 suffix = fp ? "u" : "p";
3a3677ff
RH
6974 break;
6975 case ORDERED:
9e7adcb3 6976 suffix = fp ? "nu" : "np";
3a3677ff 6977 break;
a269a03c
JC
6978 default:
6979 abort ();
6980 }
6981 fputs (suffix, file);
6982}
6983
e075ae69 6984void
b96a374d 6985print_reg (rtx x, int code, FILE *file)
e5cb57e8 6986{
9a623a65
ZW
6987 /* Code -1 indicates we are called from print_rtx, and it is not
6988 an error for a virtual register to appear here. */
6989 if (code == -1)
6990 code = 0;
6991 else if (REGNO (x) == ARG_POINTER_REGNUM
6992 || REGNO (x) == FRAME_POINTER_REGNUM
6993 || REGNO (x) == FLAGS_REG
6994 || REGNO (x) == FPSR_REG)
480feac0
ZW
6995 abort ();
6996
5bf0ebab 6997 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6998 putc ('%', file);
6999
ef6257cd 7000 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
7001 code = 2;
7002 else if (code == 'b')
7003 code = 1;
7004 else if (code == 'k')
7005 code = 4;
3f3f2124
JH
7006 else if (code == 'q')
7007 code = 8;
e075ae69
RH
7008 else if (code == 'y')
7009 code = 3;
7010 else if (code == 'h')
7011 code = 0;
7012 else
7013 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 7014
3f3f2124
JH
 7015 /* Irritatingly, AMD extended registers use a different naming convention
7016 from the normal registers. */
7017 if (REX_INT_REG_P (x))
7018 {
885a70fd
JH
7019 if (!TARGET_64BIT)
7020 abort ();
3f3f2124
JH
7021 switch (code)
7022 {
ef6257cd 7023 case 0:
c725bd79 7024 error ("extended registers have no high halves");
3f3f2124
JH
7025 break;
7026 case 1:
7027 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7028 break;
7029 case 2:
7030 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7031 break;
7032 case 4:
7033 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7034 break;
7035 case 8:
7036 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7037 break;
7038 default:
c725bd79 7039 error ("unsupported operand size for extended register");
3f3f2124
JH
7040 break;
7041 }
7042 return;
7043 }
e075ae69
RH
7044 switch (code)
7045 {
7046 case 3:
7047 if (STACK_TOP_P (x))
7048 {
7049 fputs ("st(0)", file);
7050 break;
7051 }
7052 /* FALLTHRU */
e075ae69 7053 case 8:
3f3f2124 7054 case 4:
e075ae69 7055 case 12:
446988df 7056 if (! ANY_FP_REG_P (x))
885a70fd 7057 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 7058 /* FALLTHRU */
a7180f70 7059 case 16:
e075ae69 7060 case 2:
d4c32b6f 7061 normal:
e075ae69
RH
7062 fputs (hi_reg_name[REGNO (x)], file);
7063 break;
7064 case 1:
d4c32b6f
RH
7065 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7066 goto normal;
e075ae69
RH
7067 fputs (qi_reg_name[REGNO (x)], file);
7068 break;
7069 case 0:
d4c32b6f
RH
7070 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7071 goto normal;
e075ae69
RH
7072 fputs (qi_high_reg_name[REGNO (x)], file);
7073 break;
7074 default:
7075 abort ();
fe25fea3 7076 }
e5cb57e8
SC
7077}
7078
f996902d
RH
7079/* Locate some local-dynamic symbol still in use by this function
7080 so that we can print its name in some tls_local_dynamic_base
7081 pattern. */
7082
7083static const char *
b96a374d 7084get_some_local_dynamic_name (void)
f996902d
RH
7085{
7086 rtx insn;
7087
7088 if (cfun->machine->some_ld_name)
7089 return cfun->machine->some_ld_name;
7090
7091 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7092 if (INSN_P (insn)
7093 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7094 return cfun->machine->some_ld_name;
7095
7096 abort ();
7097}
7098
7099static int
b96a374d 7100get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7101{
7102 rtx x = *px;
7103
7104 if (GET_CODE (x) == SYMBOL_REF
7105 && local_dynamic_symbolic_operand (x, Pmode))
7106 {
7107 cfun->machine->some_ld_name = XSTR (x, 0);
7108 return 1;
7109 }
7110
7111 return 0;
7112}
7113
2a2ab3f9 7114/* Meaning of CODE:
fe25fea3 7115 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 7116 C -- print opcode suffix for set/cmov insn.
fe25fea3 7117 c -- like C, but print reversed condition
ef6257cd 7118 F,f -- likewise, but for floating-point.
f6f5dff2
RO
7119 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7120 otherwise nothing
2a2ab3f9
JVA
7121 R -- print the prefix for register names.
7122 z -- print the opcode suffix for the size of the current operand.
7123 * -- print a star (in certain assembler syntax)
fb204271 7124 A -- print an absolute memory reference.
2a2ab3f9 7125 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
 7126 s -- print a shift double count, followed by the assembler's argument
7127 delimiter.
fe25fea3
SC
7128 b -- print the QImode name of the register for the indicated operand.
7129 %b0 would print %al if operands[0] is reg 0.
7130 w -- likewise, print the HImode name of the register.
7131 k -- likewise, print the SImode name of the register.
3f3f2124 7132 q -- likewise, print the DImode name of the register.
ef6257cd
JH
7133 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7134 y -- print "st(0)" instead of "st" as a register.
a46d1d38 7135 D -- print condition for SSE cmp instruction.
ef6257cd
JH
7136 P -- if PIC, print an @PLT suffix.
7137 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 7138 & -- print some in-use local-dynamic symbol name.
a46d1d38 7139 */
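
/* Example (hypothetical template, for illustration only): with
 operands[0] an SImode register, "mov%z0\t{%1, %0|%0, %1}" comes out
 as "movl ..." in AT&T syntax, while %k0/%w0/%b0 force the SImode,
 HImode and QImode register names (e.g. %eax, %ax, %al) regardless of
 the operand's own mode. */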
2a2ab3f9
JVA
7140
7141void
b96a374d 7142print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7143{
7144 if (code)
7145 {
7146 switch (code)
7147 {
7148 case '*':
80f33d06 7149 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
7150 putc ('*', file);
7151 return;
7152
f996902d
RH
7153 case '&':
7154 assemble_name (file, get_some_local_dynamic_name ());
7155 return;
7156
fb204271 7157 case 'A':
80f33d06 7158 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7159 putc ('*', file);
80f33d06 7160 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7161 {
7162 /* Intel syntax. For absolute addresses, registers should not
 7163 be surrounded by brackets. */
7164 if (GET_CODE (x) != REG)
7165 {
7166 putc ('[', file);
7167 PRINT_OPERAND (file, x, 0);
7168 putc (']', file);
7169 return;
7170 }
7171 }
80f33d06
GS
7172 else
7173 abort ();
fb204271
DN
7174
7175 PRINT_OPERAND (file, x, 0);
7176 return;
7177
7178
2a2ab3f9 7179 case 'L':
80f33d06 7180 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7181 putc ('l', file);
2a2ab3f9
JVA
7182 return;
7183
7184 case 'W':
80f33d06 7185 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7186 putc ('w', file);
2a2ab3f9
JVA
7187 return;
7188
7189 case 'B':
80f33d06 7190 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7191 putc ('b', file);
2a2ab3f9
JVA
7192 return;
7193
7194 case 'Q':
80f33d06 7195 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7196 putc ('l', file);
2a2ab3f9
JVA
7197 return;
7198
7199 case 'S':
80f33d06 7200 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7201 putc ('s', file);
2a2ab3f9
JVA
7202 return;
7203
5f1ec3e6 7204 case 'T':
80f33d06 7205 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7206 putc ('t', file);
5f1ec3e6
JVA
7207 return;
7208
2a2ab3f9
JVA
7209 case 'z':
7210 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7211 registers. */
2a2ab3f9
JVA
7212 if (STACK_REG_P (x))
7213 return;
7214
831c4e87
KC
7215 /* Likewise if using Intel opcodes. */
7216 if (ASSEMBLER_DIALECT == ASM_INTEL)
7217 return;
7218
7219 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
7220 switch (GET_MODE_SIZE (GET_MODE (x)))
7221 {
2a2ab3f9 7222 case 2:
155d8a47
JW
7223#ifdef HAVE_GAS_FILDS_FISTS
7224 putc ('s', file);
7225#endif
2a2ab3f9
JVA
7226 return;
7227
7228 case 4:
7229 if (GET_MODE (x) == SFmode)
7230 {
e075ae69 7231 putc ('s', file);
2a2ab3f9
JVA
7232 return;
7233 }
7234 else
e075ae69 7235 putc ('l', file);
2a2ab3f9
JVA
7236 return;
7237
5f1ec3e6 7238 case 12:
2b589241 7239 case 16:
e075ae69
RH
7240 putc ('t', file);
7241 return;
5f1ec3e6 7242
2a2ab3f9
JVA
7243 case 8:
7244 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
7245 {
7246#ifdef GAS_MNEMONICS
e075ae69 7247 putc ('q', file);
56c0e8fa 7248#else
e075ae69
RH
7249 putc ('l', file);
7250 putc ('l', file);
56c0e8fa
JVA
7251#endif
7252 }
e075ae69
RH
7253 else
7254 putc ('l', file);
2a2ab3f9 7255 return;
155d8a47
JW
7256
7257 default:
7258 abort ();
2a2ab3f9 7259 }
4af3895e
JVA
7260
7261 case 'b':
7262 case 'w':
7263 case 'k':
3f3f2124 7264 case 'q':
4af3895e
JVA
7265 case 'h':
7266 case 'y':
5cb6195d 7267 case 'X':
e075ae69 7268 case 'P':
4af3895e
JVA
7269 break;
7270
2d49677f
SC
7271 case 's':
7272 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7273 {
7274 PRINT_OPERAND (file, x, 0);
e075ae69 7275 putc (',', file);
2d49677f 7276 }
a269a03c
JC
7277 return;
7278
a46d1d38
JH
7279 case 'D':
 7280 /* A little bit of braindamage here. The SSE compare instructions
 7281 use completely different names for the comparisons than the
 7282 fp conditional moves do. */
7283 switch (GET_CODE (x))
7284 {
7285 case EQ:
7286 case UNEQ:
7287 fputs ("eq", file);
7288 break;
7289 case LT:
7290 case UNLT:
7291 fputs ("lt", file);
7292 break;
7293 case LE:
7294 case UNLE:
7295 fputs ("le", file);
7296 break;
7297 case UNORDERED:
7298 fputs ("unord", file);
7299 break;
7300 case NE:
7301 case LTGT:
7302 fputs ("neq", file);
7303 break;
7304 case UNGE:
7305 case GE:
7306 fputs ("nlt", file);
7307 break;
7308 case UNGT:
7309 case GT:
7310 fputs ("nle", file);
7311 break;
7312 case ORDERED:
7313 fputs ("ord", file);
7314 break;
7315 default:
7316 abort ();
7317 break;
7318 }
7319 return;
048b1c95 7320 case 'O':
f6f5dff2 7321#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7322 if (ASSEMBLER_DIALECT == ASM_ATT)
7323 {
7324 switch (GET_MODE (x))
7325 {
7326 case HImode: putc ('w', file); break;
7327 case SImode:
7328 case SFmode: putc ('l', file); break;
7329 case DImode:
7330 case DFmode: putc ('q', file); break;
7331 default: abort ();
7332 }
7333 putc ('.', file);
7334 }
7335#endif
7336 return;
1853aadd 7337 case 'C':
e075ae69 7338 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7339 return;
fe25fea3 7340 case 'F':
f6f5dff2 7341#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7342 if (ASSEMBLER_DIALECT == ASM_ATT)
7343 putc ('.', file);
7344#endif
e075ae69 7345 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7346 return;
7347
e9a25f70 7348 /* Like above, but reverse condition */
e075ae69 7349 case 'c':
fce5a9f2 7350 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
7351 and not a condition code which needs to be reversed. */
7352 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7353 {
7354 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7355 return;
7356 }
e075ae69
RH
7357 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7358 return;
fe25fea3 7359 case 'f':
f6f5dff2 7360#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7361 if (ASSEMBLER_DIALECT == ASM_ATT)
7362 putc ('.', file);
7363#endif
e075ae69 7364 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7365 return;
ef6257cd
JH
7366 case '+':
7367 {
7368 rtx x;
e5cb57e8 7369
ef6257cd
JH
7370 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7371 return;
a4f31c00 7372
ef6257cd
JH
7373 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7374 if (x)
7375 {
7376 int pred_val = INTVAL (XEXP (x, 0));
7377
7378 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7379 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7380 {
7381 int taken = pred_val > REG_BR_PROB_BASE / 2;
7382 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7383
 7384 /* Emit hints only in the cases where the default branch prediction
d1f87653 7385 heuristics would fail. */
ef6257cd
JH
7386 if (taken != cputaken)
7387 {
7388 /* We use 3e (DS) prefix for taken branches and
7389 2e (CS) prefix for not taken branches. */
7390 if (taken)
7391 fputs ("ds ; ", file);
7392 else
7393 fputs ("cs ; ", file);
7394 }
7395 }
7396 }
7397 return;
7398 }
4af3895e 7399 default:
a52453cc 7400 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7401 }
7402 }
e9a25f70 7403
2a2ab3f9
JVA
7404 if (GET_CODE (x) == REG)
7405 {
7406 PRINT_REG (x, code, file);
7407 }
e9a25f70 7408
2a2ab3f9
JVA
7409 else if (GET_CODE (x) == MEM)
7410 {
e075ae69 7411 /* No `byte ptr' prefix for call instructions. */
80f33d06 7412 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7413 {
69ddee61 7414 const char * size;
e075ae69
RH
7415 switch (GET_MODE_SIZE (GET_MODE (x)))
7416 {
7417 case 1: size = "BYTE"; break;
7418 case 2: size = "WORD"; break;
7419 case 4: size = "DWORD"; break;
7420 case 8: size = "QWORD"; break;
7421 case 12: size = "XWORD"; break;
a7180f70 7422 case 16: size = "XMMWORD"; break;
e075ae69 7423 default:
564d80f4 7424 abort ();
e075ae69 7425 }
fb204271
DN
7426
7427 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7428 if (code == 'b')
7429 size = "BYTE";
7430 else if (code == 'w')
7431 size = "WORD";
7432 else if (code == 'k')
7433 size = "DWORD";
7434
e075ae69
RH
7435 fputs (size, file);
7436 fputs (" PTR ", file);
2a2ab3f9 7437 }
e075ae69
RH
7438
7439 x = XEXP (x, 0);
0d7d98ee 7440 /* Avoid (%rip) for call operands. */
d10f5ecf 7441 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7442 && GET_CODE (x) != CONST_INT)
7443 output_addr_const (file, x);
c8b94768
RH
7444 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7445 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7446 else
e075ae69 7447 output_address (x);
2a2ab3f9 7448 }
e9a25f70 7449
2a2ab3f9
JVA
7450 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7451 {
e9a25f70
JL
7452 REAL_VALUE_TYPE r;
7453 long l;
7454
5f1ec3e6
JVA
7455 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7456 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7457
80f33d06 7458 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7459 putc ('$', file);
52267fcb 7460 fprintf (file, "0x%lx", l);
5f1ec3e6 7461 }
e9a25f70 7462
74dc3e94
RH
7463 /* These float cases don't actually occur as immediate operands. */
7464 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7465 {
e9a25f70
JL
7466 char dstr[30];
7467
da6eec72 7468 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7469 fprintf (file, "%s", dstr);
2a2ab3f9 7470 }
e9a25f70 7471
2b589241 7472 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 7473 && GET_MODE (x) == XFmode)
2a2ab3f9 7474 {
e9a25f70
JL
7475 char dstr[30];
7476
da6eec72 7477 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7478 fprintf (file, "%s", dstr);
2a2ab3f9 7479 }
f996902d 7480
79325812 7481 else
2a2ab3f9 7482 {
4af3895e 7483 if (code != 'P')
2a2ab3f9 7484 {
695dac07 7485 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7486 {
80f33d06 7487 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7488 putc ('$', file);
7489 }
2a2ab3f9
JVA
7490 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7491 || GET_CODE (x) == LABEL_REF)
e075ae69 7492 {
80f33d06 7493 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7494 putc ('$', file);
7495 else
7496 fputs ("OFFSET FLAT:", file);
7497 }
2a2ab3f9 7498 }
e075ae69
RH
7499 if (GET_CODE (x) == CONST_INT)
7500 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7501 else if (flag_pic)
2a2ab3f9
JVA
7502 output_pic_addr_const (file, x, code);
7503 else
7504 output_addr_const (file, x);
7505 }
7506}
7507\f
7508/* Print a memory operand whose address is ADDR. */
7509
7510void
b96a374d 7511print_operand_address (FILE *file, register rtx addr)
2a2ab3f9 7512{
e075ae69
RH
7513 struct ix86_address parts;
7514 rtx base, index, disp;
7515 int scale;
e9a25f70 7516
e075ae69
RH
7517 if (! ix86_decompose_address (addr, &parts))
7518 abort ();
e9a25f70 7519
e075ae69
RH
7520 base = parts.base;
7521 index = parts.index;
7522 disp = parts.disp;
7523 scale = parts.scale;
e9a25f70 7524
74dc3e94
RH
7525 switch (parts.seg)
7526 {
7527 case SEG_DEFAULT:
7528 break;
7529 case SEG_FS:
7530 case SEG_GS:
7531 if (USER_LABEL_PREFIX[0] == 0)
7532 putc ('%', file);
7533 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7534 break;
7535 default:
7536 abort ();
7537 }
7538
e075ae69
RH
7539 if (!base && !index)
7540 {
7541 /* Displacement only requires special attention. */
e9a25f70 7542
e075ae69 7543 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 7544 {
74dc3e94 7545 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
fb204271
DN
7546 {
7547 if (USER_LABEL_PREFIX[0] == 0)
7548 putc ('%', file);
7549 fputs ("ds:", file);
7550 }
74dc3e94 7551 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 7552 }
e075ae69 7553 else if (flag_pic)
74dc3e94 7554 output_pic_addr_const (file, disp, 0);
e075ae69 7555 else
74dc3e94 7556 output_addr_const (file, disp);
0d7d98ee
JH
7557
7558 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 7559 if (TARGET_64BIT
74dc3e94
RH
7560 && ((GET_CODE (disp) == SYMBOL_REF
7561 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7562 || GET_CODE (disp) == LABEL_REF
7563 || (GET_CODE (disp) == CONST
7564 && GET_CODE (XEXP (disp, 0)) == PLUS
7565 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7566 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7567 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 7568 fputs ("(%rip)", file);
e075ae69
RH
7569 }
7570 else
7571 {
80f33d06 7572 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 7573 {
e075ae69 7574 if (disp)
2a2ab3f9 7575 {
c399861d 7576 if (flag_pic)
e075ae69
RH
7577 output_pic_addr_const (file, disp, 0);
7578 else if (GET_CODE (disp) == LABEL_REF)
7579 output_asm_label (disp);
2a2ab3f9 7580 else
e075ae69 7581 output_addr_const (file, disp);
2a2ab3f9
JVA
7582 }
7583
e075ae69
RH
7584 putc ('(', file);
7585 if (base)
7586 PRINT_REG (base, 0, file);
7587 if (index)
2a2ab3f9 7588 {
e075ae69
RH
7589 putc (',', file);
7590 PRINT_REG (index, 0, file);
7591 if (scale != 1)
7592 fprintf (file, ",%d", scale);
2a2ab3f9 7593 }
e075ae69 7594 putc (')', file);
2a2ab3f9 7595 }
2a2ab3f9
JVA
7596 else
7597 {
e075ae69 7598 rtx offset = NULL_RTX;
e9a25f70 7599
e075ae69
RH
7600 if (disp)
7601 {
7602 /* Pull out the offset of a symbol; print any symbol itself. */
7603 if (GET_CODE (disp) == CONST
7604 && GET_CODE (XEXP (disp, 0)) == PLUS
7605 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7606 {
7607 offset = XEXP (XEXP (disp, 0), 1);
7608 disp = gen_rtx_CONST (VOIDmode,
7609 XEXP (XEXP (disp, 0), 0));
7610 }
ce193852 7611
e075ae69
RH
7612 if (flag_pic)
7613 output_pic_addr_const (file, disp, 0);
7614 else if (GET_CODE (disp) == LABEL_REF)
7615 output_asm_label (disp);
7616 else if (GET_CODE (disp) == CONST_INT)
7617 offset = disp;
7618 else
7619 output_addr_const (file, disp);
7620 }
e9a25f70 7621
e075ae69
RH
7622 putc ('[', file);
7623 if (base)
a8620236 7624 {
e075ae69
RH
7625 PRINT_REG (base, 0, file);
7626 if (offset)
7627 {
7628 if (INTVAL (offset) >= 0)
7629 putc ('+', file);
7630 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7631 }
a8620236 7632 }
e075ae69
RH
7633 else if (offset)
7634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7635 else
e075ae69 7636 putc ('0', file);
e9a25f70 7637
e075ae69
RH
7638 if (index)
7639 {
7640 putc ('+', file);
7641 PRINT_REG (index, 0, file);
7642 if (scale != 1)
7643 fprintf (file, "*%d", scale);
7644 }
7645 putc (']', file);
7646 }
2a2ab3f9
JVA
7647 }
7648}
f996902d
RH
7649
7650bool
b96a374d 7651output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
7652{
7653 rtx op;
7654
7655 if (GET_CODE (x) != UNSPEC)
7656 return false;
7657
7658 op = XVECEXP (x, 0, 0);
7659 switch (XINT (x, 1))
7660 {
7661 case UNSPEC_GOTTPOFF:
7662 output_addr_const (file, op);
dea73790 7663 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7664 fputs ("@GOTTPOFF", file);
7665 break;
7666 case UNSPEC_TPOFF:
7667 output_addr_const (file, op);
7668 fputs ("@TPOFF", file);
7669 break;
7670 case UNSPEC_NTPOFF:
7671 output_addr_const (file, op);
75d38379
JJ
7672 if (TARGET_64BIT)
7673 fputs ("@TPOFF", file);
7674 else
7675 fputs ("@NTPOFF", file);
f996902d
RH
7676 break;
7677 case UNSPEC_DTPOFF:
7678 output_addr_const (file, op);
7679 fputs ("@DTPOFF", file);
7680 break;
dea73790
JJ
7681 case UNSPEC_GOTNTPOFF:
7682 output_addr_const (file, op);
75d38379
JJ
7683 if (TARGET_64BIT)
7684 fputs ("@GOTTPOFF(%rip)", file);
7685 else
7686 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7687 break;
7688 case UNSPEC_INDNTPOFF:
7689 output_addr_const (file, op);
7690 fputs ("@INDNTPOFF", file);
7691 break;
f996902d
RH
7692
7693 default:
7694 return false;
7695 }
7696
7697 return true;
7698}
2a2ab3f9
JVA
7699\f
7700/* Split one or more DImode RTL references into pairs of SImode
7701 references. The RTL can be REG, offsettable MEM, integer constant, or
7702 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7703 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7704 that parallel "operands". */
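
/* Illustration (not from the original source): on this little-endian
 target a memory operand (mem:DI (reg:SI %eax)) is split into
 lo_half = (mem:SI (reg:SI %eax)) and
 hi_half = (mem:SI (plus:SI (reg:SI %eax) (const_int 4))). */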
2a2ab3f9
JVA
7705
7706void
b96a374d 7707split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7708{
7709 while (num--)
7710 {
57dbca5e 7711 rtx op = operands[num];
b932f770
JH
7712
 7713 /* simplify_subreg refuses to split volatile memory addresses,
 7714 but we still have to handle them. */
7715 if (GET_CODE (op) == MEM)
2a2ab3f9 7716 {
f4ef873c 7717 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7718 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7719 }
7720 else
b932f770 7721 {
38ca929b
JH
7722 lo_half[num] = simplify_gen_subreg (SImode, op,
7723 GET_MODE (op) == VOIDmode
7724 ? DImode : GET_MODE (op), 0);
7725 hi_half[num] = simplify_gen_subreg (SImode, op,
7726 GET_MODE (op) == VOIDmode
7727 ? DImode : GET_MODE (op), 4);
b932f770 7728 }
2a2ab3f9
JVA
7729 }
7730}
44cf5b6a
JH
 7731/* Split one or more TImode RTL references into pairs of DImode
 7732 references. The RTL can be REG, offsettable MEM, integer constant, or
 7733 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7734 split and "num" is its length. lo_half and hi_half are output arrays
7735 that parallel "operands". */
7736
7737void
b96a374d 7738split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
7739{
7740 while (num--)
7741 {
7742 rtx op = operands[num];
b932f770
JH
7743
7744 /* simplify_subreg refuse to split volatile memory addresses, but we
7745 still have to handle it. */
7746 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7747 {
7748 lo_half[num] = adjust_address (op, DImode, 0);
7749 hi_half[num] = adjust_address (op, DImode, 8);
7750 }
7751 else
b932f770
JH
7752 {
7753 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7754 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7755 }
44cf5b6a
JH
7756 }
7757}
2a2ab3f9 7758\f
2a2ab3f9
JVA
7759/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7760 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7761 is the expression of the binary operation. The output may either be
7762 emitted here, or returned to the caller, like all output_* functions.
7763
7764 There is no guarantee that the operands are the same mode, as they
0f290768 7765 might be within FLOAT or FLOAT_EXTEND expressions. */
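
/* For instance (illustrative): an SFmode SSE addition is returned as
 "addss\t{%2, %0|%0, %2}", while the same PLUS on the 387 stack with a
 memory operand becomes "fadd%z2\t%2", i.e. "fadds" for an SFmode
 memory reference, via the %z size-suffix machinery. */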
2a2ab3f9 7766
e3c2afab
AM
7767#ifndef SYSV386_COMPAT
7768/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7769 wants to fix the assemblers because that causes incompatibility
7770 with gcc. No-one wants to fix gcc because that causes
7771 incompatibility with assemblers... You can use the option of
7772 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7773#define SYSV386_COMPAT 1
7774#endif
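
/* (Explanatory note, roughly speaking: the {att|intel} braces in the
 templates below pick the dialect-specific spelling; SYSV386_COMPAT only
 decides whether the AT&T output carries the extra or swapped 'r' suffix
 on fsub/fdiv that those assemblers expect when the destination is not
 %st(0).) */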
7775
69ddee61 7776const char *
b96a374d 7777output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 7778{
e3c2afab 7779 static char buf[30];
69ddee61 7780 const char *p;
1deaa899
JH
7781 const char *ssep;
7782 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7783
e3c2afab
AM
7784#ifdef ENABLE_CHECKING
7785 /* Even if we do not want to check the inputs, this documents input
7786 constraints. Which helps in understanding the following code. */
7787 if (STACK_REG_P (operands[0])
7788 && ((REG_P (operands[1])
7789 && REGNO (operands[0]) == REGNO (operands[1])
7790 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7791 || (REG_P (operands[2])
7792 && REGNO (operands[0]) == REGNO (operands[2])
7793 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7794 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7795 ; /* ok */
1deaa899 7796 else if (!is_sse)
e3c2afab
AM
7797 abort ();
7798#endif
7799
2a2ab3f9
JVA
7800 switch (GET_CODE (operands[3]))
7801 {
7802 case PLUS:
e075ae69
RH
7803 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7804 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7805 p = "fiadd";
7806 else
7807 p = "fadd";
1deaa899 7808 ssep = "add";
2a2ab3f9
JVA
7809 break;
7810
7811 case MINUS:
e075ae69
RH
7812 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7813 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7814 p = "fisub";
7815 else
7816 p = "fsub";
1deaa899 7817 ssep = "sub";
2a2ab3f9
JVA
7818 break;
7819
7820 case MULT:
e075ae69
RH
7821 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7823 p = "fimul";
7824 else
7825 p = "fmul";
1deaa899 7826 ssep = "mul";
2a2ab3f9
JVA
7827 break;
7828
7829 case DIV:
e075ae69
RH
7830 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7831 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7832 p = "fidiv";
7833 else
7834 p = "fdiv";
1deaa899 7835 ssep = "div";
2a2ab3f9
JVA
7836 break;
7837
7838 default:
7839 abort ();
7840 }
7841
1deaa899
JH
7842 if (is_sse)
7843 {
7844 strcpy (buf, ssep);
7845 if (GET_MODE (operands[0]) == SFmode)
7846 strcat (buf, "ss\t{%2, %0|%0, %2}");
7847 else
7848 strcat (buf, "sd\t{%2, %0|%0, %2}");
7849 return buf;
7850 }
e075ae69 7851 strcpy (buf, p);
2a2ab3f9
JVA
7852
7853 switch (GET_CODE (operands[3]))
7854 {
7855 case MULT:
7856 case PLUS:
7857 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7858 {
e3c2afab 7859 rtx temp = operands[2];
2a2ab3f9
JVA
7860 operands[2] = operands[1];
7861 operands[1] = temp;
7862 }
7863
e3c2afab
AM
 7864 /* We now know operands[0] == operands[1]. */
7865
2a2ab3f9 7866 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7867 {
7868 p = "%z2\t%2";
7869 break;
7870 }
2a2ab3f9
JVA
7871
7872 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7873 {
7874 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7875 /* How is it that we are storing to a dead operand[2]?
7876 Well, presumably operands[1] is dead too. We can't
7877 store the result to st(0) as st(0) gets popped on this
7878 instruction. Instead store to operands[2] (which I
7879 think has to be st(1)). st(1) will be popped later.
7880 gcc <= 2.8.1 didn't have this check and generated
7881 assembly code that the Unixware assembler rejected. */
7882 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7883 else
e3c2afab 7884 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7885 break;
6b28fd63 7886 }
2a2ab3f9
JVA
7887
7888 if (STACK_TOP_P (operands[0]))
e3c2afab 7889 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7890 else
e3c2afab 7891 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7892 break;
2a2ab3f9
JVA
7893
7894 case MINUS:
7895 case DIV:
7896 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7897 {
7898 p = "r%z1\t%1";
7899 break;
7900 }
2a2ab3f9
JVA
7901
7902 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7903 {
7904 p = "%z2\t%2";
7905 break;
7906 }
2a2ab3f9 7907
2a2ab3f9 7908 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7909 {
e3c2afab
AM
7910#if SYSV386_COMPAT
7911 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7912 derived assemblers, confusingly reverse the direction of
7913 the operation for fsub{r} and fdiv{r} when the
7914 destination register is not st(0). The Intel assembler
7915 doesn't have this brain damage. Read !SYSV386_COMPAT to
7916 figure out what the hardware really does. */
7917 if (STACK_TOP_P (operands[0]))
7918 p = "{p\t%0, %2|rp\t%2, %0}";
7919 else
7920 p = "{rp\t%2, %0|p\t%0, %2}";
7921#else
6b28fd63 7922 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7923 /* As above for fmul/fadd, we can't store to st(0). */
7924 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7925 else
e3c2afab
AM
7926 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7927#endif
e075ae69 7928 break;
6b28fd63 7929 }
2a2ab3f9
JVA
7930
7931 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7932 {
e3c2afab 7933#if SYSV386_COMPAT
6b28fd63 7934 if (STACK_TOP_P (operands[0]))
e3c2afab 7935 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7936 else
e3c2afab
AM
7937 p = "{p\t%1, %0|rp\t%0, %1}";
7938#else
7939 if (STACK_TOP_P (operands[0]))
7940 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7941 else
7942 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7943#endif
e075ae69 7944 break;
6b28fd63 7945 }
2a2ab3f9
JVA
7946
7947 if (STACK_TOP_P (operands[0]))
7948 {
7949 if (STACK_TOP_P (operands[1]))
e3c2afab 7950 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7951 else
e3c2afab 7952 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7953 break;
2a2ab3f9
JVA
7954 }
7955 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7956 {
7957#if SYSV386_COMPAT
7958 p = "{\t%1, %0|r\t%0, %1}";
7959#else
7960 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7961#endif
7962 }
2a2ab3f9 7963 else
e3c2afab
AM
7964 {
7965#if SYSV386_COMPAT
7966 p = "{r\t%2, %0|\t%0, %2}";
7967#else
7968 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7969#endif
7970 }
e075ae69 7971 break;
2a2ab3f9
JVA
7972
7973 default:
7974 abort ();
7975 }
e075ae69
RH
7976
7977 strcat (buf, p);
7978 return buf;
2a2ab3f9 7979}
e075ae69 7980
a4f31c00 7981/* Output code to initialize control word copies used by
7a2e09f4
JH
7982 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7983 is set to control word rounding downwards. */
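/* (Background note: bits 10-11 of the 387 control word form the
 rounding-control field; or-ing in 0xc00 sets both bits, selecting
 round-toward-zero, which is what the truncating conversions need
 despite the ROUND_DOWN name.) */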
7984void
b96a374d 7985emit_i387_cw_initialization (rtx normal, rtx round_down)
7a2e09f4
JH
7986{
7987 rtx reg = gen_reg_rtx (HImode);
7988
7989 emit_insn (gen_x86_fnstcw_1 (normal));
7990 emit_move_insn (reg, normal);
7991 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7992 && !TARGET_64BIT)
7993 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7994 else
7995 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7996 emit_move_insn (round_down, reg);
7997}
7998
2a2ab3f9 7999/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 8000 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 8001 operand may be [SDX]Fmode. */
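/* A typical emitted sequence (illustrative) for a DImode destination is:

 fldcw %3 # install the truncating control word
 fistpll %0 # convert, store and pop (from fistp%z0)
 fldcw %2 # restore the caller's control word */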
2a2ab3f9 8002
69ddee61 8003const char *
b96a374d 8004output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
8005{
8006 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 8007 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 8008
e075ae69
RH
8009 /* Jump through a hoop or two for DImode, since the hardware has no
8010 non-popping instruction. We used to do this a different way, but
8011 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
8012 if (dimode_p && !stack_top_dies)
8013 output_asm_insn ("fld\t%y1", operands);
e075ae69 8014
7a2e09f4 8015 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
8016 abort ();
8017
e075ae69 8018 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 8019 abort ();
e9a25f70 8020
7a2e09f4 8021 output_asm_insn ("fldcw\t%3", operands);
e075ae69 8022 if (stack_top_dies || dimode_p)
7a2e09f4 8023 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 8024 else
7a2e09f4 8025 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 8026 output_asm_insn ("fldcw\t%2", operands);
10195bd8 8027
e075ae69 8028 return "";
2a2ab3f9 8029}
cda749b1 8030
e075ae69
RH
8031/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8032 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8033 when fucom should be used. */
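/* Rough map of the cases below (illustrative): SSE operands yield
 ucomiss/comiss (SFmode) or ucomisd/comisd (DFmode); eflags_p == 1
 picks the fcomi/fucomi family, and eflags_p == 2 appends an fnstsw to
 copy the FPU status word, e.g. "fucompp\n\tfnstsw\t%0". */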
8034
69ddee61 8035const char *
b96a374d 8036output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 8037{
e075ae69
RH
8038 int stack_top_dies;
8039 rtx cmp_op0 = operands[0];
8040 rtx cmp_op1 = operands[1];
0644b628 8041 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
8042
8043 if (eflags_p == 2)
8044 {
8045 cmp_op0 = cmp_op1;
8046 cmp_op1 = operands[2];
8047 }
0644b628
JH
8048 if (is_sse)
8049 {
8050 if (GET_MODE (operands[0]) == SFmode)
8051 if (unordered_p)
8052 return "ucomiss\t{%1, %0|%0, %1}";
8053 else
a5cf80f0 8054 return "comiss\t{%1, %0|%0, %1}";
0644b628
JH
8055 else
8056 if (unordered_p)
8057 return "ucomisd\t{%1, %0|%0, %1}";
8058 else
a5cf80f0 8059 return "comisd\t{%1, %0|%0, %1}";
0644b628 8060 }
cda749b1 8061
e075ae69 8062 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
8063 abort ();
8064
e075ae69 8065 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 8066
e075ae69
RH
8067 if (STACK_REG_P (cmp_op1)
8068 && stack_top_dies
8069 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8070 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 8071 {
e075ae69
RH
 8072 /* If the top of the 387 stack dies, and the other operand
 8073 is also a stack register that dies, then this must be a
 8074 `fcompp' float compare. */
8075
8076 if (eflags_p == 1)
8077 {
8078 /* There is no double popping fcomi variant. Fortunately,
8079 eflags is immune from the fstp's cc clobbering. */
8080 if (unordered_p)
8081 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8082 else
8083 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8084 return "fstp\t%y0";
8085 }
8086 else
cda749b1 8087 {
e075ae69
RH
8088 if (eflags_p == 2)
8089 {
8090 if (unordered_p)
8091 return "fucompp\n\tfnstsw\t%0";
8092 else
8093 return "fcompp\n\tfnstsw\t%0";
8094 }
cda749b1
JW
8095 else
8096 {
e075ae69
RH
8097 if (unordered_p)
8098 return "fucompp";
8099 else
8100 return "fcompp";
cda749b1
JW
8101 }
8102 }
cda749b1
JW
8103 }
8104 else
8105 {
e075ae69 8106 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 8107
0f290768 8108 static const char * const alt[24] =
e075ae69
RH
8109 {
8110 "fcom%z1\t%y1",
8111 "fcomp%z1\t%y1",
8112 "fucom%z1\t%y1",
8113 "fucomp%z1\t%y1",
0f290768 8114
e075ae69
RH
8115 "ficom%z1\t%y1",
8116 "ficomp%z1\t%y1",
8117 NULL,
8118 NULL,
8119
8120 "fcomi\t{%y1, %0|%0, %y1}",
8121 "fcomip\t{%y1, %0|%0, %y1}",
8122 "fucomi\t{%y1, %0|%0, %y1}",
8123 "fucomip\t{%y1, %0|%0, %y1}",
8124
8125 NULL,
8126 NULL,
8127 NULL,
8128 NULL,
8129
8130 "fcom%z2\t%y2\n\tfnstsw\t%0",
8131 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8132 "fucom%z2\t%y2\n\tfnstsw\t%0",
8133 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 8134
e075ae69
RH
8135 "ficom%z2\t%y2\n\tfnstsw\t%0",
8136 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8137 NULL,
8138 NULL
8139 };
8140
8141 int mask;
69ddee61 8142 const char *ret;
e075ae69
RH
8143
8144 mask = eflags_p << 3;
8145 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8146 mask |= unordered_p << 1;
8147 mask |= stack_top_dies;
8148
8149 if (mask >= 24)
8150 abort ();
8151 ret = alt[mask];
8152 if (ret == NULL)
8153 abort ();
cda749b1 8154
e075ae69 8155 return ret;
cda749b1
JW
8156 }
8157}
2a2ab3f9 8158
f88c65f7 8159void
b96a374d 8160ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8161{
8162 const char *directive = ASM_LONG;
8163
8164 if (TARGET_64BIT)
8165 {
8166#ifdef ASM_QUAD
8167 directive = ASM_QUAD;
8168#else
8169 abort ();
8170#endif
8171 }
8172
8173 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8174}
8175
8176void
b96a374d 8177ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7
RH
8178{
8179 if (TARGET_64BIT)
74411039 8180 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
8181 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8182 else if (HAVE_AS_GOTOFF_IN_DATA)
8183 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
8184#if TARGET_MACHO
8185 else if (TARGET_MACHO)
86ecdfb6
AP
8186 {
8187 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8188 machopic_output_function_base_name (file);
8189 fprintf(file, "\n");
8190 }
b069de3b 8191#endif
f88c65f7 8192 else
5fc0e5df
KW
8193 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8194 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 8195}
32b5b1aa 8196\f
a8bac9ab
RH
8197/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8198 for the target. */
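
/* (Design note: "xor reg,reg" is the shorter encoding but clobbers the
 flags, hence the CCmode CLOBBER attached below; plain "mov $0, reg" is
 used only when TARGET_USE_MOV0 asks for it and we are not optimizing
 for size.) */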
8199
8200void
b96a374d 8201ix86_expand_clear (rtx dest)
a8bac9ab
RH
8202{
8203 rtx tmp;
8204
8205 /* We play register width games, which are only valid after reload. */
8206 if (!reload_completed)
8207 abort ();
8208
8209 /* Avoid HImode and its attendant prefix byte. */
8210 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8211 dest = gen_rtx_REG (SImode, REGNO (dest));
8212
8213 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8214
8215 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8216 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8217 {
8218 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8219 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8220 }
8221
8222 emit_insn (tmp);
8223}
8224
f996902d
RH
8225/* X is an unchanging MEM. If it is a constant pool reference, return
8226 the constant pool rtx, else NULL. */
8227
8228static rtx
b96a374d 8229maybe_get_pool_constant (rtx x)
f996902d 8230{
69bd9368 8231 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8232
8233 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8234 return get_pool_constant (x);
8235
8236 return NULL_RTX;
8237}
8238
79325812 8239void
b96a374d 8240ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 8241{
e075ae69 8242 int strict = (reload_in_progress || reload_completed);
74dc3e94
RH
8243 rtx op0, op1;
8244 enum tls_model model;
f996902d
RH
8245
8246 op0 = operands[0];
8247 op1 = operands[1];
8248
74dc3e94
RH
8249 model = tls_symbolic_operand (op1, Pmode);
8250 if (model)
f996902d 8251 {
74dc3e94
RH
8252 op1 = legitimize_tls_address (op1, model, true);
8253 op1 = force_operand (op1, op0);
8254 if (op1 == op0)
8255 return;
f996902d 8256 }
74dc3e94
RH
8257
8258 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 8259 {
b069de3b
SS
8260#if TARGET_MACHO
8261 if (MACHOPIC_PURE)
8262 {
8263 rtx temp = ((reload_in_progress
8264 || ((op0 && GET_CODE (op0) == REG)
8265 && mode == Pmode))
8266 ? op0 : gen_reg_rtx (Pmode));
8267 op1 = machopic_indirect_data_reference (op1, temp);
8268 op1 = machopic_legitimize_pic_address (op1, mode,
8269 temp == op1 ? 0 : temp);
8270 }
74dc3e94
RH
8271 else if (MACHOPIC_INDIRECT)
8272 op1 = machopic_indirect_data_reference (op1, 0);
8273 if (op0 == op1)
8274 return;
8275#else
f996902d
RH
8276 if (GET_CODE (op0) == MEM)
8277 op1 = force_reg (Pmode, op1);
e075ae69 8278 else
32b5b1aa 8279 {
f996902d 8280 rtx temp = op0;
e075ae69
RH
8281 if (GET_CODE (temp) != REG)
8282 temp = gen_reg_rtx (Pmode);
f996902d
RH
8283 temp = legitimize_pic_address (op1, temp);
8284 if (temp == op0)
e075ae69 8285 return;
f996902d 8286 op1 = temp;
32b5b1aa 8287 }
74dc3e94 8288#endif /* TARGET_MACHO */
e075ae69
RH
8289 }
8290 else
8291 {
f996902d 8292 if (GET_CODE (op0) == MEM
44cf5b6a 8293 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
8294 || !push_operand (op0, mode))
8295 && GET_CODE (op1) == MEM)
8296 op1 = force_reg (mode, op1);
e9a25f70 8297
f996902d
RH
8298 if (push_operand (op0, mode)
8299 && ! general_no_elim_operand (op1, mode))
8300 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 8301
44cf5b6a
JH
8302 /* Force large constants in 64bit compilation into register
8303 to get them CSEed. */
8304 if (TARGET_64BIT && mode == DImode
f996902d
RH
8305 && immediate_operand (op1, mode)
8306 && !x86_64_zero_extended_value (op1)
8307 && !register_operand (op0, mode)
44cf5b6a 8308 && optimize && !reload_completed && !reload_in_progress)
f996902d 8309 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 8310
e075ae69 8311 if (FLOAT_MODE_P (mode))
32b5b1aa 8312 {
d7a29404
JH
8313 /* If we are loading a floating point constant to a register,
8314 force the value to memory now, since we'll get better code
8315 out the back end. */
e075ae69
RH
8316
8317 if (strict)
8318 ;
ddc67067
MM
8319 else if (GET_CODE (op1) == CONST_DOUBLE)
8320 {
8321 op1 = validize_mem (force_const_mem (mode, op1));
8322 if (!register_operand (op0, mode))
8323 {
8324 rtx temp = gen_reg_rtx (mode);
8325 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8326 emit_move_insn (op0, temp);
8327 return;
8328 }
8329 }
32b5b1aa 8330 }
32b5b1aa 8331 }
e9a25f70 8332
74dc3e94 8333 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 8334}
e9a25f70 8335
e37af218 8336void
b96a374d 8337ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218
RH
8338{
8339 /* Force constants other than zero into memory. We do not know how
8340 the instructions used to build constants modify the upper 64 bits
 8341 of the register; once we have that information we may be able
8342 to handle some of them more efficiently. */
8343 if ((reload_in_progress | reload_completed) == 0
8344 && register_operand (operands[0], mode)
fdc4b40b 8345 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
2b28d405 8346 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
e37af218
RH
8347
8348 /* Make operand1 a register if it isn't already. */
f8ca7923 8349 if (!no_new_pseudos
e37af218 8350 && !register_operand (operands[0], mode)
b105d6da 8351 && !register_operand (operands[1], mode))
e37af218 8352 {
59bef189 8353 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
8354 emit_move_insn (operands[0], temp);
8355 return;
8356 }
8357
8358 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8359}
e37af218 8360
e075ae69
RH
8361/* Attempt to expand a binary operator. Make the expansion closer to the
 8362 actual machine, than just general_operand, which would allow 3 separate
9d81fc27 8363 memory references (one output, two input) in a single insn. */
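
/* E.g. (hypothetical operands): expanding a commutative PLUS written as
 "dst = const + dst" swaps the sources below so that dst can match
 src1; and when both sources are MEMs one of them is forced into a
 register, since no x86 instruction takes two memory inputs. */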
e9a25f70 8364
e075ae69 8365void
b96a374d
AJ
8366ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8367 rtx operands[])
e075ae69
RH
8368{
8369 int matching_memory;
8370 rtx src1, src2, dst, op, clob;
8371
8372 dst = operands[0];
8373 src1 = operands[1];
8374 src2 = operands[2];
8375
8376 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8377 if (GET_RTX_CLASS (code) == 'c'
8378 && (rtx_equal_p (dst, src2)
8379 || immediate_operand (src1, mode)))
8380 {
8381 rtx temp = src1;
8382 src1 = src2;
8383 src2 = temp;
32b5b1aa 8384 }
e9a25f70 8385
e075ae69
RH
8386 /* If the destination is memory, and we do not have matching source
8387 operands, do things in registers. */
8388 matching_memory = 0;
8389 if (GET_CODE (dst) == MEM)
32b5b1aa 8390 {
e075ae69
RH
8391 if (rtx_equal_p (dst, src1))
8392 matching_memory = 1;
8393 else if (GET_RTX_CLASS (code) == 'c'
8394 && rtx_equal_p (dst, src2))
8395 matching_memory = 2;
8396 else
8397 dst = gen_reg_rtx (mode);
8398 }
0f290768 8399
e075ae69
RH
8400 /* Both source operands cannot be in memory. */
8401 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8402 {
8403 if (matching_memory != 2)
8404 src2 = force_reg (mode, src2);
8405 else
8406 src1 = force_reg (mode, src1);
32b5b1aa 8407 }
e9a25f70 8408
06a964de
JH
8409  /* If the operation is not commutative, source 1 cannot be a constant
8410     or non-matching memory.  */
0f290768 8411 if ((CONSTANT_P (src1)
06a964de
JH
8412 || (!matching_memory && GET_CODE (src1) == MEM))
8413 && GET_RTX_CLASS (code) != 'c')
e075ae69 8414 src1 = force_reg (mode, src1);
0f290768 8415
e075ae69 8416  /* If optimizing, copy to regs to improve CSE.  */
fe577e58 8417 if (optimize && ! no_new_pseudos)
32b5b1aa 8418 {
e075ae69
RH
8419 if (GET_CODE (dst) == MEM)
8420 dst = gen_reg_rtx (mode);
8421 if (GET_CODE (src1) == MEM)
8422 src1 = force_reg (mode, src1);
8423 if (GET_CODE (src2) == MEM)
8424 src2 = force_reg (mode, src2);
32b5b1aa 8425 }
e9a25f70 8426
e075ae69
RH
8427 /* Emit the instruction. */
8428
8429 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8430 if (reload_in_progress)
8431 {
8432 /* Reload doesn't know about the flags register, and doesn't know that
8433 it doesn't want to clobber it. We can only do this with PLUS. */
8434 if (code != PLUS)
8435 abort ();
8436 emit_insn (op);
8437 }
8438 else
32b5b1aa 8439 {
e075ae69
RH
8440 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8441 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 8442 }
e9a25f70 8443
e075ae69
RH
8444 /* Fix up the destination if needed. */
8445 if (dst != operands[0])
8446 emit_move_insn (operands[0], dst);
8447}
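
/* For example, "mem1 = mem2 + reg" has no matching memory operand, so
   the code above rewrites it as "tmp = mem2 + reg; mem1 = tmp", while
   "mem1 = mem1 + reg" is left alone since the destination matches.
   Except while reloading, the emitted insn is wrapped in a PARALLEL
   with a (clobber (reg:CC FLAGS_REG)).  */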
8448
8449/* Return TRUE or FALSE depending on whether the binary operator meets the
8450 appropriate constraints. */
8451
8452int
b96a374d
AJ
8453ix86_binary_operator_ok (enum rtx_code code,
8454 enum machine_mode mode ATTRIBUTE_UNUSED,
8455 rtx operands[3])
e075ae69
RH
8456{
8457 /* Both source operands cannot be in memory. */
8458 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8459 return 0;
8460  /* If the operation is not commutative, source 1 cannot be a constant.  */
8461 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8462 return 0;
8463 /* If the destination is memory, we must have a matching source operand. */
8464 if (GET_CODE (operands[0]) == MEM
8465 && ! (rtx_equal_p (operands[0], operands[1])
8466 || (GET_RTX_CLASS (code) == 'c'
8467 && rtx_equal_p (operands[0], operands[2]))))
8468 return 0;
06a964de 8469  /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 8470     have a matching destination.  */
06a964de
JH
8471 if (GET_CODE (operands[1]) == MEM
8472 && GET_RTX_CLASS (code) != 'c'
8473 && ! rtx_equal_p (operands[0], operands[1]))
8474 return 0;
e075ae69
RH
8475 return 1;
8476}
8477
8478/* Attempt to expand a unary operator.  Make the expansion closer to the
8479   actual machine than just general_operand, which would allow 2 separate
9d81fc27 8480   memory references (one output, one input) in a single insn.  */
e075ae69 8481
9d81fc27 8482void
b96a374d
AJ
8483ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8484 rtx operands[])
e075ae69 8485{
06a964de
JH
8486 int matching_memory;
8487 rtx src, dst, op, clob;
8488
8489 dst = operands[0];
8490 src = operands[1];
e075ae69 8491
06a964de
JH
8492 /* If the destination is memory, and we do not have matching source
8493 operands, do things in registers. */
8494 matching_memory = 0;
8495 if (GET_CODE (dst) == MEM)
32b5b1aa 8496 {
06a964de
JH
8497 if (rtx_equal_p (dst, src))
8498 matching_memory = 1;
e075ae69 8499 else
06a964de 8500 dst = gen_reg_rtx (mode);
32b5b1aa 8501 }
e9a25f70 8502
06a964de
JH
8503 /* When source operand is memory, destination must match. */
8504 if (!matching_memory && GET_CODE (src) == MEM)
8505 src = force_reg (mode, src);
0f290768 8506
06a964de 8507  /* If optimizing, copy to regs to improve CSE.  */
fe577e58 8508 if (optimize && ! no_new_pseudos)
06a964de
JH
8509 {
8510 if (GET_CODE (dst) == MEM)
8511 dst = gen_reg_rtx (mode);
8512 if (GET_CODE (src) == MEM)
8513 src = force_reg (mode, src);
8514 }
8515
8516 /* Emit the instruction. */
8517
8518 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8519 if (reload_in_progress || code == NOT)
8520 {
8521 /* Reload doesn't know about the flags register, and doesn't know that
8522 it doesn't want to clobber it. */
8523 if (code != NOT)
8524 abort ();
8525 emit_insn (op);
8526 }
8527 else
8528 {
8529 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8530 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8531 }
8532
8533 /* Fix up the destination if needed. */
8534 if (dst != operands[0])
8535 emit_move_insn (operands[0], dst);
e075ae69
RH
8536}
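
/* The NOT special case above works because the x86 "not" instruction,
   unlike "neg", does not modify any flags, so no FLAGS_REG clobber is
   attached to it and it may be emitted even while reload is in
   progress.  */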
8537
8538/* Return TRUE or FALSE depending on whether the unary operator meets the
8539 appropriate constraints. */
8540
8541int
b96a374d
AJ
8542ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8543 enum machine_mode mode ATTRIBUTE_UNUSED,
8544 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8545{
06a964de
JH
8546 /* If one of operands is memory, source and destination must match. */
8547 if ((GET_CODE (operands[0]) == MEM
8548 || GET_CODE (operands[1]) == MEM)
8549 && ! rtx_equal_p (operands[0], operands[1]))
8550 return FALSE;
e075ae69
RH
8551 return TRUE;
8552}
8553
16189740
RH
8554/* Return TRUE or FALSE depending on whether the first SET in INSN
8555 has source and destination with matching CC modes, and that the
8556 CC mode is at least as constrained as REQ_MODE. */
8557
8558int
b96a374d 8559ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16189740
RH
8560{
8561 rtx set;
8562 enum machine_mode set_mode;
8563
8564 set = PATTERN (insn);
8565 if (GET_CODE (set) == PARALLEL)
8566 set = XVECEXP (set, 0, 0);
8567 if (GET_CODE (set) != SET)
8568 abort ();
9076b9c1
JH
8569 if (GET_CODE (SET_SRC (set)) != COMPARE)
8570 abort ();
16189740
RH
8571
8572 set_mode = GET_MODE (SET_DEST (set));
8573 switch (set_mode)
8574 {
9076b9c1
JH
8575 case CCNOmode:
8576 if (req_mode != CCNOmode
8577 && (req_mode != CCmode
8578 || XEXP (SET_SRC (set), 1) != const0_rtx))
8579 return 0;
8580 break;
16189740 8581 case CCmode:
9076b9c1 8582 if (req_mode == CCGCmode)
16189740
RH
8583 return 0;
8584 /* FALLTHRU */
9076b9c1
JH
8585 case CCGCmode:
8586 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8587 return 0;
8588 /* FALLTHRU */
8589 case CCGOCmode:
16189740
RH
8590 if (req_mode == CCZmode)
8591 return 0;
8592 /* FALLTHRU */
8593 case CCZmode:
8594 break;
8595
8596 default:
8597 abort ();
8598 }
8599
8600 return (GET_MODE (SET_SRC (set)) == set_mode);
8601}
8602
e075ae69
RH
8603/* Generate insn patterns to do an integer compare of OPERANDS. */
8604
8605static rtx
b96a374d 8606ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
8607{
8608 enum machine_mode cmpmode;
8609 rtx tmp, flags;
8610
8611 cmpmode = SELECT_CC_MODE (code, op0, op1);
8612 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8613
8614 /* This is very simple, but making the interface the same as in the
8615 FP case makes the rest of the code easier. */
8616 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8617 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8618
8619 /* Return the test that should be put into the flags user, i.e.
8620 the bcc, scc, or cmov instruction. */
8621 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8622}
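
/* For example, for (GE, reg, const0_rtx) SELECT_CC_MODE yields
   CCGOCmode, so the code above emits
     (set (reg:CCGOC 17) (compare:CCGOC (reg) (const_int 0)))
   and hands back (ge (reg:CCGOC 17) (const_int 0)) to the flags
   user.  */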
8623
3a3677ff
RH
8624/* Figure out whether to use ordered or unordered fp comparisons.
8625 Return the appropriate mode to use. */
e075ae69 8626
b1cdafbb 8627enum machine_mode
b96a374d 8628ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8629{
9e7adcb3
JH
8630 /* ??? In order to make all comparisons reversible, we do all comparisons
8631 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8632     all forms of trapping and nontrapping comparisons, we can make inequality
8633 comparisons trapping again, since it results in better code when using
8634 FCOM based compares. */
8635 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8636}
8637
9076b9c1 8638enum machine_mode
b96a374d 8639ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9076b9c1
JH
8640{
8641 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8642 return ix86_fp_compare_mode (code);
8643 switch (code)
8644 {
8645 /* Only zero flag is needed. */
8646 case EQ: /* ZF=0 */
8647 case NE: /* ZF!=0 */
8648 return CCZmode;
8649 /* Codes needing carry flag. */
265dab10
JH
8650 case GEU: /* CF=0 */
8651 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8652 case LTU: /* CF=1 */
8653 case LEU: /* CF=1 | ZF=1 */
265dab10 8654 return CCmode;
9076b9c1
JH
8655 /* Codes possibly doable only with sign flag when
8656 comparing against zero. */
8657 case GE: /* SF=OF or SF=0 */
7e08e190 8658 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8659 if (op1 == const0_rtx)
8660 return CCGOCmode;
8661 else
8662 /* For other cases Carry flag is not required. */
8663 return CCGCmode;
8664    /* Codes doable only with the sign flag when comparing
8665       against zero, but there is no jump instruction for them,
4aae8a9a 8666       so we need to use relational tests against the overflow
9076b9c1
JH
8667       flag, which therefore must be zero.  */
8668 case GT: /* ZF=0 & SF=OF */
8669 case LE: /* ZF=1 | SF<>OF */
8670 if (op1 == const0_rtx)
8671 return CCNOmode;
8672 else
8673 return CCGCmode;
7fcd7218
JH
8674    /* The strcmp pattern does (use flags), and combine may ask us for the
8675       proper mode.  */
8676 case USE:
8677 return CCmode;
9076b9c1 8678 default:
0f290768 8679 abort ();
9076b9c1
JH
8680 }
8681}
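
/* A few instances of the mapping above:
     a == b			-> CCZmode   (only ZF needed)
     a <u b  (LTU)		-> CCmode    (needs the carry flag)
     a >=s 0 (GE against zero)	-> CCGOCmode
     a >s b  (GT, b nonzero)	-> CCGCmode  */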
8682
3a3677ff
RH
8683/* Return true if we should use an FCOMI instruction for this fp comparison. */
8684
a940d8bd 8685int
b96a374d 8686ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8687{
9e7adcb3
JH
8688 enum rtx_code swapped_code = swap_condition (code);
8689 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8690 || (ix86_fp_comparison_cost (swapped_code)
8691 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8692}
8693
0f290768 8694/* Swap, force into registers, or otherwise massage the two operands
3a3677ff 8695 to a fp comparison. The operands are updated in place; the new
d1f87653 8696 comparison code is returned. */
3a3677ff
RH
8697
8698static enum rtx_code
b96a374d 8699ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
3a3677ff
RH
8700{
8701 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8702 rtx op0 = *pop0, op1 = *pop1;
8703 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8704 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8705
e075ae69 8706 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8707 The same is true of the XFmode compare instructions. The same is
8708 true of the fcomi compare instructions. */
8709
0644b628
JH
8710 if (!is_sse
8711 && (fpcmp_mode == CCFPUmode
8712 || op_mode == XFmode
0644b628 8713 || ix86_use_fcomi_compare (code)))
e075ae69 8714 {
3a3677ff
RH
8715 op0 = force_reg (op_mode, op0);
8716 op1 = force_reg (op_mode, op1);
e075ae69
RH
8717 }
8718 else
8719 {
8720 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8721 things around if they appear profitable, otherwise force op0
8722 into a register. */
8723
8724 if (standard_80387_constant_p (op0) == 0
8725 || (GET_CODE (op0) == MEM
8726 && ! (standard_80387_constant_p (op1) == 0
8727 || GET_CODE (op1) == MEM)))
32b5b1aa 8728 {
e075ae69
RH
8729 rtx tmp;
8730 tmp = op0, op0 = op1, op1 = tmp;
8731 code = swap_condition (code);
8732 }
8733
8734 if (GET_CODE (op0) != REG)
3a3677ff 8735 op0 = force_reg (op_mode, op0);
e075ae69
RH
8736
8737 if (CONSTANT_P (op1))
8738 {
8739 if (standard_80387_constant_p (op1))
3a3677ff 8740 op1 = force_reg (op_mode, op1);
e075ae69 8741 else
3a3677ff 8742 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8743 }
8744 }
e9a25f70 8745
9e7adcb3
JH
8746 /* Try to rearrange the comparison to make it cheaper. */
8747 if (ix86_fp_comparison_cost (code)
8748 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8749 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8750 {
8751 rtx tmp;
8752 tmp = op0, op0 = op1, op1 = tmp;
8753 code = swap_condition (code);
8754 if (GET_CODE (op0) != REG)
8755 op0 = force_reg (op_mode, op0);
8756 }
8757
3a3677ff
RH
8758 *pop0 = op0;
8759 *pop1 = op1;
8760 return code;
8761}
8762
c0c102a9
JH
8763/* Convert comparison codes we use to represent FP comparison to integer
8764 code that will result in proper branch. Return UNKNOWN if no such code
8765 is available. */
8766static enum rtx_code
b96a374d 8767ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8768{
8769 switch (code)
8770 {
8771 case GT:
8772 return GTU;
8773 case GE:
8774 return GEU;
8775 case ORDERED:
8776 case UNORDERED:
8777 return code;
8778 break;
8779 case UNEQ:
8780 return EQ;
8781 break;
8782 case UNLT:
8783 return LTU;
8784 break;
8785 case UNLE:
8786 return LEU;
8787 break;
8788 case LTGT:
8789 return NE;
8790 break;
8791 default:
8792 return UNKNOWN;
8793 }
8794}
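
/* The unsigned codes above are correct because fcomi (and sahf after
   fnstsw) set ZF/CF exactly the way an unsigned integer compare does,
   as the flag table below shows; e.g. an FP ">" is tested with the
   "ja" (GTU) condition.  */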
8795
8796/* Split comparison code CODE into comparisons we can do using branch
8797   instructions.  BYPASS_CODE is the comparison code for a branch that
8798   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8799   is not required, its value is set to NIL.
8800 We never require more than two branches. */
8801static void
b96a374d
AJ
8802ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8803 enum rtx_code *first_code,
8804 enum rtx_code *second_code)
c0c102a9
JH
8805{
8806 *first_code = code;
8807 *bypass_code = NIL;
8808 *second_code = NIL;
8809
8810 /* The fcomi comparison sets flags as follows:
8811
8812 cmp ZF PF CF
8813 > 0 0 0
8814 < 0 0 1
8815 = 1 0 0
8816 un 1 1 1 */
8817
8818 switch (code)
8819 {
8820 case GT: /* GTU - CF=0 & ZF=0 */
8821 case GE: /* GEU - CF=0 */
8822 case ORDERED: /* PF=0 */
8823 case UNORDERED: /* PF=1 */
8824 case UNEQ: /* EQ - ZF=1 */
8825 case UNLT: /* LTU - CF=1 */
8826 case UNLE: /* LEU - CF=1 | ZF=1 */
8827 case LTGT: /* EQ - ZF=0 */
8828 break;
8829 case LT: /* LTU - CF=1 - fails on unordered */
8830 *first_code = UNLT;
8831 *bypass_code = UNORDERED;
8832 break;
8833 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8834 *first_code = UNLE;
8835 *bypass_code = UNORDERED;
8836 break;
8837 case EQ: /* EQ - ZF=1 - fails on unordered */
8838 *first_code = UNEQ;
8839 *bypass_code = UNORDERED;
8840 break;
8841 case NE: /* NE - ZF=0 - fails on unordered */
8842 *first_code = LTGT;
8843 *second_code = UNORDERED;
8844 break;
8845 case UNGE: /* GEU - CF=0 - fails on unordered */
8846 *first_code = GE;
8847 *second_code = UNORDERED;
8848 break;
8849 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8850 *first_code = GT;
8851 *second_code = UNORDERED;
8852 break;
8853 default:
8854 abort ();
8855 }
8856 if (!TARGET_IEEE_FP)
8857 {
8858 *second_code = NIL;
8859 *bypass_code = NIL;
8860 }
8861}
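
/* For example, with TARGET_IEEE_FP an LT comparison splits into
   first_code UNLT plus an UNORDERED bypass branch (a plain LTU test
   would fire incorrectly on a NaN), while NE splits into LTGT plus a
   second UNORDERED branch, since an unordered result must also count
   as "not equal".  */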
8862
9e7adcb3 8863/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
5bdc5878 8864   All following functions use the number of instructions as a cost metric.
9e7adcb3
JH
8865   In the future this should be tweaked to compute bytes for optimize_size and
8866   take into account the performance of various instructions on various CPUs.  */
8867static int
b96a374d 8868ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
8869{
8870 if (!TARGET_IEEE_FP)
8871 return 4;
8872 /* The cost of code output by ix86_expand_fp_compare. */
8873 switch (code)
8874 {
8875 case UNLE:
8876 case UNLT:
8877 case LTGT:
8878 case GT:
8879 case GE:
8880 case UNORDERED:
8881 case ORDERED:
8882 case UNEQ:
8883 return 4;
8884 break;
8885 case LT:
8886 case NE:
8887 case EQ:
8888 case UNGE:
8889 return 5;
8890 break;
8891 case LE:
8892 case UNGT:
8893 return 6;
8894 break;
8895 default:
8896 abort ();
8897 }
8898}
8899
8900/* Return cost of comparison done using fcomi operation.
8901 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8902static int
b96a374d 8903ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
8904{
8905 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8906  /* Return an arbitrarily high cost when the instruction is not supported - this
9e7adcb3
JH
8907     prevents gcc from using it.  */
8908 if (!TARGET_CMOVE)
8909 return 1024;
8910 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8911 return (bypass_code != NIL || second_code != NIL) + 2;
8912}
8913
8914/* Return cost of comparison done using sahf operation.
8915 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8916static int
b96a374d 8917ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
8918{
8919 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8920  /* Return an arbitrarily high cost when the instruction is not preferred - this
9e7adcb3
JH
8921     keeps gcc from using it.  */
8922 if (!TARGET_USE_SAHF && !optimize_size)
8923 return 1024;
8924 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8925 return (bypass_code != NIL || second_code != NIL) + 3;
8926}
8927
8928/* Compute cost of the comparison done using any method.
8929 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8930static int
b96a374d 8931ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
8932{
8933 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8934 int min;
8935
8936 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8937 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8938
8939 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8940 if (min > sahf_cost)
8941 min = sahf_cost;
8942 if (min > fcomi_cost)
8943 min = fcomi_cost;
8944 return min;
8945}
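
/* A worked example of the costs above: for LT with TARGET_IEEE_FP the
   arithmetic sequence costs 5, sahf costs 1 + 3 = 4 (one extra branch
   for the UNORDERED bypass) and fcomi costs 1 + 2 = 3, so fcomi is
   preferred whenever TARGET_CMOVE makes it available.  */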
c0c102a9 8946
3a3677ff
RH
8947/* Generate insn patterns to do a floating point compare of OPERANDS. */
8948
9e7adcb3 8949static rtx
b96a374d
AJ
8950ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8951 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
8952{
8953 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8954 rtx tmp, tmp2;
9e7adcb3 8955 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8956 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8957
8958 fpcmp_mode = ix86_fp_compare_mode (code);
8959 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8960
9e7adcb3
JH
8961 if (second_test)
8962 *second_test = NULL_RTX;
8963 if (bypass_test)
8964 *bypass_test = NULL_RTX;
8965
c0c102a9
JH
8966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8967
9e7adcb3
JH
8968 /* Do fcomi/sahf based test when profitable. */
8969 if ((bypass_code == NIL || bypass_test)
8970 && (second_code == NIL || second_test)
8971 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8972 {
c0c102a9
JH
8973 if (TARGET_CMOVE)
8974 {
8975 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8976 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8977 tmp);
8978 emit_insn (tmp);
8979 }
8980 else
8981 {
8982 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8983 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8984 if (!scratch)
8985 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8986 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8987 emit_insn (gen_x86_sahf_1 (scratch));
8988 }
e075ae69
RH
8989
8990 /* The FP codes work out to act like unsigned. */
9a915772 8991 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8992 code = first_code;
8993 if (bypass_code != NIL)
8994 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8995 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8996 const0_rtx);
8997 if (second_code != NIL)
8998 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8999 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9000 const0_rtx);
e075ae69
RH
9001 }
9002 else
9003 {
9004 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 9005 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 9006 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
9007 if (!scratch)
9008 scratch = gen_reg_rtx (HImode);
3a3677ff 9009 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 9010
9a915772
JH
9011      /* In the unordered case, we have to check C2 for NaNs, which
9012 doesn't happen to work out to anything nice combination-wise.
9013 So do some bit twiddling on the value we've got in AH to come
9014 up with an appropriate set of condition codes. */
e075ae69 9015
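      /* For reference: after fnstsw the FPU condition bits appear in AH
	 as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so e.g. a mask of 0x45
	 tests C3|C2|C0 at once, and sahf copies them into ZF, PF and CF
	 respectively.  That is what the magic constants below encode.  */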
9a915772
JH
9016 intcmp_mode = CCNOmode;
9017 switch (code)
32b5b1aa 9018 {
9a915772
JH
9019 case GT:
9020 case UNGT:
9021 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 9022 {
3a3677ff 9023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 9024 code = EQ;
9a915772
JH
9025 }
9026 else
9027 {
9028 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9029 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9030 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9031 intcmp_mode = CCmode;
9032 code = GEU;
9033 }
9034 break;
9035 case LT:
9036 case UNLT:
9037 if (code == LT && TARGET_IEEE_FP)
9038 {
3a3677ff
RH
9039 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9040 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
9041 intcmp_mode = CCmode;
9042 code = EQ;
9a915772
JH
9043 }
9044 else
9045 {
9046 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9047 code = NE;
9048 }
9049 break;
9050 case GE:
9051 case UNGE:
9052 if (code == GE || !TARGET_IEEE_FP)
9053 {
3a3677ff 9054 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 9055 code = EQ;
9a915772
JH
9056 }
9057 else
9058 {
9059 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9060 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9061 GEN_INT (0x01)));
9062 code = NE;
9063 }
9064 break;
9065 case LE:
9066 case UNLE:
9067 if (code == LE && TARGET_IEEE_FP)
9068 {
3a3677ff
RH
9069 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9070 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9071 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
9072 intcmp_mode = CCmode;
9073 code = LTU;
9a915772
JH
9074 }
9075 else
9076 {
9077 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9078 code = NE;
9079 }
9080 break;
9081 case EQ:
9082 case UNEQ:
9083 if (code == EQ && TARGET_IEEE_FP)
9084 {
3a3677ff
RH
9085 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9086 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
9087 intcmp_mode = CCmode;
9088 code = EQ;
9a915772
JH
9089 }
9090 else
9091 {
3a3677ff
RH
9092 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9093 code = NE;
9094 break;
9a915772
JH
9095 }
9096 break;
9097 case NE:
9098 case LTGT:
9099 if (code == NE && TARGET_IEEE_FP)
9100 {
3a3677ff 9101 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
9102 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9103 GEN_INT (0x40)));
3a3677ff 9104 code = NE;
9a915772
JH
9105 }
9106 else
9107 {
3a3677ff
RH
9108 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9109 code = EQ;
32b5b1aa 9110 }
9a915772
JH
9111 break;
9112
9113 case UNORDERED:
9114 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9115 code = NE;
9116 break;
9117 case ORDERED:
9118 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9119 code = EQ;
9120 break;
9121
9122 default:
9123 abort ();
32b5b1aa 9124 }
32b5b1aa 9125 }
e075ae69
RH
9126
9127 /* Return the test that should be put into the flags user, i.e.
9128 the bcc, scc, or cmov instruction. */
9129 return gen_rtx_fmt_ee (code, VOIDmode,
9130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9131 const0_rtx);
9132}
9133
9e3e266c 9134rtx
b96a374d 9135ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
9136{
9137 rtx op0, op1, ret;
9138 op0 = ix86_compare_op0;
9139 op1 = ix86_compare_op1;
9140
a1b8572c
JH
9141 if (second_test)
9142 *second_test = NULL_RTX;
9143 if (bypass_test)
9144 *bypass_test = NULL_RTX;
9145
e075ae69 9146 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9147 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9148 second_test, bypass_test);
32b5b1aa 9149 else
e075ae69
RH
9150 ret = ix86_expand_int_compare (code, op0, op1);
9151
9152 return ret;
9153}
9154
03598dea
JH
9155/* Return true if the CODE will result in a nontrivial jump sequence.  */
9156bool
b96a374d 9157ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9158{
9159 enum rtx_code bypass_code, first_code, second_code;
9160 if (!TARGET_CMOVE)
9161 return true;
9162 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9163 return bypass_code != NIL || second_code != NIL;
9164}
9165
e075ae69 9166void
b96a374d 9167ix86_expand_branch (enum rtx_code code, rtx label)
e075ae69 9168{
3a3677ff 9169 rtx tmp;
e075ae69 9170
3a3677ff 9171 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 9172 {
3a3677ff
RH
9173 case QImode:
9174 case HImode:
9175 case SImode:
0d7d98ee 9176 simple:
a1b8572c 9177 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
9178 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9179 gen_rtx_LABEL_REF (VOIDmode, label),
9180 pc_rtx);
9181 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 9182 return;
e075ae69 9183
3a3677ff
RH
9184 case SFmode:
9185 case DFmode:
0f290768 9186 case XFmode:
3a3677ff
RH
9187 {
9188 rtvec vec;
9189 int use_fcomi;
03598dea 9190 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
9191
9192 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9193 &ix86_compare_op1);
fce5a9f2 9194
03598dea
JH
9195 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9196
9197      /* Check whether we will use the natural sequence with one jump.  If
9198	 so, we can expand the jump early.  Otherwise delay expansion by
9199	 creating a compound insn so as not to confuse the optimizers.  */
9200 if (bypass_code == NIL && second_code == NIL
9201 && TARGET_CMOVE)
9202 {
9203 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9204 gen_rtx_LABEL_REF (VOIDmode, label),
9205 pc_rtx, NULL_RTX);
9206 }
9207 else
9208 {
9209 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9210 ix86_compare_op0, ix86_compare_op1);
9211 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9212 gen_rtx_LABEL_REF (VOIDmode, label),
9213 pc_rtx);
9214 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9215
9216 use_fcomi = ix86_use_fcomi_compare (code);
9217 vec = rtvec_alloc (3 + !use_fcomi);
9218 RTVEC_ELT (vec, 0) = tmp;
9219 RTVEC_ELT (vec, 1)
9220 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9221 RTVEC_ELT (vec, 2)
9222 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9223 if (! use_fcomi)
9224 RTVEC_ELT (vec, 3)
9225 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9226
9227 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9228 }
3a3677ff
RH
9229 return;
9230 }
32b5b1aa 9231
3a3677ff 9232 case DImode:
0d7d98ee
JH
9233 if (TARGET_64BIT)
9234 goto simple;
3a3677ff
RH
9235 /* Expand DImode branch into multiple compare+branch. */
9236 {
9237 rtx lo[2], hi[2], label2;
9238 enum rtx_code code1, code2, code3;
32b5b1aa 9239
3a3677ff
RH
9240 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9241 {
9242 tmp = ix86_compare_op0;
9243 ix86_compare_op0 = ix86_compare_op1;
9244 ix86_compare_op1 = tmp;
9245 code = swap_condition (code);
9246 }
9247 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9248 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 9249
3a3677ff
RH
9250 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9251 avoid two branches. This costs one extra insn, so disable when
9252 optimizing for size. */
32b5b1aa 9253
3a3677ff
RH
9254 if ((code == EQ || code == NE)
9255 && (!optimize_size
9256 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9257 {
9258 rtx xor0, xor1;
32b5b1aa 9259
3a3677ff
RH
9260 xor1 = hi[0];
9261 if (hi[1] != const0_rtx)
9262 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9263 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 9264
3a3677ff
RH
9265 xor0 = lo[0];
9266 if (lo[1] != const0_rtx)
9267 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9268 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 9269
3a3677ff
RH
9270 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9271 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 9272
3a3677ff
RH
9273 ix86_compare_op0 = tmp;
9274 ix86_compare_op1 = const0_rtx;
9275 ix86_expand_branch (code, label);
9276 return;
9277 }
e075ae69 9278
1f9124e4
JJ
9279	/* Otherwise, if we are doing a less-than or greater-or-equal-than
9280	   comparison, op1 is a constant and the low word is zero, we can
9281	   just examine the high word.  */
32b5b1aa 9282
1f9124e4
JJ
9283 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9284 switch (code)
9285 {
9286 case LT: case LTU: case GE: case GEU:
9287 ix86_compare_op0 = hi[0];
9288 ix86_compare_op1 = hi[1];
9289 ix86_expand_branch (code, label);
9290 return;
9291 default:
9292 break;
9293 }
e075ae69 9294
3a3677ff 9295 /* Otherwise, we need two or three jumps. */
e075ae69 9296
3a3677ff 9297 label2 = gen_label_rtx ();
e075ae69 9298
3a3677ff
RH
9299 code1 = code;
9300 code2 = swap_condition (code);
9301 code3 = unsigned_condition (code);
e075ae69 9302
3a3677ff
RH
9303 switch (code)
9304 {
9305 case LT: case GT: case LTU: case GTU:
9306 break;
e075ae69 9307
3a3677ff
RH
9308 case LE: code1 = LT; code2 = GT; break;
9309 case GE: code1 = GT; code2 = LT; break;
9310 case LEU: code1 = LTU; code2 = GTU; break;
9311 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 9312
3a3677ff
RH
9313 case EQ: code1 = NIL; code2 = NE; break;
9314 case NE: code2 = NIL; break;
e075ae69 9315
3a3677ff
RH
9316 default:
9317 abort ();
9318 }
e075ae69 9319
3a3677ff
RH
9320 /*
9321 * a < b =>
9322 * if (hi(a) < hi(b)) goto true;
9323 * if (hi(a) > hi(b)) goto false;
9324 * if (lo(a) < lo(b)) goto true;
9325 * false:
9326 */
9327
9328 ix86_compare_op0 = hi[0];
9329 ix86_compare_op1 = hi[1];
9330
9331 if (code1 != NIL)
9332 ix86_expand_branch (code1, label);
9333 if (code2 != NIL)
9334 ix86_expand_branch (code2, label2);
9335
9336 ix86_compare_op0 = lo[0];
9337 ix86_compare_op1 = lo[1];
9338 ix86_expand_branch (code3, label);
9339
9340 if (code2 != NIL)
9341 emit_label (label2);
9342 return;
9343 }
e075ae69 9344
3a3677ff
RH
9345 default:
9346 abort ();
9347 }
32b5b1aa 9348}
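
/* A worked instance of the DImode splitting above: for a signed
   "a <= b" on a 32-bit target, code1/code2/code3 become LT/GT/LEU:

	if (hi(a) <  hi(b)) goto label;
	if (hi(a) >  hi(b)) goto label2;
	if (lo(a) <=u lo(b)) goto label;
      label2:						*/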
e075ae69 9349
9e7adcb3
JH
9350/* Split branch based on floating point condition. */
9351void
b96a374d
AJ
9352ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9353 rtx target1, rtx target2, rtx tmp)
9e7adcb3
JH
9354{
9355 rtx second, bypass;
9356 rtx label = NULL_RTX;
03598dea 9357 rtx condition;
6b24c259
JH
9358 int bypass_probability = -1, second_probability = -1, probability = -1;
9359 rtx i;
9e7adcb3
JH
9360
9361 if (target2 != pc_rtx)
9362 {
9363 rtx tmp = target2;
9364 code = reverse_condition_maybe_unordered (code);
9365 target2 = target1;
9366 target1 = tmp;
9367 }
9368
9369 condition = ix86_expand_fp_compare (code, op1, op2,
9370 tmp, &second, &bypass);
6b24c259
JH
9371
9372 if (split_branch_probability >= 0)
9373 {
9374 /* Distribute the probabilities across the jumps.
9375	 Assume that BYPASS and SECOND always test
9376	 for UNORDERED.  */
9377 probability = split_branch_probability;
9378
d6a7951f 9379      /* A value of 1 is low enough that the probability need not
6b24c259
JH
9380	 be updated.  Later we may run some experiments and see
9381	 if unordered values are more frequent in practice.  */
9382 if (bypass)
9383 bypass_probability = 1;
9384 if (second)
9385 second_probability = 1;
9386 }
9e7adcb3
JH
9387 if (bypass != NULL_RTX)
9388 {
9389 label = gen_label_rtx ();
6b24c259
JH
9390 i = emit_jump_insn (gen_rtx_SET
9391 (VOIDmode, pc_rtx,
9392 gen_rtx_IF_THEN_ELSE (VOIDmode,
9393 bypass,
9394 gen_rtx_LABEL_REF (VOIDmode,
9395 label),
9396 pc_rtx)));
9397 if (bypass_probability >= 0)
9398 REG_NOTES (i)
9399 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9400 GEN_INT (bypass_probability),
9401 REG_NOTES (i));
9402 }
9403 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
9404 (VOIDmode, pc_rtx,
9405 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
9406 condition, target1, target2)));
9407 if (probability >= 0)
9408 REG_NOTES (i)
9409 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9410 GEN_INT (probability),
9411 REG_NOTES (i));
9412 if (second != NULL_RTX)
9e7adcb3 9413 {
6b24c259
JH
9414 i = emit_jump_insn (gen_rtx_SET
9415 (VOIDmode, pc_rtx,
9416 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9417 target2)));
9418 if (second_probability >= 0)
9419 REG_NOTES (i)
9420 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9421 GEN_INT (second_probability),
9422 REG_NOTES (i));
9e7adcb3 9423 }
9e7adcb3
JH
9424 if (label != NULL_RTX)
9425 emit_label (label);
9426}
9427
32b5b1aa 9428int
b96a374d 9429ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9430{
3a627503 9431 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9432 rtx second_test, bypass_test;
e075ae69 9433
885a70fd
JH
9434 if (GET_MODE (ix86_compare_op0) == DImode
9435 && !TARGET_64BIT)
e075ae69
RH
9436 return 0; /* FAIL */
9437
b932f770
JH
9438 if (GET_MODE (dest) != QImode)
9439 abort ();
e075ae69 9440
a1b8572c 9441 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9442 PUT_MODE (ret, QImode);
9443
9444 tmp = dest;
a1b8572c 9445 tmpreg = dest;
32b5b1aa 9446
e075ae69 9447 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9448 if (bypass_test || second_test)
9449 {
9450 rtx test = second_test;
9451 int bypass = 0;
9452 rtx tmp2 = gen_reg_rtx (QImode);
9453 if (bypass_test)
9454 {
9455 if (second_test)
b531087a 9456 abort ();
a1b8572c
JH
9457 test = bypass_test;
9458 bypass = 1;
9459 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9460 }
9461 PUT_MODE (test, QImode);
9462 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9463
9464 if (bypass)
9465 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9466 else
9467 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9468 }
e075ae69 9469
3a627503
RS
9470 /* Attach a REG_EQUAL note describing the comparison result. */
9471 equiv = simplify_gen_relational (code, QImode,
9472 GET_MODE (ix86_compare_op0),
9473 ix86_compare_op0, ix86_compare_op1);
9474 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9475
e075ae69 9476 return 1; /* DONE */
32b5b1aa 9477}
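
/* For example, an integer "dest = (a < b)" becomes a compare followed
   by setl into the QImode destination, while an IEEE FP equality needs
   two flag tests whose results are combined with the andqi3/iorqi3
   sequence above.  */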
e075ae69 9478
c35d187f
RH
9479/* Expand a comparison setting or clearing the carry flag.  Return true
9480   when successful, and set *POP to the comparison operation.  */
9481static bool
b96a374d 9482ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9483{
9484 enum machine_mode mode =
9485 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9486
9487  /* Do not handle DImode compares that go through a special path.  Also we
9488     cannot yet deal with all FP compares; more could be added.  */
e6e81735
JH
9489 if ((mode == DImode && !TARGET_64BIT))
9490 return false;
9491 if (FLOAT_MODE_P (mode))
9492 {
9493 rtx second_test = NULL, bypass_test = NULL;
9494 rtx compare_op, compare_seq;
9495
9496      /* Shortcut:  the following common codes never translate into carry flag compares.  */
9497 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9498 || code == ORDERED || code == UNORDERED)
9499 return false;
9500
9501      /* These comparisons require the zero flag; swap operands so they won't.  */
9502 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9503 && !TARGET_IEEE_FP)
9504 {
9505 rtx tmp = op0;
9506 op0 = op1;
9507 op1 = tmp;
9508 code = swap_condition (code);
9509 }
9510
c51e6d85
KH
9511      /* Try to expand the comparison and verify that we end up with a carry
9512	 flag based comparison.  This fails only when we decide to expand the
9513	 comparison using arithmetic, which is not a common scenario.  */
e6e81735
JH
9514 start_sequence ();
9515 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9516 &second_test, &bypass_test);
9517 compare_seq = get_insns ();
9518 end_sequence ();
9519
9520 if (second_test || bypass_test)
9521 return false;
9522 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9523 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9524 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9525 else
9526 code = GET_CODE (compare_op);
9527 if (code != LTU && code != GEU)
9528 return false;
9529 emit_insn (compare_seq);
9530 *pop = compare_op;
9531 return true;
9532 }
9533 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9534 return false;
9535 switch (code)
9536 {
9537 case LTU:
9538 case GEU:
9539 break;
9540
9541 /* Convert a==0 into (unsigned)a<1. */
9542 case EQ:
9543 case NE:
9544 if (op1 != const0_rtx)
9545 return false;
9546 op1 = const1_rtx;
9547 code = (code == EQ ? LTU : GEU);
9548 break;
9549
9550 /* Convert a>b into b<a or a>=b-1. */
9551 case GTU:
9552 case LEU:
9553 if (GET_CODE (op1) == CONST_INT)
9554 {
9555 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9556	      /* Bail out on overflow.  We could still swap the operands, but
9557		 that would force loading the constant into a register.  */
9558 if (op1 == const0_rtx
9559 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9560 return false;
9561 code = (code == GTU ? GEU : LTU);
9562 }
9563 else
9564 {
9565 rtx tmp = op1;
9566 op1 = op0;
9567 op0 = tmp;
9568 code = (code == GTU ? LTU : GEU);
9569 }
9570 break;
9571
ccea753c 9572 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9573 case LT:
9574 case GE:
9575 if (mode == DImode || op1 != const0_rtx)
9576 return false;
ccea753c 9577 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9578 code = (code == LT ? GEU : LTU);
9579 break;
9580 case LE:
9581 case GT:
9582 if (mode == DImode || op1 != constm1_rtx)
9583 return false;
ccea753c 9584 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9585 code = (code == LE ? GEU : LTU);
9586 break;
9587
9588 default:
9589 return false;
9590 }
ebe75517
JH
9591  /* Swapping operands may cause a constant to appear as the first operand.  */
9592 if (!nonimmediate_operand (op0, VOIDmode))
9593 {
9594 if (no_new_pseudos)
9595 return false;
9596 op0 = force_reg (mode, op0);
9597 }
4977bab6
ZW
9598 ix86_compare_op0 = op0;
9599 ix86_compare_op1 = op1;
9600 *pop = ix86_expand_compare (code, NULL, NULL);
9601 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9602 abort ();
9603 return true;
9604}
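
/* Worked examples of the conversions above: "a == 0" is tested as
   (unsigned) a < 1, "a >u 4" as (unsigned) a >= 5, and a signed
   "a < 0" as (unsigned) a >= 0x80000000.  Each form leaves the result
   purely in the carry flag, ready for the sbb based sequences in
   ix86_expand_int_movcc below.  */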
9605
32b5b1aa 9606int
b96a374d 9607ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9608{
e075ae69
RH
9609 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9610 rtx compare_seq, compare_op;
a1b8572c 9611 rtx second_test, bypass_test;
635559ab 9612 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9613  bool sign_bit_compare_p = false;
3a3677ff 9614
e075ae69 9615 start_sequence ();
a1b8572c 9616 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9617 compare_seq = get_insns ();
e075ae69
RH
9618 end_sequence ();
9619
9620 compare_code = GET_CODE (compare_op);
9621
4977bab6
ZW
9622 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9623 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9624 sign_bit_compare_p = true;
9625
e075ae69
RH
9626 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9627 HImode insns, we'd be swallowed in word prefix ops. */
9628
4977bab6 9629 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9630 && (mode != DImode || TARGET_64BIT)
0f290768 9631 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9632 && GET_CODE (operands[3]) == CONST_INT)
9633 {
9634 rtx out = operands[0];
9635 HOST_WIDE_INT ct = INTVAL (operands[2]);
9636 HOST_WIDE_INT cf = INTVAL (operands[3]);
9637 HOST_WIDE_INT diff;
9638
4977bab6
ZW
9639 diff = ct - cf;
9640      /* Sign bit compares are better done using shifts than by using
b96a374d 9641	 sbb.  */
4977bab6
ZW
9642 if (sign_bit_compare_p
9643 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9644 ix86_compare_op1, &compare_op))
e075ae69 9645 {
e075ae69
RH
9646 /* Detect overlap between destination and compare sources. */
9647 rtx tmp = out;
9648
4977bab6 9649 if (!sign_bit_compare_p)
36583fea 9650 {
e6e81735
JH
9651 bool fpcmp = false;
9652
4977bab6
ZW
9653 compare_code = GET_CODE (compare_op);
9654
e6e81735
JH
9655 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9656 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9657 {
9658 fpcmp = true;
9659 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9660 }
9661
4977bab6
ZW
9662	      /* To simplify the rest of the code, restrict to the GEU case.  */
9663 if (compare_code == LTU)
9664 {
9665 HOST_WIDE_INT tmp = ct;
9666 ct = cf;
9667 cf = tmp;
9668 compare_code = reverse_condition (compare_code);
9669 code = reverse_condition (code);
9670 }
e6e81735
JH
9671 else
9672 {
9673 if (fpcmp)
9674 PUT_CODE (compare_op,
9675 reverse_condition_maybe_unordered
9676 (GET_CODE (compare_op)));
9677 else
9678 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9679 }
4977bab6 9680 diff = ct - cf;
36583fea 9681
4977bab6
ZW
9682 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9683 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9684 tmp = gen_reg_rtx (mode);
e075ae69 9685
4977bab6 9686 if (mode == DImode)
e6e81735 9687 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9688 else
e6e81735 9689 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9690 }
14f73b5a 9691 else
4977bab6
ZW
9692 {
9693 if (code == GT || code == GE)
9694 code = reverse_condition (code);
9695 else
9696 {
9697 HOST_WIDE_INT tmp = ct;
9698 ct = cf;
9699 cf = tmp;
5fb48685 9700 diff = ct - cf;
4977bab6
ZW
9701 }
9702 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9703 ix86_compare_op1, VOIDmode, 0, -1);
9704 }
e075ae69 9705
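	  /* At this point TMP holds 0 or -1: the x86_movsicc_0_m1 and
	     x86_movdicc_0_m1_rex64 patterns above expand to "sbb reg,reg",
	     which subtracts the register and the carry flag from itself,
	     yielding -1 when CF is set and 0 otherwise.  The arms below
	     turn that mask into ct/cf; e.g. for ct = 5, cf = 2 the general
	     case emits "andl $-3, %reg" followed by "addl $5, %reg"
	     (-1 -> 2, 0 -> 5).  */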
36583fea
JH
9706 if (diff == 1)
9707 {
9708 /*
9709 * cmpl op0,op1
9710 * sbbl dest,dest
9711 * [addl dest, ct]
9712 *
9713 * Size 5 - 8.
9714 */
9715 if (ct)
b96a374d 9716 tmp = expand_simple_binop (mode, PLUS,
635559ab 9717 tmp, GEN_INT (ct),
4977bab6 9718 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9719 }
9720 else if (cf == -1)
9721 {
9722 /*
9723 * cmpl op0,op1
9724 * sbbl dest,dest
9725 * orl $ct, dest
9726 *
9727 * Size 8.
9728 */
635559ab
JH
9729 tmp = expand_simple_binop (mode, IOR,
9730 tmp, GEN_INT (ct),
4977bab6 9731 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9732 }
9733 else if (diff == -1 && ct)
9734 {
9735 /*
9736 * cmpl op0,op1
9737 * sbbl dest,dest
06ec023f 9738 * notl dest
36583fea
JH
9739 * [addl dest, cf]
9740 *
9741 * Size 8 - 11.
9742 */
4977bab6 9743 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9744 if (cf)
b96a374d 9745 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9746 copy_rtx (tmp), GEN_INT (cf),
9747 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9748 }
9749 else
9750 {
9751 /*
9752 * cmpl op0,op1
9753 * sbbl dest,dest
06ec023f 9754 * [notl dest]
36583fea
JH
9755 * andl cf - ct, dest
9756 * [addl dest, ct]
9757 *
9758 * Size 8 - 11.
9759 */
06ec023f
RB
9760
9761 if (cf == 0)
9762 {
9763 cf = ct;
9764 ct = 0;
4977bab6 9765 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9766 }
9767
635559ab 9768 tmp = expand_simple_binop (mode, AND,
4977bab6 9769 copy_rtx (tmp),
d8bf17f9 9770 gen_int_mode (cf - ct, mode),
4977bab6 9771 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9772 if (ct)
b96a374d 9773 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9774 copy_rtx (tmp), GEN_INT (ct),
9775 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9776 }
e075ae69 9777
4977bab6
ZW
9778 if (!rtx_equal_p (tmp, out))
9779 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9780
9781 return 1; /* DONE */
9782 }
9783
e075ae69
RH
9784 if (diff < 0)
9785 {
9786 HOST_WIDE_INT tmp;
9787 tmp = ct, ct = cf, cf = tmp;
9788 diff = -diff;
734dba19
JH
9789 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9790 {
9791	      /* We may be reversing an unordered compare to a normal compare,
9792		 which is not valid in general (we may convert a non-trapping
9793		 condition into a trapping one), but on i386 we currently emit
9794		 all comparisons unordered.  */
9795 compare_code = reverse_condition_maybe_unordered (compare_code);
9796 code = reverse_condition_maybe_unordered (code);
9797 }
9798 else
9799 {
9800 compare_code = reverse_condition (compare_code);
9801 code = reverse_condition (code);
9802 }
e075ae69 9803 }
0f2a3457
JJ
9804
9805 compare_code = NIL;
9806 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9807 && GET_CODE (ix86_compare_op1) == CONST_INT)
9808 {
9809 if (ix86_compare_op1 == const0_rtx
9810 && (code == LT || code == GE))
9811 compare_code = code;
9812 else if (ix86_compare_op1 == constm1_rtx)
9813 {
9814 if (code == LE)
9815 compare_code = LT;
9816 else if (code == GT)
9817 compare_code = GE;
9818 }
9819 }
9820
9821 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9822 if (compare_code != NIL
9823 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9824 && (cf == -1 || ct == -1))
9825 {
9826 /* If lea code below could be used, only optimize
9827 if it results in a 2 insn sequence. */
9828
9829 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9830 || diff == 3 || diff == 5 || diff == 9)
9831 || (compare_code == LT && ct == -1)
9832 || (compare_code == GE && cf == -1))
9833 {
9834 /*
9835 * notl op1 (if necessary)
9836 * sarl $31, op1
9837 * orl cf, op1
9838 */
9839 if (ct != -1)
9840 {
9841 cf = ct;
b96a374d 9842 ct = -1;
0f2a3457
JJ
9843 code = reverse_condition (code);
9844 }
9845
9846 out = emit_store_flag (out, code, ix86_compare_op0,
9847 ix86_compare_op1, VOIDmode, 0, -1);
9848
9849 out = expand_simple_binop (mode, IOR,
9850 out, GEN_INT (cf),
9851 out, 1, OPTAB_DIRECT);
9852 if (out != operands[0])
9853 emit_move_insn (operands[0], out);
9854
9855 return 1; /* DONE */
9856 }
9857 }
9858
4977bab6 9859
635559ab
JH
9860 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9861 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9862 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 9863 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9864 {
9865 /*
9866 * xorl dest,dest
9867 * cmpl op1,op2
9868 * setcc dest
9869 * lea cf(dest*(ct-cf)),dest
9870 *
9871 * Size 14.
9872 *
9873 * This also catches the degenerate setcc-only case.
9874 */
9875
9876 rtx tmp;
9877 int nops;
9878
9879 out = emit_store_flag (out, code, ix86_compare_op0,
9880 ix86_compare_op1, VOIDmode, 0, 1);
9881
9882 nops = 0;
97f51ac4
RB
9883      /* On x86_64 the lea instruction operates on Pmode, so we need
9884	 to do the arithmetic in the proper mode to match.  */
e075ae69 9885 if (diff == 1)
068f5dea 9886 tmp = copy_rtx (out);
e075ae69
RH
9887 else
9888 {
885a70fd 9889 rtx out1;
068f5dea 9890 out1 = copy_rtx (out);
635559ab 9891 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9892 nops++;
9893 if (diff & 1)
9894 {
635559ab 9895 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9896 nops++;
9897 }
9898 }
9899 if (cf != 0)
9900 {
635559ab 9901 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9902 nops++;
9903 }
4977bab6 9904 if (!rtx_equal_p (tmp, out))
e075ae69 9905 {
14f73b5a 9906 if (nops == 1)
a5cf80f0 9907 out = force_operand (tmp, copy_rtx (out));
e075ae69 9908 else
4977bab6 9909 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9910 }
4977bab6 9911 if (!rtx_equal_p (out, operands[0]))
1985ef90 9912 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9913
9914 return 1; /* DONE */
9915 }
9916
9917 /*
9918 * General case: Jumpful:
9919 * xorl dest,dest cmpl op1, op2
9920 * cmpl op1, op2 movl ct, dest
9921 * setcc dest jcc 1f
9922 * decl dest movl cf, dest
9923 * andl (cf-ct),dest 1:
9924 * addl ct,dest
0f290768 9925 *
e075ae69
RH
9926 * Size 20. Size 14.
9927 *
9928 * This is reasonably steep, but branch mispredict costs are
9929       * high on modern CPUs, so consider failing only if optimizing
9930 * for space.
e075ae69
RH
9931 */
9932
4977bab6
ZW
9933 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9934 && BRANCH_COST >= 2)
e075ae69 9935 {
97f51ac4 9936 if (cf == 0)
e075ae69 9937 {
97f51ac4
RB
9938 cf = ct;
9939 ct = 0;
734dba19 9940 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9941	    /* We may be reversing an unordered compare to a normal compare,
9942	       which is not valid in general (we may convert a non-trapping
9943	       condition into a trapping one), but on i386 we currently
9944	       emit all comparisons unordered.  */
9945 code = reverse_condition_maybe_unordered (code);
9946 else
9947 {
9948 code = reverse_condition (code);
9949 if (compare_code != NIL)
9950 compare_code = reverse_condition (compare_code);
9951 }
9952 }
9953
9954 if (compare_code != NIL)
9955 {
9956 /* notl op1 (if needed)
9957 sarl $31, op1
9958 andl (cf-ct), op1
b96a374d 9959 addl ct, op1
0f2a3457
JJ
9960
9961 For x < 0 (resp. x <= -1) there will be no notl,
9962 so if possible swap the constants to get rid of the
9963 complement.
9964 True/false will be -1/0 while code below (store flag
9965 followed by decrement) is 0/-1, so the constants need
9966 to be exchanged once more. */
9967
9968 if (compare_code == GE || !cf)
734dba19 9969 {
b96a374d 9970 code = reverse_condition (code);
0f2a3457 9971 compare_code = LT;
734dba19
JH
9972 }
9973 else
9974 {
0f2a3457 9975 HOST_WIDE_INT tmp = cf;
b96a374d 9976 cf = ct;
0f2a3457 9977 ct = tmp;
734dba19 9978 }
0f2a3457
JJ
9979
9980 out = emit_store_flag (out, code, ix86_compare_op0,
9981 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9982 }
0f2a3457
JJ
9983 else
9984 {
9985 out = emit_store_flag (out, code, ix86_compare_op0,
9986 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9987
4977bab6
ZW
9988 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9989 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 9990 }
e075ae69 9991
4977bab6 9992 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 9993 gen_int_mode (cf - ct, mode),
4977bab6 9994 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 9995 if (ct)
4977bab6
ZW
9996 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9997 copy_rtx (out), 1, OPTAB_DIRECT);
9998 if (!rtx_equal_p (out, operands[0]))
9999 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10000
10001 return 1; /* DONE */
10002 }
10003 }
10004
4977bab6 10005 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10006 {
10007 /* Try a few things more with specific constants and a variable. */
10008
78a0d70c 10009 optab op;
e075ae69
RH
10010 rtx var, orig_out, out, tmp;
10011
4977bab6 10012 if (BRANCH_COST <= 2)
e075ae69
RH
10013 return 0; /* FAIL */
10014
0f290768 10015 /* If one of the two operands is an interesting constant, load a
e075ae69 10016 constant with the above and mask it in with a logical operation. */
0f290768 10017
e075ae69
RH
10018 if (GET_CODE (operands[2]) == CONST_INT)
10019 {
10020 var = operands[3];
4977bab6 10021 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10022 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10023 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10024 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10025 else
10026 return 0; /* FAIL */
e075ae69
RH
10027 }
10028 else if (GET_CODE (operands[3]) == CONST_INT)
10029 {
10030 var = operands[2];
4977bab6 10031 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10032 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10033 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10034 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10035 else
10036 return 0; /* FAIL */
e075ae69 10037 }
78a0d70c 10038 else
e075ae69
RH
10039 return 0; /* FAIL */
10040
10041 orig_out = operands[0];
635559ab 10042 tmp = gen_reg_rtx (mode);
e075ae69
RH
10043 operands[0] = tmp;
10044
10045 /* Recurse to get the constant loaded. */
10046 if (ix86_expand_int_movcc (operands) == 0)
10047 return 0; /* FAIL */
10048
10049 /* Mask in the interesting variable. */
635559ab 10050 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10051 OPTAB_WIDEN);
4977bab6
ZW
10052 if (!rtx_equal_p (out, orig_out))
10053 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10054
10055 return 1; /* DONE */
10056 }
10057
10058 /*
10059 * For comparison with above,
10060 *
10061 * movl cf,dest
10062 * movl ct,tmp
10063 * cmpl op1,op2
10064 * cmovcc tmp,dest
10065 *
10066 * Size 15.
10067 */
10068
635559ab
JH
10069 if (! nonimmediate_operand (operands[2], mode))
10070 operands[2] = force_reg (mode, operands[2]);
10071 if (! nonimmediate_operand (operands[3], mode))
10072 operands[3] = force_reg (mode, operands[3]);
e075ae69 10073
a1b8572c
JH
10074 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10075 {
635559ab 10076 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10077 emit_move_insn (tmp, operands[3]);
10078 operands[3] = tmp;
10079 }
10080 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10081 {
635559ab 10082 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10083 emit_move_insn (tmp, operands[2]);
10084 operands[2] = tmp;
10085 }
4977bab6 10086
c9682caf 10087 if (! register_operand (operands[2], VOIDmode)
b96a374d 10088 && (mode == QImode
4977bab6 10089 || ! register_operand (operands[3], VOIDmode)))
635559ab 10090 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10091
4977bab6
ZW
10092 if (mode == QImode
10093 && ! register_operand (operands[3], VOIDmode))
10094 operands[3] = force_reg (mode, operands[3]);
10095
e075ae69
RH
10096 emit_insn (compare_seq);
10097 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10098 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10099 compare_op, operands[2],
10100 operands[3])));
a1b8572c 10101 if (bypass_test)
4977bab6 10102 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10103 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10104 bypass_test,
4977bab6
ZW
10105 copy_rtx (operands[3]),
10106 copy_rtx (operands[0]))));
a1b8572c 10107 if (second_test)
4977bab6 10108 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10109 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10110 second_test,
4977bab6
ZW
10111 copy_rtx (operands[2]),
10112 copy_rtx (operands[0]))));
e075ae69
RH
10113
10114 return 1; /* DONE */
e9a25f70 10115}
e075ae69 10116
32b5b1aa 10117int
b96a374d 10118ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 10119{
e075ae69 10120 enum rtx_code code;
e075ae69 10121 rtx tmp;
a1b8572c 10122 rtx compare_op, second_test, bypass_test;
32b5b1aa 10123
0073023d
JH
10124  /* For SF/DFmode conditional moves based on comparisons
10125     in the same mode, we may want to use SSE min/max instructions.  */
965f5423
JH
10126 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10127 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10128 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
10129      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10130 && (!TARGET_IEEE_FP
10131 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
10132 /* We may be called from the post-reload splitter. */
10133 && (!REG_P (operands[0])
10134 || SSE_REG_P (operands[0])
52a661a6 10135 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
10136 {
10137 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10138 code = GET_CODE (operands[1]);
10139
10140 /* See if we have (cross) match between comparison operands and
10141 conditional move operands. */
10142 if (rtx_equal_p (operands[2], op1))
10143 {
10144 rtx tmp = op0;
10145 op0 = op1;
10146 op1 = tmp;
10147 code = reverse_condition_maybe_unordered (code);
10148 }
10149 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10150 {
10151 /* Check for min operation. */
4977bab6 10152 if (code == LT || code == UNLE)
0073023d 10153 {
4977bab6
ZW
10154 if (code == UNLE)
10155 {
10156 rtx tmp = op0;
10157 op0 = op1;
10158 op1 = tmp;
10159 }
0073023d
JH
10160 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10161 if (memory_operand (op0, VOIDmode))
10162 op0 = force_reg (GET_MODE (operands[0]), op0);
10163 if (GET_MODE (operands[0]) == SFmode)
10164 emit_insn (gen_minsf3 (operands[0], op0, op1));
10165 else
10166 emit_insn (gen_mindf3 (operands[0], op0, op1));
10167 return 1;
10168 }
10169 /* Check for max operation. */
4977bab6 10170 if (code == GT || code == UNGE)
0073023d 10171 {
4977bab6
ZW
10172 if (code == UNGE)
10173 {
10174 rtx tmp = op0;
10175 op0 = op1;
10176 op1 = tmp;
10177 }
0073023d
JH
10178 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10179 if (memory_operand (op0, VOIDmode))
10180 op0 = force_reg (GET_MODE (operands[0]), op0);
10181 if (GET_MODE (operands[0]) == SFmode)
10182 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10183 else
10184 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10185 return 1;
10186 }
10187 }
10188      /* Massage the condition so that it is accepted by
10189	 sse_comparison_operator.  In non-IEEE mode, try to canonicalize
10190	 the destination operand to be first in the comparison - this
10191	 helps reload to avoid extra moves.  */
10192 if (!sse_comparison_operator (operands[1], VOIDmode)
10193 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10194 {
10195 rtx tmp = ix86_compare_op0;
10196 ix86_compare_op0 = ix86_compare_op1;
10197 ix86_compare_op1 = tmp;
10198 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10199 VOIDmode, ix86_compare_op0,
10200 ix86_compare_op1);
10201 }
d1f87653 10202 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
10203 move. We also don't support the NE comparison on SSE, so try to
10204 avoid it. */
037f20f1
JH
10205 if ((rtx_equal_p (operands[0], operands[3])
10206 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10207 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
10208 {
10209 rtx tmp = operands[2];
10210 operands[2] = operands[3];
92d0fb09 10211 operands[3] = tmp;
0073023d
JH
10212 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10213 (GET_CODE (operands[1])),
10214 VOIDmode, ix86_compare_op0,
10215 ix86_compare_op1);
10216 }
10217 if (GET_MODE (operands[0]) == SFmode)
10218 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10219 operands[2], operands[3],
10220 ix86_compare_op0, ix86_compare_op1));
10221 else
10222 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10223 operands[2], operands[3],
10224 ix86_compare_op0, ix86_compare_op1));
10225 return 1;
10226 }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
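/* Worked example (illustration added for clarity, not part of the
   original sources): for an unsigned comparison feeding a conditional
   increment,

       if (a < b) x += 1;

   the carry-flag compare leaves CF = (a < b), so the whole statement
   becomes a single "adc x, 0"; the conditional decrement case is
   handled symmetrically with "sbb x, 0".  */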
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
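/* Worked example (illustration added for clarity): on !TARGET_64BIT a
   DFmode value in memory at (mem:DF addr) is returned as the two
   SImode parts (mem:SI addr) and (mem:SI addr+4); an XFmode value
   yields three SImode parts at addr, addr+4 and addr+8.  */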
static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            abort ();
        }
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have a 32bit push available.  In case
             this is a register, it is OK - we will just use the larger
             counterpart.  We also retype memory - these come from an
             attempt to avoid the REX prefix on moving the second half
             of a TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
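
/* The three ix86_split_*di functions below lower 64-bit shifts into
   32-bit operations.  Sketch of the constant-count left-shift case
   (illustration added for clarity): for "x << count" with
   32 <= count <= 63 the high word becomes the low source word shifted
   left by (count - 32) and the low word becomes zero; for count < 32
   an shld feeds the top bits of the low word into the high word while
   both halves shift.  */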
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);

          if (! reload_completed)
            emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
          else
            {
              emit_move_insn (high[0], low[0]);
              emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            }

          if (count > 32)
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = gen_reg_rtx (SImode);
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether
   VARIABLE is aligned to VALUE bytes; the emitted branch jumps to the
   returned label when it is.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}

/* Decrease COUNTREG by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
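/* Strategy overview (summary added for clarity): small or
   size-optimized copies are emitted as a plain "rep movsb"; constant,
   sufficiently aligned copies use "rep movsl" (or "rep movsq" on
   64-bit) plus up to three scalar moves for the tail; everything else
   takes the generic path below, which aligns the destination first.  */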
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out the proper mode for the counter.  For 32bits it is
     always SImode, for 64bits use SImode when possible, otherwise
     DImode.  Set count to the number of bytes copied when known at
     compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  start_sequence ();

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit a simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
        emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
                                        destreg, srcreg, countreg));
      else
        emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
                                  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
                                                destreg, srcreg, countreg));
              else
                emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
                                          destreg, srcreg, countreg));
            }
          else
            emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
                                            destreg, srcreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
        emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and
         results in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        {
          end_sequence ();
          return 0;
        }

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
                                          destreg, srcreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
                                    destreg, srcreg, countreg2));
        }

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }
  /* Figure out the proper mode for the counter.  For 32bits it is
     always SImode, for 64bits use SImode when possible, otherwise
     DImode.  Set count to the number of bytes copied when known at
     compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit a simple rep ; stosb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
        emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
                                         destreg, countreg));
      else
        emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
                                   destreg, countreg));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
                                                 destreg, countreg));
              else
                emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
                                           destreg, countreg));
            }
          else
            emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
                                             destreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and
         results in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
                                             ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                             : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
                                           destreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
                                     destreg, countreg2));
        }
      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strsetsi (destreg,
                                   gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}

/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding
     it unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo (strlen (bar), strlen (bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen (bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
                                         align, scratch4, scratch3));
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
                                     align, scratch4, scratch3));
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
             not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
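/* Rough shape of the emitted code (illustration added for clarity;
   register names are arbitrary):

       align_4:  mov   scratch, [out]     ; load four bytes
                 add   out, 4
                 ...zero-byte test on scratch into tmpreg...
                 jz    align_4            ; no zero byte, keep scanning
       ...then narrow down which byte was zero and back OUT up so it
       points at the terminating zero byte...  */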

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good
     idea to align this loop; it only makes programs larger and does
     not speed them up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
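  /* Worked example of the formula above (added for clarity): tmpreg
     = (x - 0x01010101) & ~x & 0x80808080.  A zero byte in x borrows
     to 0xff in the subtraction and keeps bit 7 under the ~x mask:
     x = 0x41004242 gives tmpreg = 0x00800000, while any x with no
     zero byte gives tmpreg = 0.  */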
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
        emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */
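  /* Encoding examples (added for clarity): plain [ebx] fits the
     one-byte modrm form with no extra bytes; [esp] has no such form
     and must be encoded with a SIB byte, which is what "wants an
     index" means here; and [ebp] with mod=00 would mean disp32, so a
     bare [ebp] is really encoded as [ebp+0] with a one-byte
     displacement.  */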

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form...  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
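/* Example (added for clarity): "add $3, %eax" can use the
   sign-extended 8-bit immediate form, costing one immediate byte,
   while "add $300, %eax" needs the full 32-bit immediate, costing
   four; the 'K' constraint test below checks for the signed 8-bit
   range.  */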
e075ae69 11812int
b96a374d 11813ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 11814{
6ef67412
JH
11815 int len = 0;
11816 int i;
6c698a6d 11817 extract_insn_cached (insn);
6ef67412
JH
11818 for (i = recog_data.n_operands - 1; i >= 0; --i)
11819 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11820 {
6ef67412 11821 if (len)
3071fab5 11822 abort ();
6ef67412
JH
11823 if (shortform
11824 && GET_CODE (recog_data.operand[i]) == CONST_INT
11825 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11826 len = 1;
11827 else
11828 {
11829 switch (get_attr_mode (insn))
11830 {
11831 case MODE_QI:
11832 len+=1;
11833 break;
11834 case MODE_HI:
11835 len+=2;
11836 break;
11837 case MODE_SI:
11838 len+=4;
11839 break;
14f73b5a
JH
11840 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11841 case MODE_DI:
11842 len+=4;
11843 break;
6ef67412 11844 default:
c725bd79 11845 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11846 }
11847 }
3071fab5 11848 }
6ef67412
JH
11849 return len;
11850}
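
/* For example (illustrative): with SHORTFORM set, "addl $8, %eax" can use
   the sign-extended imm8 encoding, so its immediate contributes 1 byte;
   "addl $1000, %eax" needs the full MODE_SI immediate and contributes 4.  */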

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
	;
      else if (GET_CODE (set) == PARALLEL
	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
	set = XVECEXP (set, 0, 0);
      else
	{
#ifdef ENABLE_CHECKING
	  abort ();
#endif
	  return 0;
	}

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
\f
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      /* The second element of the PARALLEL is the other SET.  */
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
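
/* For example (illustrative): a "cmpl %eax, %ebx" followed by a dependent
   "jne" satisfies this predicate -- the branch reads only the flags that
   the compare sets -- which lets ix86_adjust_cost pair them on Pentium.  */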

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
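
/* For example (illustrative): "addl $4, %eax" followed by
   "movl (%eax), %ebx" is such a dependence -- the load's address register
   is modified by DEP_INSN -- and is charged an extra cycle on Pentium by
   ix86_adjust_cost below.  */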

/* Adjust the cost of a scheduling dependency LINK of INSN on DEP_INSN.  */

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of the integer
	     and floating unit pipeline preparation stages, the memory
	     operands for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}

    default:
      break;
    }

  return cost;
}
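
/* For example (illustrative): on Athlon/K8, when INSN is a load whose
   address does not depend on DEP_INSN, the load can start while DEP_INSN
   is still executing, so the code above subtracts the load latency
   (3 cycles, or 2 for floating point operands on Athlon) from COST.  */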

/* Scheduling state for the PentiumPro reorder hooks below.  */

static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

/* Return the ppro_uops attribute of INSN, or PPRO_UOPS_MANY if the
   insn is not recognized.  */

static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

/* Dump the current contents of the PPro decode packet to DUMP.  */

static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
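
/* For example (illustrative): given the ready array {A, B, C, D} with D
   at the head end, calling ix86_reorder_insn with INSNP pointing at B and
   SLOT at the head produces {A, C, D, B} -- B moves into the head slot
   and the insns after it shift down one place.  */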

/* Reorder the ready queue for the PentiumPro decoders.  READY and E_READY
   bound the ready list, with the highest-priority insn at E_READY.  */

static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
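
/* For example (illustrative): the PPro decode template is 4-1-1 -- one
   complex decoder plus two decoders that handle only single-uop insns.
   With a ready queue holding "addl (%ebx), %eax" (two uops), "incl %ecx"
   and "movl %edx, %esi" (one uop each), the code above steers the
   multi-uop insn into decode[0] and fills decode[1] and decode[2] with
   the single-uop insns, so all three can decode in the same cycle.  */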

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;
  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
	return --ix86_sched_data.ppro.issued_this_cycle;
      }
    }
}

/* Nonzero if the target CPU's scheduling description uses the DFA
   pipeline interface.  */

static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

\f
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
			 rtx srcreg)
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
			   rtx srcreg)
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
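
/* For example (illustrative): a DFmode constant whose default alignment
   is 32 bits is raised to 64, and a 40-character string constant is
   raised to 256 bits (32 bytes), which lets the inlined string and SSE
   code touch it with wide aligned accesses.  */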

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
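
/* For example (illustrative): "static char buf[256];" (2048 bits) hits
   the first test and is aligned to 256 bits, while on x86-64
   "static double v[2];" (exactly 16 bytes) is raised to 128 bits by the
   ABI clause above.  */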

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain into r10, using movabs.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump through r11, which holds the function address.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
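
/* Byte layout produced by the stores above (illustrative):

   32-bit:
       b9 <cxt:4>               movl   $cxt, %ecx
       e9 <disp:4>              jmp    <fnaddr>

   64-bit, movabs form:
       49 bb <fnaddr:8>         movabs $fnaddr, %r11
       49 ba <cxt:8>            movabs $cxt, %r10
       49 ff e3                 jmp    *%r11

   When FNADDR is a zero-extended 32-bit value, the first insn is instead
   "41 bb <fnaddr:4>" (movl $fnaddr, %r11d), saving four bytes.  */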
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
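
/* For example (illustrative): the initialization code registers the SSE
   packed-add builtin with

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   using a function-type node built in ix86_init_mmx_sse_builtins below,
   so the builtin only comes into existence when -msse is enabled (and,
   for MASK_64BIT entries, only on 64-bit targets).  */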

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* PNI MMX */
  { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },

  /* PNI */
  { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};

void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
	build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
	build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13047 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13048 /* MMX/SSE/integer conversions. */
bd793c65 13049 tree int_ftype_v4sf
b4de2f7d
AH
13050 = build_function_type_list (integer_type_node,
13051 V4SF_type_node, NULL_TREE);
453ee231
JH
13052 tree int64_ftype_v4sf
13053 = build_function_type_list (long_long_integer_type_node,
13054 V4SF_type_node, NULL_TREE);
bd793c65 13055 tree int_ftype_v8qi
b4de2f7d 13056 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13057 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13058 = build_function_type_list (V4SF_type_node,
13059 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13060 tree v4sf_ftype_v4sf_int64
13061 = build_function_type_list (V4SF_type_node,
13062 V4SF_type_node, long_long_integer_type_node,
13063 NULL_TREE);
bd793c65 13064 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13065 = build_function_type_list (V4SF_type_node,
13066 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13067 tree int_ftype_v4hi_int
b4de2f7d
AH
13068 = build_function_type_list (integer_type_node,
13069 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13070 tree v4hi_ftype_v4hi_int_int
e7a60f56 13071 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13072 integer_type_node, integer_type_node,
13073 NULL_TREE);
bd793c65
BS
13074 /* Miscellaneous. */
13075 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13076 = build_function_type_list (V8QI_type_node,
13077 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13078 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13079 = build_function_type_list (V4HI_type_node,
13080 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13081 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13082 = build_function_type_list (V4SF_type_node,
13083 V4SF_type_node, V4SF_type_node,
13084 integer_type_node, NULL_TREE);
bd793c65 13085 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13086 = build_function_type_list (V2SI_type_node,
13087 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13088 tree v4hi_ftype_v4hi_int
b4de2f7d 13089 = build_function_type_list (V4HI_type_node,
e7a60f56 13090 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13091 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13092 = build_function_type_list (V4HI_type_node,
13093 V4HI_type_node, long_long_unsigned_type_node,
13094 NULL_TREE);
bd793c65 13095 tree v2si_ftype_v2si_di
b4de2f7d
AH
13096 = build_function_type_list (V2SI_type_node,
13097 V2SI_type_node, long_long_unsigned_type_node,
13098 NULL_TREE);
bd793c65 13099 tree void_ftype_void
b4de2f7d 13100 = build_function_type (void_type_node, void_list_node);
bd793c65 13101 tree void_ftype_unsigned
b4de2f7d 13102 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13103 tree void_ftype_unsigned_unsigned
13104 = build_function_type_list (void_type_node, unsigned_type_node,
13105 unsigned_type_node, NULL_TREE);
13106 tree void_ftype_pcvoid_unsigned_unsigned
13107 = build_function_type_list (void_type_node, const_ptr_type_node,
13108 unsigned_type_node, unsigned_type_node,
13109 NULL_TREE);
bd793c65 13110 tree unsigned_ftype_void
b4de2f7d 13111 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13112 tree di_ftype_void
b4de2f7d 13113 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13114 tree v4sf_ftype_void
b4de2f7d 13115 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13116 tree v2si_ftype_v4sf
b4de2f7d 13117 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13118 /* Loads/stores. */
bd793c65 13119 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13120 = build_function_type_list (void_type_node,
13121 V8QI_type_node, V8QI_type_node,
13122 pchar_type_node, NULL_TREE);
068f5dea
JH
13123 tree v4sf_ftype_pcfloat
13124 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13125 /* @@@ the type is bogus */
13126 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13127 = build_function_type_list (V4SF_type_node,
f8ca7923 13128 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13129 tree void_ftype_pv2si_v4sf
b4de2f7d 13130 = build_function_type_list (void_type_node,
f8ca7923 13131 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13132 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13133 = build_function_type_list (void_type_node,
13134 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13135 tree void_ftype_pdi_di
b4de2f7d
AH
13136 = build_function_type_list (void_type_node,
13137 pdi_type_node, long_long_unsigned_type_node,
13138 NULL_TREE);
916b60b7 13139 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13140 = build_function_type_list (void_type_node,
13141 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13142 /* Normal vector unops. */
13143 tree v4sf_ftype_v4sf
b4de2f7d 13144 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13145
bd793c65
BS
13146 /* Normal vector binops. */
13147 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13148 = build_function_type_list (V4SF_type_node,
13149 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13150 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13151 = build_function_type_list (V8QI_type_node,
13152 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13153 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13154 = build_function_type_list (V4HI_type_node,
13155 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13156 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13157 = build_function_type_list (V2SI_type_node,
13158 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13159 tree di_ftype_di_di
b4de2f7d
AH
13160 = build_function_type_list (long_long_unsigned_type_node,
13161 long_long_unsigned_type_node,
13162 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13163
47f339cf 13164 tree v2si_ftype_v2sf
ae3aa00d 13165 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13166 tree v2sf_ftype_v2si
b4de2f7d 13167 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13168 tree v2si_ftype_v2si
b4de2f7d 13169 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13170 tree v2sf_ftype_v2sf
b4de2f7d 13171 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13172 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13173 = build_function_type_list (V2SF_type_node,
13174 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13175 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13176 = build_function_type_list (V2SI_type_node,
13177 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13178 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13179 tree pcint_type_node = build_pointer_type (
13180 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13181 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13182 tree pcdouble_type_node = build_pointer_type (
13183 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13184 tree int_ftype_v2df_v2df
b4de2f7d
AH
13185 = build_function_type_list (integer_type_node,
13186 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13187
13188 tree ti_ftype_void
b4de2f7d 13189 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13190 tree v2di_ftype_void
13191 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13192 tree ti_ftype_ti_ti
b4de2f7d
AH
13193 = build_function_type_list (intTI_type_node,
13194 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13195 tree void_ftype_pcvoid
13196 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13197 tree v2di_ftype_di
b4de2f7d
AH
13198 = build_function_type_list (V2DI_type_node,
13199 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13200 tree di_ftype_v2di
13201 = build_function_type_list (long_long_unsigned_type_node,
13202 V2DI_type_node, NULL_TREE);
fbe5eb6d 13203 tree v4sf_ftype_v4si
b4de2f7d 13204 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13205 tree v4si_ftype_v4sf
b4de2f7d 13206 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13207 tree v2df_ftype_v4si
b4de2f7d 13208 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13209 tree v4si_ftype_v2df
b4de2f7d 13210 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13211 tree v2si_ftype_v2df
b4de2f7d 13212 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13213 tree v4sf_ftype_v2df
b4de2f7d 13214 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13215 tree v2df_ftype_v2si
b4de2f7d 13216 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13217 tree v2df_ftype_v4sf
b4de2f7d 13218 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13219 tree int_ftype_v2df
b4de2f7d 13220 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13221 tree int64_ftype_v2df
13222 = build_function_type_list (long_long_integer_type_node,
b96a374d 13223 V2DF_type_node, NULL_TREE);
fbe5eb6d 13224 tree v2df_ftype_v2df_int
b4de2f7d
AH
13225 = build_function_type_list (V2DF_type_node,
13226 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13227 tree v2df_ftype_v2df_int64
13228 = build_function_type_list (V2DF_type_node,
13229 V2DF_type_node, long_long_integer_type_node,
13230 NULL_TREE);
fbe5eb6d 13231 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13232 = build_function_type_list (V4SF_type_node,
13233 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13234 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13235 = build_function_type_list (V2DF_type_node,
13236 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13237 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13238 = build_function_type_list (V2DF_type_node,
13239 V2DF_type_node, V2DF_type_node,
13240 integer_type_node,
13241 NULL_TREE);
fbe5eb6d 13242 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13243 = build_function_type_list (V2DF_type_node,
13244 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13245 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13246 = build_function_type_list (void_type_node,
13247 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13248 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13249 = build_function_type_list (void_type_node,
13250 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13251 tree void_ftype_pint_int
b4de2f7d
AH
13252 = build_function_type_list (void_type_node,
13253 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13254 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13255 = build_function_type_list (void_type_node,
13256 V16QI_type_node, V16QI_type_node,
13257 pchar_type_node, NULL_TREE);
068f5dea
JH
13258 tree v2df_ftype_pcdouble
13259 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13260 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13261 = build_function_type_list (V2DF_type_node,
13262 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13263 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13264 = build_function_type_list (V16QI_type_node,
13265 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13266 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13267 = build_function_type_list (V8HI_type_node,
13268 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13269 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13270 = build_function_type_list (V4SI_type_node,
13271 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13272 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13273 = build_function_type_list (V2DI_type_node,
13274 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13275 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13276 = build_function_type_list (V2DI_type_node,
13277 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13278 tree v2df_ftype_v2df
b4de2f7d 13279 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13280 tree v2df_ftype_double
b4de2f7d 13281 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13282 tree v2df_ftype_double_double
b4de2f7d
AH
13283 = build_function_type_list (V2DF_type_node,
13284 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13285 tree int_ftype_v8hi_int
b4de2f7d
AH
13286 = build_function_type_list (integer_type_node,
13287 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13288 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13289 = build_function_type_list (V8HI_type_node,
13290 V8HI_type_node, integer_type_node,
13291 integer_type_node, NULL_TREE);
916b60b7 13292 tree v2di_ftype_v2di_int
b4de2f7d
AH
13293 = build_function_type_list (V2DI_type_node,
13294 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13295 tree v4si_ftype_v4si_int
b4de2f7d
AH
13296 = build_function_type_list (V4SI_type_node,
13297 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13298 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13299 = build_function_type_list (V8HI_type_node,
13300 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13301 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13302 = build_function_type_list (V8HI_type_node,
13303 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13304 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13305 = build_function_type_list (V4SI_type_node,
13306 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13307 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13308 = build_function_type_list (V4SI_type_node,
13309 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13310 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13311 = build_function_type_list (long_long_unsigned_type_node,
13312 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13313 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13314 = build_function_type_list (V2DI_type_node,
13315 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13316 tree int_ftype_v16qi
b4de2f7d 13317 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13318 tree v16qi_ftype_pcchar
13319 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13320 tree void_ftype_pchar_v16qi
13321 = build_function_type_list (void_type_node,
13322 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13323 tree v4si_ftype_pcint
13324 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13325 tree void_ftype_pcint_v4si
f02e1358 13326 = build_function_type_list (void_type_node,
068f5dea 13327 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13328 tree v2di_ftype_v2di
13329 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13330
f8a1ebc6
JH
13331 tree float80_type;
13332 tree float128_type;
13333
13334 /* The __float80 type. */
13335 if (TYPE_MODE (long_double_type_node) == XFmode)
13336 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13337 "__float80");
13338 else
13339 {
13340 /* The __float80 type. */
13341 float80_type = make_node (REAL_TYPE);
13342 TYPE_PRECISION (float80_type) = 96;
13343 layout_type (float80_type);
13344 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13345 }
13346
13347 float128_type = make_node (REAL_TYPE);
13348 TYPE_PRECISION (float128_type) = 128;
13349 layout_type (float128_type);
13350 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13351
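  /* From this point on, user code can name the newly registered types
     directly, e.g. "__float80 x;" or "__float128 y;".  */
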
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

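  /* For instance, the bdesc_2arg entry for addps ends up registering
     __builtin_ia32_addps with type v4sf_ftype_v4sf_v4sf, chosen by the
     V4SFmode case above.  */
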
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

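  /* Every comi/ucomi builtin (e.g. __builtin_ia32_comieq) yields an int
     read back from EFLAGS; only the vector element width differs between
     the SSE (v4sf) and SSE2 (v2df) variants.  */
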
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

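  /* Note that the shufps selector must be a compile-time constant; a
     non-immediate mask is diagnosed when the builtin is expanded (see the
     IX86_BUILTIN_SHUFPS case in ix86_expand_builtin below).  */
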
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
               v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
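
  /* "PNI" (Prescott New Instructions) is the extension later marketed
     as SSE3.  Note that monitor/mwait take their hints as plain
     unsigned integers rather than vector operands.  */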
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

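  /* If the second argument arrives as a 32-bit integer but the insn
     wants a TImode operand, load it into the low element of an XMM
     register and use the TImode lowpart of that register.  */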
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

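  /* The vm* scalar patterns take two vector inputs: one supplies the
     operand of the scalar operation, the other the upper elements that
     pass through to the result.  These builtins use the same register
     for both.  */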
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

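  /* The comi insn itself only sets the flags.  Materialize the boolean
     result by zeroing an SImode pseudo and storing the flag condition
     into its low byte through the strict_low_part set emitted below.  */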
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PINSRW ? 15:255);
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
                  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

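    /* The MXCSR control/status register can only be loaded and stored
       through memory, so these two go through a stack temporary.  */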
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

ab3146fd
ZD
14222 case IX86_BUILTIN_PSLLDQI128:
14223 case IX86_BUILTIN_PSRLDQI128:
14224 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14225 : CODE_FOR_sse2_lshrti3);
14226 arg0 = TREE_VALUE (arglist);
14227 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14228 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14229 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14230 tmode = insn_data[icode].operand[0].mode;
14231 mode1 = insn_data[icode].operand[1].mode;
14232 mode2 = insn_data[icode].operand[2].mode;
14233
14234 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14235 {
14236 op0 = copy_to_reg (op0);
14237 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14238 }
14239 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14240 {
14241 error ("shift must be an immediate");
14242 return const0_rtx;
14243 }
14244 target = gen_reg_rtx (V2DImode);
14245 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14246 if (! pat)
14247 return 0;
14248 emit_insn (pat);
14249 return target;
14250
47f339cf
BS
14251 case IX86_BUILTIN_FEMMS:
14252 emit_insn (gen_femms ());
14253 return NULL_RTX;
14254
14255 case IX86_BUILTIN_PAVGUSB:
14256 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14257
14258 case IX86_BUILTIN_PF2ID:
14259 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14260
14261 case IX86_BUILTIN_PFACC:
14262 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14263
14264 case IX86_BUILTIN_PFADD:
14265 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14266
14267 case IX86_BUILTIN_PFCMPEQ:
14268 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14269
14270 case IX86_BUILTIN_PFCMPGE:
14271 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14272
14273 case IX86_BUILTIN_PFCMPGT:
14274 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14275
14276 case IX86_BUILTIN_PFMAX:
14277 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14278
14279 case IX86_BUILTIN_PFMIN:
14280 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14281
14282 case IX86_BUILTIN_PFMUL:
14283 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14284
14285 case IX86_BUILTIN_PFRCP:
14286 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14287
14288 case IX86_BUILTIN_PFRCPIT1:
14289 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14290
14291 case IX86_BUILTIN_PFRCPIT2:
14292 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14293
14294 case IX86_BUILTIN_PFRSQIT1:
14295 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14296
14297 case IX86_BUILTIN_PFRSQRT:
14298 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14299
14300 case IX86_BUILTIN_PFSUB:
14301 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14302
14303 case IX86_BUILTIN_PFSUBR:
14304 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14305
14306 case IX86_BUILTIN_PI2FD:
14307 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14308
14309 case IX86_BUILTIN_PMULHRW:
14310 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14311
47f339cf
BS
14312 case IX86_BUILTIN_PF2IW:
14313 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14314
14315 case IX86_BUILTIN_PFNACC:
14316 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14317
14318 case IX86_BUILTIN_PFPNACC:
14319 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14320
14321 case IX86_BUILTIN_PI2FW:
14322 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14323
14324 case IX86_BUILTIN_PSWAPDSI:
14325 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14326
14327 case IX86_BUILTIN_PSWAPDSF:
14328 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14329
e37af218
RH
14330 case IX86_BUILTIN_SSE_ZERO:
14331 target = gen_reg_rtx (V4SFmode);
4977bab6 14332 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14333 return target;
14334
bd793c65
BS
14335 case IX86_BUILTIN_MMX_ZERO:
14336 target = gen_reg_rtx (DImode);
14337 emit_insn (gen_mmx_clrdi (target));
14338 return target;
14339
f02e1358
JH
14340 case IX86_BUILTIN_CLRTI:
14341 target = gen_reg_rtx (V2DImode);
14342 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14343 return target;
14344
14345
fbe5eb6d
BS
14346 case IX86_BUILTIN_SQRTSD:
14347 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14348 case IX86_BUILTIN_LOADAPD:
14349 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14350 case IX86_BUILTIN_LOADUPD:
14351 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14352
14353 case IX86_BUILTIN_STOREAPD:
14354 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14355 case IX86_BUILTIN_STOREUPD:
14356 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14357
14358 case IX86_BUILTIN_LOADSD:
14359 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14360
14361 case IX86_BUILTIN_STORESD:
14362 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14363
14364 case IX86_BUILTIN_SETPD1:
14365 target = assign_386_stack_local (DFmode, 0);
14366 arg0 = TREE_VALUE (arglist);
14367 emit_move_insn (adjust_address (target, DFmode, 0),
14368 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14369 op0 = gen_reg_rtx (V2DFmode);
14370 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14371 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14372 return op0;
14373
14374 case IX86_BUILTIN_SETPD:
14375 target = assign_386_stack_local (V2DFmode, 0);
14376 arg0 = TREE_VALUE (arglist);
14377 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14378 emit_move_insn (adjust_address (target, DFmode, 0),
14379 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14380 emit_move_insn (adjust_address (target, DFmode, 8),
14381 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14382 op0 = gen_reg_rtx (V2DFmode);
14383 emit_insn (gen_sse2_movapd (op0, target));
14384 return op0;
14385
14386 case IX86_BUILTIN_LOADRPD:
14387 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14388 gen_reg_rtx (V2DFmode), 1);
14389 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14390 return target;
14391
14392 case IX86_BUILTIN_LOADPD1:
14393 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14394 gen_reg_rtx (V2DFmode), 1);
14395 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14396 return target;
14397
14398 case IX86_BUILTIN_STOREPD1:
14399 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14400 case IX86_BUILTIN_STORERPD:
14401 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14402
48126a97
JH
14403 case IX86_BUILTIN_CLRPD:
14404 target = gen_reg_rtx (V2DFmode);
14405 emit_insn (gen_sse_clrv2df (target));
14406 return target;
14407
fbe5eb6d
BS
14408 case IX86_BUILTIN_MFENCE:
14409 emit_insn (gen_sse2_mfence ());
14410 return 0;
14411 case IX86_BUILTIN_LFENCE:
14412 emit_insn (gen_sse2_lfence ());
14413 return 0;
14414
14415 case IX86_BUILTIN_CLFLUSH:
14416 arg0 = TREE_VALUE (arglist);
14417 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14418 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14419 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14420 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14421
14422 emit_insn (gen_sse2_clflush (op0));
14423 return 0;
14424
14425 case IX86_BUILTIN_MOVNTPD:
14426 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14427 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14428 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14429 case IX86_BUILTIN_MOVNTI:
14430 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14431
f02e1358
JH
14432 case IX86_BUILTIN_LOADDQA:
14433 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14434 case IX86_BUILTIN_LOADDQU:
14435 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14436 case IX86_BUILTIN_LOADD:
14437 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14438
14439 case IX86_BUILTIN_STOREDQA:
14440 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14441 case IX86_BUILTIN_STOREDQU:
14442 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14443 case IX86_BUILTIN_STORED:
14444 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14445
22c7c85e
L
14446 case IX86_BUILTIN_MONITOR:
14447 arg0 = TREE_VALUE (arglist);
14448 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14449 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14450 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14451 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14452 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14453 if (!REG_P (op0))
14454 op0 = copy_to_mode_reg (SImode, op0);
14455 if (!REG_P (op1))
14456 op1 = copy_to_mode_reg (SImode, op1);
14457 if (!REG_P (op2))
14458 op2 = copy_to_mode_reg (SImode, op2);
14459 emit_insn (gen_monitor (op0, op1, op2));
14460 return 0;
14461
14462 case IX86_BUILTIN_MWAIT:
14463 arg0 = TREE_VALUE (arglist);
14464 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14465 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14466 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14467 if (!REG_P (op0))
14468 op0 = copy_to_mode_reg (SImode, op0);
14469 if (!REG_P (op1))
14470 op1 = copy_to_mode_reg (SImode, op1);
14471 emit_insn (gen_mwait (op0, op1));
14472 return 0;
14473
14474 case IX86_BUILTIN_LOADDDUP:
14475 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14476
14477 case IX86_BUILTIN_LDDQU:
14478 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14479 1);
14480
bd793c65
BS
14481 default:
14482 break;
14483 }
14484
ca7558fc 14485 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14486 if (d->code == fcode)
14487 {
14488 /* Compares are treated specially. */
14489 if (d->icode == CODE_FOR_maskcmpv4sf3
14490 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14491 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14492 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14493 || d->icode == CODE_FOR_maskcmpv2df3
14494 || d->icode == CODE_FOR_vmmaskcmpv2df3
14495 || d->icode == CODE_FOR_maskncmpv2df3
14496 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14497 return ix86_expand_sse_compare (d, arglist, target);
14498
14499 return ix86_expand_binop_builtin (d->icode, arglist, target);
14500 }
14501
ca7558fc 14502 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14503 if (d->code == fcode)
14504 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14505
ca7558fc 14506 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14507 if (d->code == fcode)
14508 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14509
bd793c65
BS
14510 /* @@@ Should really do something sensible here. */
14511 return 0;
bd793c65 14512}
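
/* Illustrative note (not part of GCC): the cases above sit behind the
   __builtin_ia32_* entry points that the <xmmintrin.h>-style wrappers
   expand to.  A minimal user-level sketch, assuming the standard
   intrinsic names (read_mxcsr/write_mxcsr are hypothetical helpers):

     #include <xmmintrin.h>

     unsigned int
     read_mxcsr (void)
     {
       return _mm_getcsr ();    // reaches IX86_BUILTIN_STMXCSR above
     }

     void
     write_mxcsr (unsigned int m)
     {
       _mm_setcsr (m);          // reaches IX86_BUILTIN_LDMXCSR above
     }
*/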

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                       gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (
                       gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                     gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
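
/* For illustration only, not output emitted verbatim by GCC: on a 32-bit
   target with no red zone, forcing a DImode operand to memory as above
   boils down to two pre-decrement stores, roughly

     pushl %edx
     pushl %eax

   after which the value is addressable at (%esp).  */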

/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
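
/* Sketch of the deallocation emitted above for a 4 byte slot (assuming
   peephole2 leaves it untouched):

     leal 4(%esp), %esp

   As the comment in the function notes, peephole2 may instead rewrite
   this as a pop into a dead register or as an add.  */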

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
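
/* Illustrative consequence of the rules above: for

     double d = 1.0;

   an SSE class is refused for the constant (NO_REGS), so the 1.0 is
   placed in the constant pool and loaded from memory, whereas an x87
   class may keep it because standard_80387_constant_p recognizes 1.0
   (loadable via fld1).  */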

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
               || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
              && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
                  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
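
/* Example (illustrative): with inter-unit moves disabled, a copy such as

     (set (reg:SI xmm1) (reg:SI dx))

   answers "secondary memory needed", so reload rewrites it as a store to
   a stack slot followed by a load on the other side.  */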

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as
   TO; on some machines it is expensive to move between registers if they
   are not general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
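
/* Worked example (numbers depend on the active cost table): copying
   DImode from GENERAL_REGS (two 32-bit regs) to MMX_REGS (one reg) on a
   32-bit target takes the secondary-memory path, so the cost is roughly

     1 + max(store, load) for class1 + max(store, load) for class2 + 20

   where the final 20 is the CLASS_MAX_NREGS mismatch penalty; staying
   within a single unit is therefore always preferred.  */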

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
            ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
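
/* Example readings (illustrative): an HImode load from a general class
   costs int_load[1]; a QImode store from a class without byte registers
   costs int_store[0] + 4, which nudges the allocator toward Q_REGS for
   byte values that end up in memory.  */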

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}
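
/* Illustrative reading of the PLUS case above: an expression such as

     (plus:SI (mult:SI (reg:SI a) (const_int 4)) (reg:SI b))

   (a and b being arbitrary pseudos) is priced as a single lea plus the
   costs of its leaf operands, so the optimizers keep such address
   arithmetic in a form one lea can cover instead of decomposing it into
   a shift and an add.  */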

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
         || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
                                TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
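
/* Sketch of typical 32-bit output for a thunk with DELTA == 4 and no
   VCALL_OFFSET (the mangled name is hypothetical):

     addl $4, 4(%esp)
     jmp  _ZN7Derived1fEv

   i.e. adjust the incoming this in its stack slot, then tail-jump to
   the real method.  */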

static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
15550
d2c49530
JH
15551/* We don't have exact information about the insn sizes, but we may assume
15552 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 15553 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
15554 99% of cases. */
15555
15556static int
b96a374d 15557min_insn_size (rtx insn)
d2c49530
JH
15558{
15559 int l = 0;
15560
15561 if (!INSN_P (insn) || !active_insn_p (insn))
15562 return 0;
15563
15564 /* Discard alignments we've emit and jump instructions. */
15565 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15566 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15567 return 0;
15568 if (GET_CODE (insn) == JUMP_INSN
15569 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15570 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15571 return 0;
15572
15573 /* Important case - calls are always 5 bytes.
15574 It is common to have many calls in the row. */
15575 if (GET_CODE (insn) == CALL_INSN
15576 && symbolic_reference_mentioned_p (PATTERN (insn))
15577 && !SIBLING_CALL_P (insn))
15578 return 5;
15579 if (get_attr_length (insn) <= 1)
15580 return 1;
15581
15582 /* For normal instructions we may rely on the sizes of addresses
15583 and the presence of symbol to require 4 bytes of encoding.
15584 This is not the case for jumps where references are PC relative. */
15585 if (GET_CODE (insn) != JUMP_INSN)
15586 {
15587 l = get_attr_length_address (insn);
15588 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15589 l = 4;
15590 }
15591 if (l)
15592 return 1+l;
15593 else
15594 return 2;
15595}

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */

static void
k8_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16 byte page.

     The smallest offset in the page INSN can start at is the case where
     START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
     maxskip 17 - NBYTES + sizeof (INSN).  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      if (njumps < 0)
        abort ();
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (rtl_dump_file)
            fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
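
/* Example of the padding decision above (illustrative): if the interval
   holding the three earlier jumps plus the current one adds up to
   nbytes == 12 and the current jump is 2 bytes, padsize is
   15 - 12 + 2 = 5, so a 5 byte align is emitted and the fourth jump
   starts in the next 16 byte window.  */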

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
static void
ix86_reorg (void)
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
  k8_avoid_jump_misspredicts ();
}

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
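
/* The negative path above is the usual halve-and-double trick; a C
   rendering for DImode input (illustrative; u64_to_double is a
   hypothetical name, not a GCC entry point):

     double
     u64_to_double (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       // Shift right by one, fold the low bit back in so rounding is
       // preserved, convert signed, then double the result.
       return 2.0 * (double) (long long) ((u >> 1) | (u & 1));
     }
*/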

/* Return true if we do not know how to pass TYPE solely in registers.  */
bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (default_must_pass_in_stack (mode, type))
    return true;
  return (!TARGET_64BIT && type && mode == TImode);
}

#include "gt-i386.h"