]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
i386.c (ix86_expand_int_movcc): Fix setcc sign bit case.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
36210500 3 2002, 2003 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9
JVA
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
e78d8e51 41#include "optabs.h"
f103890b 42#include "toplev.h"
e075ae69 43#include "basic-block.h"
1526a060 44#include "ggc.h"
672a6f42
NB
45#include "target.h"
46#include "target-def.h"
f1e639b1 47#include "langhooks.h"
2a2ab3f9 48
8dfe5673 49#ifndef CHECK_STACK_LIMIT
07933f72 50#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
51#endif
52
3c50106f
RH
53/* Return index of given mode in mult and division cost tables. */
54#define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
59 : 4)
60
2ab0437e 61/* Processor costs (relative to an add) */
fce5a9f2 62static const
2ab0437e
JH
63struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
4977bab6 68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 69 0, /* cost of multiply per each bit set */
4977bab6 70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
71 3, /* cost of movsx */
72 3, /* cost of movzx */
2ab0437e
JH
73 0, /* "large" insn */
74 2, /* MOVE_RATIO */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
f4365627
JH
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
4977bab6 97 1, /* Branch cost */
229b303a
RS
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
2ab0437e 104};
229b303a 105
32b5b1aa 106/* Processor costs (relative to an add) */
fce5a9f2 107static const
32b5b1aa 108struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 109 1, /* cost of an add instruction */
32b5b1aa
SC
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
4977bab6 113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 114 1, /* cost of multiply per each bit set */
4977bab6 115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
116 3, /* cost of movsx */
117 2, /* cost of movzx */
96e7ae40 118 15, /* "large" insn */
e2e52e1b 119 3, /* MOVE_RATIO */
7c6b971d 120 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
0f290768 123 Relative to reg-reg move (2). */
96e7ae40
JH
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
fa79946e
JH
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
f4365627
JH
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
4977bab6 142 1, /* Branch cost */
229b303a
RS
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
149};
150
fce5a9f2 151static const
32b5b1aa
SC
152struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
4977bab6 157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 158 1, /* cost of multiply per each bit set */
4977bab6 159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
160 3, /* cost of movsx */
161 2, /* cost of movzx */
96e7ae40 162 15, /* "large" insn */
e2e52e1b 163 3, /* MOVE_RATIO */
7c6b971d 164 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
0f290768 167 Relative to reg-reg move (2). */
96e7ae40
JH
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
fa79946e
JH
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
f4365627
JH
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
4977bab6 186 1, /* Branch cost */
229b303a
RS
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
193};
194
fce5a9f2 195static const
e5cb57e8 196struct processor_costs pentium_cost = {
32b5b1aa
SC
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
856b07a1 199 4, /* variable shift costs */
e5cb57e8 200 1, /* constant shift costs */
4977bab6 201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 202 0, /* cost of multiply per each bit set */
4977bab6 203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
204 3, /* cost of movsx */
205 2, /* cost of movzx */
96e7ae40 206 8, /* "large" insn */
e2e52e1b 207 6, /* MOVE_RATIO */
7c6b971d 208 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
0f290768 211 Relative to reg-reg move (2). */
96e7ae40
JH
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
fa79946e
JH
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
f4365627
JH
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
4977bab6 230 2, /* Branch cost */
229b303a
RS
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
237};
238
fce5a9f2 239static const
856b07a1
SC
240struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
e075ae69 243 1, /* variable shift costs */
856b07a1 244 1, /* constant shift costs */
4977bab6 245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 246 0, /* cost of multiply per each bit set */
4977bab6 247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
248 1, /* cost of movsx */
249 1, /* cost of movzx */
96e7ae40 250 8, /* "large" insn */
e2e52e1b 251 6, /* MOVE_RATIO */
7c6b971d 252 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
0f290768 255 Relative to reg-reg move (2). */
96e7ae40
JH
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
fa79946e
JH
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
f4365627
JH
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
4977bab6 274 2, /* Branch cost */
229b303a
RS
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
856b07a1
SC
281};
282
fce5a9f2 283static const
a269a03c
JC
284struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
e075ae69 286 2, /* cost of a lea instruction */
a269a03c
JC
287 1, /* variable shift costs */
288 1, /* constant shift costs */
4977bab6 289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 290 0, /* cost of multiply per each bit set */
4977bab6 291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
292 2, /* cost of movsx */
293 2, /* cost of movzx */
96e7ae40 294 8, /* "large" insn */
e2e52e1b 295 4, /* MOVE_RATIO */
7c6b971d 296 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
0f290768 299 Relative to reg-reg move (2). */
96e7ae40
JH
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
fa79946e
JH
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
f4365627
JH
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
4977bab6 318 1, /* Branch cost */
229b303a
RS
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
4f770e7b
RS
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
229b303a
RS
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
a269a03c
JC
325};
326
fce5a9f2 327static const
309ada50
JH
328struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
0b5107cf 330 2, /* cost of a lea instruction */
309ada50
JH
331 1, /* variable shift costs */
332 1, /* constant shift costs */
4977bab6 333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 334 0, /* cost of multiply per each bit set */
4977bab6 335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
336 1, /* cost of movsx */
337 1, /* cost of movzx */
309ada50 338 8, /* "large" insn */
e2e52e1b 339 9, /* MOVE_RATIO */
309ada50 340 4, /* cost for loading QImode using movzbl */
b72b1c29 341 {3, 4, 3}, /* cost of loading integer registers
309ada50 342 in QImode, HImode and SImode.
0f290768 343 Relative to reg-reg move (2). */
b72b1c29 344 {3, 4, 3}, /* cost of storing integer registers */
309ada50 345 4, /* cost of reg,reg fld/fst */
b72b1c29 346 {4, 4, 12}, /* cost of loading fp registers
309ada50 347 in SFmode, DFmode and XFmode */
b72b1c29 348 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 349 2, /* cost of moving MMX register */
b72b1c29 350 {4, 4}, /* cost of loading MMX registers
fa79946e 351 in SImode and DImode */
b72b1c29 352 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
b72b1c29 355 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 356 in SImode, DImode and TImode */
b72b1c29 357 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 358 in SImode, DImode and TImode */
b72b1c29 359 5, /* MMX or SSE register to integer */
f4365627
JH
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
4977bab6 362 2, /* Branch cost */
229b303a
RS
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
309ada50
JH
369};
370
4977bab6
ZW
371static const
372struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
383 9, /* MOVE_RATIO */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
406 2, /* Branch cost */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
413};
414
fce5a9f2 415static const
b4e89e2d
JH
416struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
4977bab6
ZW
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 422 0, /* cost of multiply per each bit set */
4977bab6 423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
424 1, /* cost of movsx */
425 1, /* cost of movzx */
b4e89e2d
JH
426 16, /* "large" insn */
427 6, /* MOVE_RATIO */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
f4365627
JH
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
4977bab6 450 2, /* Branch cost */
229b303a
RS
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
457};
458
8b60264b 459const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 460
a269a03c
JC
461/* Processor feature/optimization bitmasks. */
462#define m_386 (1<<PROCESSOR_I386)
463#define m_486 (1<<PROCESSOR_I486)
464#define m_PENT (1<<PROCESSOR_PENTIUM)
465#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466#define m_K6 (1<<PROCESSOR_K6)
309ada50 467#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 468#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
469#define m_K8 (1<<PROCESSOR_K8)
470#define m_ATHLON_K8 (m_K8 | m_ATHLON)
a269a03c 471
4977bab6
ZW
472const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
a269a03c 474const int x86_zero_extend_with_and = m_486 | m_PENT;
4977bab6 475const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 476const int x86_double_with_add = ~m_386;
a269a03c 477const int x86_use_bit_test = m_386;
4977bab6
ZW
478const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480const int x86_3dnow_a = m_ATHLON_K8;
481const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
ef6257cd 482const int x86_branch_hints = m_PENT4;
b4e89e2d 483const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
484const int x86_partial_reg_stall = m_PPRO;
485const int x86_use_loop = m_K6;
4977bab6 486const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
487const int x86_use_mov0 = m_K6;
488const int x86_use_cltd = ~(m_PENT | m_K6);
489const int x86_read_modify_write = ~m_PENT;
490const int x86_read_modify = ~(m_PENT | m_PPRO);
491const int x86_split_long_moves = m_PPRO;
4977bab6 492const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 493const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 494const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
495const int x86_qimode_math = ~(0);
496const int x86_promote_qi_regs = 0;
497const int x86_himode_math = ~(m_PPRO);
498const int x86_promote_hi_regs = m_PPRO;
4977bab6
ZW
499const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
b972dd02 509const int x86_decompose_lea = m_PENT4;
495333a6 510const int x86_shift1 = ~m_486;
4977bab6
ZW
511const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 514 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
515 scalar values in proper format leaving the upper part undefined. */
516const int x86_sse_partial_regs = m_ATHLON_K8;
517/* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520const int x86_sse_typeless_stores = m_ATHLON_K8;
521const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522const int x86_use_ffreep = m_ATHLON_K8;
523const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
a269a03c 524
d1f87653 525/* In case the average insn count for single function invocation is
6ab16dd9
JH
526 lower than this constant, emit fast (but longer) prologue and
527 epilogue code. */
4977bab6 528#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 529
6ab16dd9
JH
530/* Set by prologue expander and used by epilogue expander to determine
531 the style used. */
532static int use_fast_prologue_epilogue;
533
5bf0ebab
RH
534/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
535static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
536static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
537static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
538
539/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 540 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 541
e075ae69 542enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
543{
544 /* ax, dx, cx, bx */
ab408a86 545 AREG, DREG, CREG, BREG,
4c0d89b5 546 /* si, di, bp, sp */
e075ae69 547 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
548 /* FP registers */
549 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 550 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 551 /* arg pointer */
83774849 552 NON_Q_REGS,
564d80f4 553 /* flags, fpsr, dirflag, frame */
a7180f70
BS
554 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
555 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 SSE_REGS, SSE_REGS,
557 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
558 MMX_REGS, MMX_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
562 SSE_REGS, SSE_REGS,
4c0d89b5 563};
c572e5ba 564
3d117b30 565/* The "default" register map used in 32bit mode. */
83774849 566
0f290768 567int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
568{
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
576};
577
5bf0ebab
RH
578static int const x86_64_int_parameter_registers[6] =
579{
580 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
581 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
582};
583
584static int const x86_64_int_return_registers[4] =
585{
586 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
587};
53c17031 588
0f7fa3d0
JH
589/* The "default" register map used in 64bit mode. */
590int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591{
592 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 593 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
594 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
595 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
596 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
597 8,9,10,11,12,13,14,15, /* extended integer registers */
598 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
599};
600
83774849
RH
601/* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
644 register numbers. Note that these are all stack-top-relative
645 numbers.
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
654*/
0f290768 655int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
656{
657 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
658 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 659 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
660 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
661 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
664};
665
c572e5ba
JVA
666/* Test and compare insns in i386.md store the information needed to
667 generate branch and scc insns here. */
668
07933f72
GS
669rtx ix86_compare_op0 = NULL_RTX;
670rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 671
f996902d
RH
672/* The encoding characters for the four TLS models present in ELF. */
673
755ac5d4 674static char const tls_model_chars[] = " GLil";
f996902d 675
7a2e09f4 676#define MAX_386_STACK_LOCALS 3
8362f420
JH
677/* Size of the register save area. */
678#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
679
680/* Define the structure for the machine field in struct function. */
e2500fed 681struct machine_function GTY(())
36edd3cc
BS
682{
683 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
f996902d 684 const char *some_ld_name;
8362f420 685 int save_varrargs_registers;
6fca22eb 686 int accesses_prev_frame;
36edd3cc
BS
687};
688
01d939e8 689#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 690#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 691
4dd2ac2c
JH
692/* Structure describing stack frame layout.
693 Stack grows downward:
694
695 [arguments]
696 <- ARG_POINTER
697 saved pc
698
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
701 [saved regs]
702
703 [padding1] \
704 )
705 [va_arg registers] (
706 > to_allocate <- FRAME_POINTER
707 [frame] (
708 )
709 [padding2] /
710 */
711struct ix86_frame
712{
713 int nregs;
714 int padding1;
8362f420 715 int va_arg_size;
4dd2ac2c
JH
716 HOST_WIDE_INT frame;
717 int padding2;
718 int outgoing_arguments_size;
8362f420 719 int red_zone_size;
4dd2ac2c
JH
720
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
726};
727
c93e80a5
JH
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect (-masm=att|intel).  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect (-mtls-dialect=gnu|sun).  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Branch cost, values 1-5: see jump.c.  Seeded from the processor cost
   table in override_options, overridable with -mbranch-cost=.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL (computed once in
   override_options).  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
e075ae69 787\f
623fe810 788static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 789static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
790static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 792 int, int, FILE *));
f996902d
RH
793static const char *get_some_local_dynamic_name PARAMS ((void));
794static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 796static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
797static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
798 rtx *, rtx *));
f996902d 799static rtx get_thread_pointer PARAMS ((void));
145aacc2 800static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
f6da8bc3
KG
801static rtx gen_push PARAMS ((rtx));
802static int memory_address_length PARAMS ((rtx addr));
803static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
805static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806static void ix86_dump_ppro_packet PARAMS ((FILE *));
807static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
e2500fed 808static struct machine_function * ix86_init_machine_status PARAMS ((void));
2b589241 809static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
810static int ix86_nsaved_regs PARAMS ((void));
811static void ix86_emit_save_regs PARAMS ((void));
c6036a37 812static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 813static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
bd09bdeb 814static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
0e4970d7 815static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 816static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 817static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 818static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
819static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
821static int ix86_issue_rate PARAMS ((void));
822static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823static void ix86_sched_init PARAMS ((FILE *, int, int));
824static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
826static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 828static void ix86_init_mmx_sse_builtins PARAMS ((void));
3961e8fe
RH
829static rtx x86_this_parameter PARAMS ((tree));
830static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
4977bab6 834bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
e075ae69
RH
835
/* Decomposed form of an x86 effective address, as produced by
   ix86_decompose_address: base + index * scale + disp.  Any of the rtx
   fields may be NULL when that component is absent.  */
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};
b08de47e 841
e075ae69 842static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
dcefdf67 843static int ix86_address_cost PARAMS ((rtx));
3a04ff64 844static bool ix86_cannot_force_const_mem PARAMS ((rtx));
bd793c65 845
f996902d
RH
846static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
847static const char *ix86_strip_name_encoding PARAMS ((const char *))
848 ATTRIBUTE_UNUSED;
fb49053f 849
bd793c65 850struct builtin_description;
8b60264b
KG
851static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
852 tree, rtx));
853static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
854 tree, rtx));
bd793c65
BS
855static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
856static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
857static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218 858static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 859static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
860static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
861static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
862 enum rtx_code *,
863 enum rtx_code *,
864 enum rtx_code *));
9e7adcb3
JH
865static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
866 rtx *, rtx *));
867static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
868static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
869static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
870static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
bd09bdeb 871static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
9b690711 872static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 873static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 874static int ix86_comp_type_attributes PARAMS ((tree, tree));
483ab821 875static int ix86_fntype_regparm PARAMS ((tree));
91d231cb 876const struct attribute_spec ix86_attribute_table[];
4977bab6 877static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
91d231cb
JM
878static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
879static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
b069de3b 880static int ix86_value_regno PARAMS ((enum machine_mode));
4977bab6 881static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
fe77449a 882static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
4977bab6 883static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
3c50106f 884static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
7c262518 885
21c318ba 886#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
887static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
888#endif
e56feed6 889
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names, indexed by enum x86_64_reg_class.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* Maximum number of eightbytes (classes) one argument can occupy.  */
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
672a6f42
NB
924\f
925/* Initialize the GCC target structure. */
91d231cb
JM
926#undef TARGET_ATTRIBUTE_TABLE
927#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 928#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
929# undef TARGET_MERGE_DECL_ATTRIBUTES
930# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
931#endif
932
8d8e52be
JM
933#undef TARGET_COMP_TYPE_ATTRIBUTES
934#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
935
f6155fda
SS
936#undef TARGET_INIT_BUILTINS
937#define TARGET_INIT_BUILTINS ix86_init_builtins
938
939#undef TARGET_EXPAND_BUILTIN
940#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
941
bd09bdeb
RH
942#undef TARGET_ASM_FUNCTION_EPILOGUE
943#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 944
17b53c33
NB
945#undef TARGET_ASM_OPEN_PAREN
946#define TARGET_ASM_OPEN_PAREN ""
947#undef TARGET_ASM_CLOSE_PAREN
948#define TARGET_ASM_CLOSE_PAREN ""
949
301d03af
RS
950#undef TARGET_ASM_ALIGNED_HI_OP
951#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952#undef TARGET_ASM_ALIGNED_SI_OP
953#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
954#ifdef ASM_QUAD
955#undef TARGET_ASM_ALIGNED_DI_OP
956#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
957#endif
958
959#undef TARGET_ASM_UNALIGNED_HI_OP
960#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961#undef TARGET_ASM_UNALIGNED_SI_OP
962#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963#undef TARGET_ASM_UNALIGNED_DI_OP
964#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
965
c237e94a
ZW
966#undef TARGET_SCHED_ADJUST_COST
967#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968#undef TARGET_SCHED_ISSUE_RATE
969#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970#undef TARGET_SCHED_VARIABLE_ISSUE
971#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972#undef TARGET_SCHED_INIT
973#define TARGET_SCHED_INIT ix86_sched_init
974#undef TARGET_SCHED_REORDER
975#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 976#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
977#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
c237e94a 982
4977bab6
ZW
983#undef TARGET_FUNCTION_OK_FOR_SIBCALL
984#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
985
f996902d
RH
986#ifdef HAVE_AS_TLS
987#undef TARGET_HAVE_TLS
988#define TARGET_HAVE_TLS true
989#endif
3a04ff64
RH
990#undef TARGET_CANNOT_FORCE_CONST_MEM
991#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 992
4977bab6
ZW
993#undef TARGET_MS_BITFIELD_LAYOUT_P
994#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
995
c590b625
RH
996#undef TARGET_ASM_OUTPUT_MI_THUNK
997#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
998#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
999#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1000
3c50106f
RH
1001#undef TARGET_RTX_COSTS
1002#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1003#undef TARGET_ADDRESS_COST
1004#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1005
f6897b10 1006struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 1007\f
f5316dfe
MM
1008/* Sometimes certain combinations of command options do not make
1009 sense on a particular target machine. You can define a macro
1010 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1011 defined, is executed once just after all the command options have
1012 been parsed.
1013
1014 Don't use this macro to turn on various extra optimizations for
1015 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1016
1017void
1018override_options ()
1019{
400500c4 1020 int i;
e075ae69
RH
1021 /* Comes from final.c -- no real reason to change it. */
1022#define MAX_CODE_ALIGN 16
f5316dfe 1023
c8c5cb99
SC
1024 static struct ptt
1025 {
8b60264b
KG
1026 const struct processor_costs *cost; /* Processor costs */
1027 const int target_enable; /* Target flags to enable. */
1028 const int target_disable; /* Target flags to disable. */
1029 const int align_loop; /* Default alignments. */
2cca7283 1030 const int align_loop_max_skip;
8b60264b 1031 const int align_jump;
2cca7283 1032 const int align_jump_max_skip;
8b60264b 1033 const int align_func;
e075ae69 1034 }
0f290768 1035 const processor_target_table[PROCESSOR_max] =
e075ae69 1036 {
4977bab6
ZW
1037 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1038 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1039 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1040 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1041 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1042 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1043 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1044 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
e075ae69
RH
1045 };
1046
f4365627 1047 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1048 static struct pta
1049 {
8b60264b
KG
1050 const char *const name; /* processor name or nickname. */
1051 const enum processor_type processor;
0dd0e980
JH
1052 const enum pta_flags
1053 {
1054 PTA_SSE = 1,
1055 PTA_SSE2 = 2,
1056 PTA_MMX = 4,
f4365627 1057 PTA_PREFETCH_SSE = 8,
0dd0e980 1058 PTA_3DNOW = 16,
4977bab6
ZW
1059 PTA_3DNOW_A = 64,
1060 PTA_64BIT = 128
0dd0e980 1061 } flags;
e075ae69 1062 }
0f290768 1063 const processor_alias_table[] =
e075ae69 1064 {
0dd0e980
JH
1065 {"i386", PROCESSOR_I386, 0},
1066 {"i486", PROCESSOR_I486, 0},
1067 {"i586", PROCESSOR_PENTIUM, 0},
1068 {"pentium", PROCESSOR_PENTIUM, 0},
1069 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1070 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1071 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1072 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
0dd0e980
JH
1073 {"i686", PROCESSOR_PENTIUMPRO, 0},
1074 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1075 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1076 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 1077 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 1078 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1079 {"k6", PROCESSOR_K6, PTA_MMX},
1080 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1081 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1082 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1083 | PTA_3DNOW_A},
f4365627 1084 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1085 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1086 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1087 | PTA_3DNOW_A | PTA_SSE},
f4365627 1088 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1089 | PTA_3DNOW_A | PTA_SSE},
f4365627 1090 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1091 | PTA_3DNOW_A | PTA_SSE},
4977bab6
ZW
1092 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1093 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1094 };
c8c5cb99 1095
ca7558fc 1096 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1097
3dc85dfb
RH
1098 /* By default our XFmode is the 80-bit extended format. If we have
1099 use TFmode instead, it's also the 80-bit format, but with padding. */
1100 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1101 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1102
41ed2237 1103 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1104 in case they weren't overwritten by command line options. */
55ba61f3
JH
1105 if (TARGET_64BIT)
1106 {
1107 if (flag_omit_frame_pointer == 2)
1108 flag_omit_frame_pointer = 1;
1109 if (flag_asynchronous_unwind_tables == 2)
1110 flag_asynchronous_unwind_tables = 1;
1111 if (flag_pcc_struct_return == 2)
1112 flag_pcc_struct_return = 0;
1113 }
1114 else
1115 {
1116 if (flag_omit_frame_pointer == 2)
1117 flag_omit_frame_pointer = 0;
1118 if (flag_asynchronous_unwind_tables == 2)
1119 flag_asynchronous_unwind_tables = 0;
1120 if (flag_pcc_struct_return == 2)
7c712dcc 1121 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1122 }
1123
f5316dfe
MM
1124#ifdef SUBTARGET_OVERRIDE_OPTIONS
1125 SUBTARGET_OVERRIDE_OPTIONS;
1126#endif
1127
f4365627
JH
1128 if (!ix86_cpu_string && ix86_arch_string)
1129 ix86_cpu_string = ix86_arch_string;
1130 if (!ix86_cpu_string)
1131 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1132 if (!ix86_arch_string)
4977bab6 1133 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
e075ae69 1134
6189a572
JH
1135 if (ix86_cmodel_string != 0)
1136 {
1137 if (!strcmp (ix86_cmodel_string, "small"))
1138 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1139 else if (flag_pic)
c725bd79 1140 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1141 else if (!strcmp (ix86_cmodel_string, "32"))
1142 ix86_cmodel = CM_32;
1143 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1144 ix86_cmodel = CM_KERNEL;
1145 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1146 ix86_cmodel = CM_MEDIUM;
1147 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1148 ix86_cmodel = CM_LARGE;
1149 else
1150 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1151 }
1152 else
1153 {
1154 ix86_cmodel = CM_32;
1155 if (TARGET_64BIT)
1156 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1157 }
c93e80a5
JH
1158 if (ix86_asm_string != 0)
1159 {
1160 if (!strcmp (ix86_asm_string, "intel"))
1161 ix86_asm_dialect = ASM_INTEL;
1162 else if (!strcmp (ix86_asm_string, "att"))
1163 ix86_asm_dialect = ASM_ATT;
1164 else
1165 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1166 }
6189a572 1167 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1168 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1169 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1170 if (ix86_cmodel == CM_LARGE)
c725bd79 1171 sorry ("code model `large' not supported yet");
0c2dc519 1172 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1173 sorry ("%i-bit mode not compiled in",
0c2dc519 1174 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1175
f4365627
JH
1176 for (i = 0; i < pta_size; i++)
1177 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1178 {
1179 ix86_arch = processor_alias_table[i].processor;
1180 /* Default cpu tuning to the architecture. */
1181 ix86_cpu = ix86_arch;
1182 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1183 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1184 target_flags |= MASK_MMX;
1185 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1186 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1187 target_flags |= MASK_3DNOW;
1188 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1189 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1190 target_flags |= MASK_3DNOW_A;
1191 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1192 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1193 target_flags |= MASK_SSE;
1194 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1195 && !(target_flags_explicit & MASK_SSE2))
f4365627
JH
1196 target_flags |= MASK_SSE2;
1197 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1198 x86_prefetch_sse = true;
4977bab6
ZW
1199 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1200 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1201 break;
1202 }
400500c4 1203
f4365627
JH
1204 if (i == pta_size)
1205 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1206
f4365627
JH
1207 for (i = 0; i < pta_size; i++)
1208 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1209 {
1210 ix86_cpu = processor_alias_table[i].processor;
4977bab6
ZW
1211 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1212 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1213 break;
1214 }
1215 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1216 x86_prefetch_sse = true;
1217 if (i == pta_size)
1218 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1219
2ab0437e
JH
1220 if (optimize_size)
1221 ix86_cost = &size_cost;
1222 else
1223 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1224 target_flags |= processor_target_table[ix86_cpu].target_enable;
1225 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1226
36edd3cc
BS
1227 /* Arrange to set up i386_stack_locals for all functions. */
1228 init_machine_status = ix86_init_machine_status;
fce5a9f2 1229
0f290768 1230 /* Validate -mregparm= value. */
e075ae69 1231 if (ix86_regparm_string)
b08de47e 1232 {
400500c4
RK
1233 i = atoi (ix86_regparm_string);
1234 if (i < 0 || i > REGPARM_MAX)
1235 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1236 else
1237 ix86_regparm = i;
b08de47e 1238 }
0d7d98ee
JH
1239 else
1240 if (TARGET_64BIT)
1241 ix86_regparm = REGPARM_MAX;
b08de47e 1242
3e18fdf6 1243 /* If the user has provided any of the -malign-* options,
a4f31c00 1244 warn and use that value only if -falign-* is not set.
3e18fdf6 1245 Remove this code in GCC 3.2 or later. */
e075ae69 1246 if (ix86_align_loops_string)
b08de47e 1247 {
3e18fdf6
GK
1248 warning ("-malign-loops is obsolete, use -falign-loops");
1249 if (align_loops == 0)
1250 {
1251 i = atoi (ix86_align_loops_string);
1252 if (i < 0 || i > MAX_CODE_ALIGN)
1253 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1254 else
1255 align_loops = 1 << i;
1256 }
b08de47e 1257 }
3af4bd89 1258
e075ae69 1259 if (ix86_align_jumps_string)
b08de47e 1260 {
3e18fdf6
GK
1261 warning ("-malign-jumps is obsolete, use -falign-jumps");
1262 if (align_jumps == 0)
1263 {
1264 i = atoi (ix86_align_jumps_string);
1265 if (i < 0 || i > MAX_CODE_ALIGN)
1266 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1267 else
1268 align_jumps = 1 << i;
1269 }
b08de47e 1270 }
b08de47e 1271
e075ae69 1272 if (ix86_align_funcs_string)
b08de47e 1273 {
3e18fdf6
GK
1274 warning ("-malign-functions is obsolete, use -falign-functions");
1275 if (align_functions == 0)
1276 {
1277 i = atoi (ix86_align_funcs_string);
1278 if (i < 0 || i > MAX_CODE_ALIGN)
1279 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1280 else
1281 align_functions = 1 << i;
1282 }
b08de47e 1283 }
3af4bd89 1284
3e18fdf6 1285 /* Default align_* from the processor table. */
3e18fdf6 1286 if (align_loops == 0)
2cca7283
JH
1287 {
1288 align_loops = processor_target_table[ix86_cpu].align_loop;
1289 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1290 }
3e18fdf6 1291 if (align_jumps == 0)
2cca7283
JH
1292 {
1293 align_jumps = processor_target_table[ix86_cpu].align_jump;
1294 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1295 }
3e18fdf6 1296 if (align_functions == 0)
2cca7283
JH
1297 {
1298 align_functions = processor_target_table[ix86_cpu].align_func;
1299 }
3e18fdf6 1300
e4c0478d 1301 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1302 The default of 128 bits is for Pentium III's SSE __m128, but we
1303 don't want additional code to keep the stack aligned when
1304 optimizing for code size. */
1305 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1306 ? TARGET_64BIT ? 128 : 32
fbb83b43 1307 : 128);
e075ae69 1308 if (ix86_preferred_stack_boundary_string)
3af4bd89 1309 {
400500c4 1310 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1311 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1312 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1313 TARGET_64BIT ? 4 : 2);
400500c4
RK
1314 else
1315 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1316 }
77a989d1 1317
0f290768 1318 /* Validate -mbranch-cost= value, or provide default. */
4977bab6 1319 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
e075ae69 1320 if (ix86_branch_cost_string)
804a8ee0 1321 {
400500c4
RK
1322 i = atoi (ix86_branch_cost_string);
1323 if (i < 0 || i > 5)
1324 error ("-mbranch-cost=%d is not between 0 and 5", i);
1325 else
1326 ix86_branch_cost = i;
804a8ee0 1327 }
804a8ee0 1328
f996902d
RH
1329 if (ix86_tls_dialect_string)
1330 {
1331 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1332 ix86_tls_dialect = TLS_DIALECT_GNU;
1333 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1334 ix86_tls_dialect = TLS_DIALECT_SUN;
1335 else
1336 error ("bad value (%s) for -mtls-dialect= switch",
1337 ix86_tls_dialect_string);
1338 }
1339
e9a25f70
JL
1340 /* Keep nonleaf frame pointers. */
1341 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1342 flag_omit_frame_pointer = 1;
e075ae69
RH
1343
1344 /* If we're doing fast math, we don't care about comparison order
1345 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1346 if (flag_unsafe_math_optimizations)
e075ae69
RH
1347 target_flags &= ~MASK_IEEE_FP;
1348
30c99a84
RH
1349 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1350 since the insns won't need emulation. */
1351 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1352 target_flags &= ~MASK_NO_FANCY_MATH_387;
1353
14f73b5a
JH
1354 if (TARGET_64BIT)
1355 {
1356 if (TARGET_ALIGN_DOUBLE)
c725bd79 1357 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1358 if (TARGET_RTD)
c725bd79 1359 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1360 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1361 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1362 ix86_fpmath = FPMATH_SSE;
14f73b5a 1363 }
965f5423
JH
1364 else
1365 ix86_fpmath = FPMATH_387;
1366
1367 if (ix86_fpmath_string != 0)
1368 {
1369 if (! strcmp (ix86_fpmath_string, "387"))
1370 ix86_fpmath = FPMATH_387;
1371 else if (! strcmp (ix86_fpmath_string, "sse"))
1372 {
1373 if (!TARGET_SSE)
1374 {
1375 warning ("SSE instruction set disabled, using 387 arithmetics");
1376 ix86_fpmath = FPMATH_387;
1377 }
1378 else
1379 ix86_fpmath = FPMATH_SSE;
1380 }
1381 else if (! strcmp (ix86_fpmath_string, "387,sse")
1382 || ! strcmp (ix86_fpmath_string, "sse,387"))
1383 {
1384 if (!TARGET_SSE)
1385 {
1386 warning ("SSE instruction set disabled, using 387 arithmetics");
1387 ix86_fpmath = FPMATH_387;
1388 }
1389 else if (!TARGET_80387)
1390 {
1391 warning ("387 instruction set disabled, using SSE arithmetics");
1392 ix86_fpmath = FPMATH_SSE;
1393 }
1394 else
1395 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1396 }
fce5a9f2 1397 else
965f5423
JH
1398 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1399 }
14f73b5a 1400
a7180f70
BS
1401 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1402 on by -msse. */
1403 if (TARGET_SSE)
e37af218
RH
1404 {
1405 target_flags |= MASK_MMX;
1406 x86_prefetch_sse = true;
1407 }
c6036a37 1408
47f339cf
BS
1409 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1410 if (TARGET_3DNOW)
1411 {
1412 target_flags |= MASK_MMX;
d1f87653 1413 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1414 extensions it adds. */
1415 if (x86_3dnow_a & (1 << ix86_arch))
1416 target_flags |= MASK_3DNOW_A;
1417 }
c6036a37 1418 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1419 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1420 && !optimize_size)
1421 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1422
1423 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1424 {
1425 char *p;
1426 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1427 p = strchr (internal_label_prefix, 'X');
1428 internal_label_prefix_len = p - internal_label_prefix;
1429 *p = '\0';
1430 }
f5316dfe
MM
1431}
1432\f
32b5b1aa 1433void
c6aded7c 1434optimization_options (level, size)
32b5b1aa 1435 int level;
bb5177ac 1436 int size ATTRIBUTE_UNUSED;
32b5b1aa 1437{
e9a25f70
JL
1438 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1439 make the problem with not enough registers even worse. */
32b5b1aa
SC
1440#ifdef INSN_SCHEDULING
1441 if (level > 1)
1442 flag_schedule_insns = 0;
1443#endif
55ba61f3
JH
1444
1445 /* The default values of these switches depend on the TARGET_64BIT
1446 that is not known at this moment. Mark these values with 2 and
1447 let user the to override these. In case there is no command line option
1448 specifying them, we will set the defaults in override_options. */
1449 if (optimize >= 1)
1450 flag_omit_frame_pointer = 2;
1451 flag_pcc_struct_return = 2;
1452 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1453}
b08de47e 1454\f
91d231cb
JM
/* Table of valid machine attributes.  Terminated by the all-NULL entry;
   see struct attribute_spec for the field meanings.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true, true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Windows DLL import/export and shared-section attributes.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* Select MS- or GCC-compatible bitfield layout for a struct.  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};
1479
4977bab6
ZW
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires %ebx live.
   If we are returning floats on the register stack, we cannot make
   sibling calls to functions that return floats.  (The stack adjust
   instruction will wind up after the sibcall jump, and not be executed.)

   DECL is the callee's FUNCTION_DECL (NULL for an indirect call);
   EXP is the CALL_EXPR.  Returns true when a sibcall is allowed.  */

static bool
ix86_function_ok_for_sibcall (decl, exp)
     tree decl;
     tree exp;
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does; the necessary stack adjustment will not be
     executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      int regparm = ix86_regparm;
      tree attr, type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      /* An explicit regparm attribute on the callee's type overrides
	 the -mregparm default.  */
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));

      /* With three or more argument registers in use, no call-clobbered
	 register remains free to hold the target address.  */
      if (regparm >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1533
/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
   arguments as in struct attribute_spec.handler.
   NODE is the tree the attribute is attached to, NAME identifies which
   of the three attributes is being processed, and *NO_ADD_ATTRS is set
   when the attribute should be dropped.  Always returns NULL_TREE.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  /* Calling-convention attributes only make sense on function types
     (or on declarations that carry one); warn and drop otherwise.  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      /* fastcall conflicts with both stdcall and regparm, since each
	 dictates how arguments are delivered to the callee.  */
      if (is_attribute_p ("fastcall", name))
        {
          if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
            {
	      error ("fastcall and stdcall attributes are not compatible");
	    }
          else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
            {
	      error ("fastcall and regparm attributes are not compatible");
	    }
        }
      else if (is_attribute_p ("stdcall", name))
        {
	  /* Mirror check for the case where stdcall is added second.  */
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
            {
	      error ("fastcall and stdcall attributes are not compatible");
	    }
        }
    }

  /* These calling conventions are ia32 only; on x86-64 the attribute
     is accepted but ignored with a warning.  */
  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
b08de47e 1583
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.
   Validates that the single argument in ARGS is an integer constant no
   larger than REGPARM_MAX, and that regparm is not combined with
   fastcall.  Sets *NO_ADD_ATTRS to drop an invalid attribute.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  /* regparm only makes sense on function types (or declarations that
     carry one); warn and drop otherwise.  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      /* The attribute's single argument: the register count.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  /* More registers than the ABI provides for argument passing.  */
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* fastcall already fixes the argument registers, so combining it
	 with regparm is rejected (see ix86_handle_cdecl_attribute).  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  return NULL_TREE;
}
1629
1630/* Return 0 if the attributes for two types are incompatible, 1 if they
1631 are compatible, and 2 if they are nearly compatible (which causes a
1632 warning to be generated). */
1633
8d8e52be 1634static int
e075ae69 1635ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1636 tree type1;
1637 tree type2;
b08de47e 1638{
0f290768 1639 /* Check for mismatch of non-default calling convention. */
27c38fbe 1640 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1641
1642 if (TREE_CODE (type1) != FUNCTION_TYPE)
1643 return 1;
1644
e91f04de
CH
1645 /* Check for mismatched fastcall types */
1646 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1647 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1648 return 0;
1649
afcfe58c 1650 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1651 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1652 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1653 return 0;
b08de47e
MM
1654 return 1;
1655}
b08de47e 1656\f
483ab821
MM
1657/* Return the regparm value for a fuctio with the indicated TYPE. */
1658
1659static int
1660ix86_fntype_regparm (type)
1661 tree type;
1662{
1663 tree attr;
1664
1665 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1666 if (attr)
1667 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1668 else
1669 return ix86_regparm;
1670}
1671
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* -mrtd applies only to genuine function declarations; library calls
     arrive here as bare IDENTIFIER_NODEs and are excluded.  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* The callee pops SIZE bytes only when the argument list is fixed,
       i.e. the prototype ends in void_type_node (or is empty).  */
    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    {
      int nregs = ix86_fntype_regparm (funtype);

      /* When no registers carry arguments, the hidden pointer to the
	 return slot is on the stack and the callee pops it (one word).  */
      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
b08de47e
MM
1724\f
1725/* Argument support functions. */
1726
53c17031
JH
1727/* Return true when register may be used to pass function parameters. */
1728bool
1729ix86_function_arg_regno_p (regno)
1730 int regno;
1731{
1732 int i;
1733 if (!TARGET_64BIT)
0333394e
JJ
1734 return (regno < REGPARM_MAX
1735 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1736 if (SSE_REGNO_P (regno) && TARGET_SSE)
1737 return true;
1738 /* RAX is used as hidden argument to va_arg functions. */
1739 if (!regno)
1740 return true;
1741 for (i = 0; i < REGPARM_MAX; i++)
1742 if (regno == x86_64_int_parameter_registers[i])
1743 return true;
1744 return false;
1745}
1746
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  /* A regparm attribute on the called type overrides the -mregparm
     default (ia32 only).  */
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }


  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      /* Varargs detected: on ia32 no argument registers are
		 used at all and fastcall is cancelled.  */
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Unprototyped functions and libcalls may also take variable
     arguments.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1830
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.
   The rules below are applied in order; earlier rules take precedence.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
     The special INTEGERSI/SSESF pairing keeps the narrower 32-bit
     integer class.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
1873
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (mode, type, classes, bit_offset)
     enum machine_mode mode;
     tree type;
     enum x86_64_reg_class classes[MAX_CLASSES];
     int bit_offset;
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  /* Number of 8-byte words needed, accounting for a sub-word start
     offset within the containing aggregate.  */
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      int i;

	      /* NOTE(review): the inner merge loop below reuses `i', the
		 same variable driving this base-class iteration, so bases
		 after the first may be skipped when num > 0 — confirm
		 against upstream history before relying on multi-base
		 classification here.  */
	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
		  tree type = BINFO_TYPE (binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + bit_offset) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  /* Replicate the element classification across the whole array.  */
	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      int i;

	      /* NOTE(review): same reused-`i' pattern as in the
		 RECORD_TYPE base loop above — confirm.  */
	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
		  tree type = BINFO_TYPE (binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + (bit_offset % 64)) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* All union members start at offset 0, so merge each member's
	     classification into the same words.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      /* Integers fitting entirely in the low 32 bits of a word get the
	 narrower INTEGERSI class.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      /* A float in the low half of a word can share an SSE register
	 with another float (SSESF); otherwise full SSE.  */
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      /* 16-byte vectors occupy one SSE register (SSE + SSEUP pair).  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      /* 8-byte (MMX-sized) vectors go in memory.  */
      return 0;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      abort ();
    }
}
2165
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.
   *INT_NREGS and *SSE_NREGS receive the number of integer and SSE
   registers consumed; IN_RETURN permits x87 classes (legal only for
   return values, not for arguments).  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* classify_argument returning 0 means: pass in memory.  */
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	/* SSEUP extends the preceding SSE register; costs nothing extra.  */
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 classes are only valid for return values.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	/* classify_argument never leaves MEMORY in the array.  */
	abort ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.
   Returns a single REG when the value fits one register, a PARALLEL of
   (register, byte-offset) pairs when it is split across registers, or
   NULL when it must be passed in memory or does not fit the remaining
   NINTREGS/NSSEREGS.  INTREG points at the candidate integer register
   numbers; SSE_REGNO is the index of the next free SSE register.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int * intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* Memory class, or not enough registers left: pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  /* A full 16-byte SSE value occupies one XMM register.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  /* A 16-byte integer in two consecutive registers can be a plain REG.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	    else if (class[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	    if (tmpmode == BLKmode)
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode, *intreg),
					       GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (SFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (DFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* A following SSEUP word belongs to the same XMM register,
	       so emit a TImode piece and skip the extra word.  */
	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	      tmpmode = TImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    if (tmpmode == TImode)
	      i++;
	    sse_regno++;
	    break;
	  default:
	    abort ();
	}
    }
  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2346
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* Memory-passed argument: advance the stack word counter.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  /* Fits in the remaining registers: consume them.  */
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      /* ia32: TImode values go in SSE registers when available ...  */
      if (TARGET_SSE && mode == TImode)
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else
	{
	  /* ... everything else consumes integer registers word by word;
	     once exhausted, remaining arguments go on the stack.  */
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}
2409
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;		/* type of the argument or 0 if lib support */
     int named;		/* != 0 for normal args, == 0 for ... args */
{
  rtx ret   = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    /* x86-64: delegate the full classification to construct_container;
       NULL means "pass on the stack".  */
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	/* Integer-like values use registers only while enough remain
	   to hold the whole argument.  */
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case TImode:
	/* 16-byte values go in SSE registers when one is free.  */
	if (cum->sse_nregs)
	  ret = gen_rtx_REG (mode, cum->sse_regno);
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
53c17031 2505
09b2e78d
ZD
2506/* A C expression that indicates when an argument must be passed by
2507 reference. If nonzero for an argument, a copy of that argument is
2508 made in memory and a pointer to the argument is passed instead of
2509 the argument itself. The pointer is passed in whatever way is
2510 appropriate for passing a pointer to that type. */
2511
2512int
2513function_arg_pass_by_reference (cum, mode, type, named)
2514 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2515 enum machine_mode mode ATTRIBUTE_UNUSED;
2516 tree type;
2517 int named ATTRIBUTE_UNUSED;
2518{
2519 if (!TARGET_64BIT)
2520 return 0;
2521
2522 if (type && int_size_in_bytes (type) == -1)
2523 {
2524 if (TARGET_DEBUG_ARG)
2525 fprintf (stderr, "function_arg_pass_by_reference\n");
2526 return 1;
2527 }
2528
2529 return 0;
2530}
2531
53c17031
JH
2532/* Gives the alignment boundary, in bits, of an argument with the specified mode
2533 and type. */
2534
2535int
2536ix86_function_arg_boundary (mode, type)
2537 enum machine_mode mode;
2538 tree type;
2539{
2540 int align;
2541 if (!TARGET_64BIT)
2542 return PARM_BOUNDARY;
2543 if (type)
2544 align = TYPE_ALIGN (type);
2545 else
2546 align = GET_MODE_ALIGNMENT (mode);
2547 if (align < PARM_BOUNDARY)
2548 align = PARM_BOUNDARY;
2549 if (align > 128)
2550 align = 128;
2551 return align;
2552}
2553
2554/* Return true if N is a possible register number of function value. */
2555bool
2556ix86_function_value_regno_p (regno)
2557 int regno;
2558{
2559 if (!TARGET_64BIT)
2560 {
2561 return ((regno) == 0
2562 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2563 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2564 }
2565 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2566 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2567 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2568}
2569
2570/* Define how to find the value returned by a function.
2571 VALTYPE is the data type of the value (as a tree).
2572 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2573 otherwise, FUNC is 0. */
2574rtx
2575ix86_function_value (valtype)
2576 tree valtype;
2577{
2578 if (TARGET_64BIT)
2579 {
2580 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2581 REGPARM_MAX, SSE_REGPARM_MAX,
2582 x86_64_int_return_registers, 0);
d1f87653
KH
2583 /* For zero sized structures, construct_container return NULL, but we need
2584 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2585 if (!ret)
2586 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2587 return ret;
2588 }
2589 else
b069de3b
SS
2590 return gen_rtx_REG (TYPE_MODE (valtype),
2591 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2592}
2593
f5143c46 2594/* Return false iff type is returned in memory. */
53c17031
JH
2595int
2596ix86_return_in_memory (type)
2597 tree type;
2598{
2599 int needed_intregs, needed_sseregs;
2600 if (TARGET_64BIT)
2601 {
2602 return !examine_argument (TYPE_MODE (type), type, 1,
2603 &needed_intregs, &needed_sseregs);
2604 }
2605 else
2606 {
2607 if (TYPE_MODE (type) == BLKmode
2608 || (VECTOR_MODE_P (TYPE_MODE (type))
2609 && int_size_in_bytes (type) == 8)
2610 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2611 && TYPE_MODE (type) != TFmode
2612 && !VECTOR_MODE_P (TYPE_MODE (type))))
2613 return 1;
2614 return 0;
2615 }
2616}
2617
2618/* Define how to find the value returned by a library function
2619 assuming the value has mode MODE. */
2620rtx
2621ix86_libcall_value (mode)
2622 enum machine_mode mode;
2623{
2624 if (TARGET_64BIT)
2625 {
2626 switch (mode)
2627 {
2628 case SFmode:
2629 case SCmode:
2630 case DFmode:
2631 case DCmode:
2632 return gen_rtx_REG (mode, FIRST_SSE_REG);
2633 case TFmode:
2634 case TCmode:
2635 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2636 default:
2637 return gen_rtx_REG (mode, 0);
2638 }
2639 }
2640 else
b069de3b
SS
2641 return gen_rtx_REG (mode, ix86_value_regno (mode));
2642}
2643
2644/* Given a mode, return the register to use for a return value. */
2645
2646static int
2647ix86_value_regno (mode)
2648 enum machine_mode mode;
2649{
2650 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2651 return FIRST_FLOAT_REG;
2652 if (mode == TImode || VECTOR_MODE_P (mode))
2653 return FIRST_SSE_REG;
2654 return 0;
53c17031 2655}
ad919812
JH
2656\f
2657/* Create the va_list data type. */
53c17031 2658
ad919812
JH
2659tree
2660ix86_build_va_list ()
2661{
2662 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2663
ad919812
JH
2664 /* For i386 we use plain pointer to argument area. */
2665 if (!TARGET_64BIT)
2666 return build_pointer_type (char_type_node);
2667
f1e639b1 2668 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2669 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2670
fce5a9f2 2671 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2672 unsigned_type_node);
fce5a9f2 2673 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2674 unsigned_type_node);
2675 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2676 ptr_type_node);
2677 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2678 ptr_type_node);
2679
2680 DECL_FIELD_CONTEXT (f_gpr) = record;
2681 DECL_FIELD_CONTEXT (f_fpr) = record;
2682 DECL_FIELD_CONTEXT (f_ovf) = record;
2683 DECL_FIELD_CONTEXT (f_sav) = record;
2684
2685 TREE_CHAIN (record) = type_decl;
2686 TYPE_NAME (record) = type_decl;
2687 TYPE_FIELDS (record) = f_gpr;
2688 TREE_CHAIN (f_gpr) = f_fpr;
2689 TREE_CHAIN (f_fpr) = f_ovf;
2690 TREE_CHAIN (f_ovf) = f_sav;
2691
2692 layout_type (record);
2693
2694 /* The correct type is an array type of one element. */
2695 return build_array_type (record, build_index_type (size_zero_node));
2696}
2697
2698/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2699 variable number of arguments.
ad919812
JH
2700
2701 CUM is as above.
2702
2703 MODE and TYPE are the mode and type of the current parameter.
2704
2705 PRETEND_SIZE is a variable that should be set to the amount of stack
2706 that must be pushed by the prolog to pretend that our caller pushed
2707 it.
2708
2709 Normally, this macro will push all remaining incoming registers on the
2710 stack and set PRETEND_SIZE to the length of the registers pushed. */
2711
2712void
2713ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2714 CUMULATIVE_ARGS *cum;
2715 enum machine_mode mode;
2716 tree type;
2717 int *pretend_size ATTRIBUTE_UNUSED;
2718 int no_rtl;
2719
2720{
2721 CUMULATIVE_ARGS next_cum;
2722 rtx save_area = NULL_RTX, mem;
2723 rtx label;
2724 rtx label_ref;
2725 rtx tmp_reg;
2726 rtx nsse_reg;
2727 int set;
2728 tree fntype;
2729 int stdarg_p;
2730 int i;
2731
2732 if (!TARGET_64BIT)
2733 return;
2734
2735 /* Indicate to allocate space on the stack for varargs save area. */
2736 ix86_save_varrargs_registers = 1;
2737
2738 fntype = TREE_TYPE (current_function_decl);
2739 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2740 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2741 != void_type_node));
2742
2743 /* For varargs, we do not want to skip the dummy va_dcl argument.
2744 For stdargs, we do want to skip the last named argument. */
2745 next_cum = *cum;
2746 if (stdarg_p)
2747 function_arg_advance (&next_cum, mode, type, 1);
2748
2749 if (!no_rtl)
2750 save_area = frame_pointer_rtx;
2751
2752 set = get_varargs_alias_set ();
2753
2754 for (i = next_cum.regno; i < ix86_regparm; i++)
2755 {
2756 mem = gen_rtx_MEM (Pmode,
2757 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2758 set_mem_alias_set (mem, set);
ad919812
JH
2759 emit_move_insn (mem, gen_rtx_REG (Pmode,
2760 x86_64_int_parameter_registers[i]));
2761 }
2762
2763 if (next_cum.sse_nregs)
2764 {
2765 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 2766 of SSE parameter registers used to call this function. We use
ad919812
JH
2767 sse_prologue_save insn template that produces computed jump across
2768 SSE saves. We need some preparation work to get this working. */
2769
2770 label = gen_label_rtx ();
2771 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2772
2773 /* Compute address to jump to :
2774 label - 5*eax + nnamed_sse_arguments*5 */
2775 tmp_reg = gen_reg_rtx (Pmode);
2776 nsse_reg = gen_reg_rtx (Pmode);
2777 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2778 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2779 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2780 GEN_INT (4))));
2781 if (next_cum.sse_regno)
2782 emit_move_insn
2783 (nsse_reg,
2784 gen_rtx_CONST (DImode,
2785 gen_rtx_PLUS (DImode,
2786 label_ref,
2787 GEN_INT (next_cum.sse_regno * 4))));
2788 else
2789 emit_move_insn (nsse_reg, label_ref);
2790 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2791
2792 /* Compute address of memory block we save into. We always use pointer
2793 pointing 127 bytes after first byte to store - this is needed to keep
2794 instruction size limited by 4 bytes. */
2795 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2796 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2797 plus_constant (save_area,
2798 8 * REGPARM_MAX + 127)));
ad919812 2799 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2800 set_mem_alias_set (mem, set);
8ac61af7 2801 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2802
2803 /* And finally do the dirty job! */
8ac61af7
RK
2804 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2805 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2806 }
2807
2808}
2809
2810/* Implement va_start. */
2811
2812void
e5faf155 2813ix86_va_start (valist, nextarg)
ad919812
JH
2814 tree valist;
2815 rtx nextarg;
2816{
2817 HOST_WIDE_INT words, n_gpr, n_fpr;
2818 tree f_gpr, f_fpr, f_ovf, f_sav;
2819 tree gpr, fpr, ovf, sav, t;
2820
2821 /* Only 64bit target needs something special. */
2822 if (!TARGET_64BIT)
2823 {
e5faf155 2824 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2825 return;
2826 }
2827
2828 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2829 f_fpr = TREE_CHAIN (f_gpr);
2830 f_ovf = TREE_CHAIN (f_fpr);
2831 f_sav = TREE_CHAIN (f_ovf);
2832
2833 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2834 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2835 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2836 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2837 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2838
2839 /* Count number of gp and fp argument registers used. */
2840 words = current_function_args_info.words;
2841 n_gpr = current_function_args_info.regno;
2842 n_fpr = current_function_args_info.sse_regno;
2843
2844 if (TARGET_DEBUG_ARG)
2845 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2846 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2847
2848 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2849 build_int_2 (n_gpr * 8, 0));
2850 TREE_SIDE_EFFECTS (t) = 1;
2851 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2852
2853 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2854 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2855 TREE_SIDE_EFFECTS (t) = 1;
2856 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2857
2858 /* Find the overflow area. */
2859 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2860 if (words != 0)
2861 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2862 build_int_2 (words * UNITS_PER_WORD, 0));
2863 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2864 TREE_SIDE_EFFECTS (t) = 1;
2865 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2866
2867 /* Find the register save area.
2868 Prologue of the function save it right above stack frame. */
2869 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2870 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2871 TREE_SIDE_EFFECTS (t) = 1;
2872 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2873}
2874
2875/* Implement va_arg. */
2876rtx
2877ix86_va_arg (valist, type)
2878 tree valist, type;
2879{
0139adca 2880 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2881 tree f_gpr, f_fpr, f_ovf, f_sav;
2882 tree gpr, fpr, ovf, sav, t;
b932f770 2883 int size, rsize;
ad919812
JH
2884 rtx lab_false, lab_over = NULL_RTX;
2885 rtx addr_rtx, r;
2886 rtx container;
09b2e78d 2887 int indirect_p = 0;
ad919812
JH
2888
2889 /* Only 64bit target needs something special. */
2890 if (!TARGET_64BIT)
2891 {
2892 return std_expand_builtin_va_arg (valist, type);
2893 }
2894
2895 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2896 f_fpr = TREE_CHAIN (f_gpr);
2897 f_ovf = TREE_CHAIN (f_fpr);
2898 f_sav = TREE_CHAIN (f_ovf);
2899
2900 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2901 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2902 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2903 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2904 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2905
2906 size = int_size_in_bytes (type);
09b2e78d
ZD
2907 if (size == -1)
2908 {
2909 /* Passed by reference. */
2910 indirect_p = 1;
2911 type = build_pointer_type (type);
2912 size = int_size_in_bytes (type);
2913 }
ad919812
JH
2914 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2915
2916 container = construct_container (TYPE_MODE (type), type, 0,
2917 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2918 /*
2919 * Pull the value out of the saved registers ...
2920 */
2921
2922 addr_rtx = gen_reg_rtx (Pmode);
2923
2924 if (container)
2925 {
2926 rtx int_addr_rtx, sse_addr_rtx;
2927 int needed_intregs, needed_sseregs;
2928 int need_temp;
2929
2930 lab_over = gen_label_rtx ();
2931 lab_false = gen_label_rtx ();
8bad7136 2932
ad919812
JH
2933 examine_argument (TYPE_MODE (type), type, 0,
2934 &needed_intregs, &needed_sseregs);
2935
2936
2937 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2938 || TYPE_ALIGN (type) > 128);
2939
d1f87653 2940 /* In case we are passing structure, verify that it is consecutive block
ad919812
JH
2941 on the register save area. If not we need to do moves. */
2942 if (!need_temp && !REG_P (container))
2943 {
d1f87653 2944 /* Verify that all registers are strictly consecutive */
ad919812
JH
2945 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2946 {
2947 int i;
2948
2949 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2950 {
2951 rtx slot = XVECEXP (container, 0, i);
b531087a 2952 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2953 || INTVAL (XEXP (slot, 1)) != i * 16)
2954 need_temp = 1;
2955 }
2956 }
2957 else
2958 {
2959 int i;
2960
2961 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2962 {
2963 rtx slot = XVECEXP (container, 0, i);
b531087a 2964 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2965 || INTVAL (XEXP (slot, 1)) != i * 8)
2966 need_temp = 1;
2967 }
2968 }
2969 }
2970 if (!need_temp)
2971 {
2972 int_addr_rtx = addr_rtx;
2973 sse_addr_rtx = addr_rtx;
2974 }
2975 else
2976 {
2977 int_addr_rtx = gen_reg_rtx (Pmode);
2978 sse_addr_rtx = gen_reg_rtx (Pmode);
2979 }
2980 /* First ensure that we fit completely in registers. */
2981 if (needed_intregs)
2982 {
2983 emit_cmp_and_jump_insns (expand_expr
2984 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2985 GEN_INT ((REGPARM_MAX - needed_intregs +
2986 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2987 1, lab_false);
ad919812
JH
2988 }
2989 if (needed_sseregs)
2990 {
2991 emit_cmp_and_jump_insns (expand_expr
2992 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2993 GEN_INT ((SSE_REGPARM_MAX -
2994 needed_sseregs + 1) * 16 +
2995 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2996 SImode, 1, lab_false);
ad919812
JH
2997 }
2998
2999 /* Compute index to start of area used for integer regs. */
3000 if (needed_intregs)
3001 {
3002 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3003 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3004 if (r != int_addr_rtx)
3005 emit_move_insn (int_addr_rtx, r);
3006 }
3007 if (needed_sseregs)
3008 {
3009 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3010 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3011 if (r != sse_addr_rtx)
3012 emit_move_insn (sse_addr_rtx, r);
3013 }
3014 if (need_temp)
3015 {
3016 int i;
3017 rtx mem;
3018
b932f770
JH
3019 /* Never use the memory itself, as it has the alias set. */
3020 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3021 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 3022 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3023 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3024
ad919812
JH
3025 for (i = 0; i < XVECLEN (container, 0); i++)
3026 {
3027 rtx slot = XVECEXP (container, 0, i);
3028 rtx reg = XEXP (slot, 0);
3029 enum machine_mode mode = GET_MODE (reg);
3030 rtx src_addr;
3031 rtx src_mem;
3032 int src_offset;
3033 rtx dest_mem;
3034
3035 if (SSE_REGNO_P (REGNO (reg)))
3036 {
3037 src_addr = sse_addr_rtx;
3038 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3039 }
3040 else
3041 {
3042 src_addr = int_addr_rtx;
3043 src_offset = REGNO (reg) * 8;
3044 }
3045 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3046 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3047 src_mem = adjust_address (src_mem, mode, src_offset);
3048 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3049 emit_move_insn (dest_mem, src_mem);
3050 }
3051 }
3052
3053 if (needed_intregs)
3054 {
3055 t =
3056 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3057 build_int_2 (needed_intregs * 8, 0));
3058 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3059 TREE_SIDE_EFFECTS (t) = 1;
3060 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3061 }
3062 if (needed_sseregs)
3063 {
3064 t =
3065 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3066 build_int_2 (needed_sseregs * 16, 0));
3067 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3068 TREE_SIDE_EFFECTS (t) = 1;
3069 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3070 }
3071
3072 emit_jump_insn (gen_jump (lab_over));
3073 emit_barrier ();
3074 emit_label (lab_false);
3075 }
3076
3077 /* ... otherwise out of the overflow area. */
3078
3079 /* Care for on-stack alignment if needed. */
3080 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3081 t = ovf;
3082 else
3083 {
3084 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3085 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3086 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3087 }
3088 t = save_expr (t);
3089
3090 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3091 if (r != addr_rtx)
3092 emit_move_insn (addr_rtx, r);
3093
3094 t =
3095 build (PLUS_EXPR, TREE_TYPE (t), t,
3096 build_int_2 (rsize * UNITS_PER_WORD, 0));
3097 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3098 TREE_SIDE_EFFECTS (t) = 1;
3099 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3100
3101 if (container)
3102 emit_label (lab_over);
3103
09b2e78d
ZD
3104 if (indirect_p)
3105 {
3106 r = gen_rtx_MEM (Pmode, addr_rtx);
3107 set_mem_alias_set (r, get_varargs_alias_set ());
3108 emit_move_insn (addr_rtx, r);
3109 }
3110
ad919812
JH
3111 return addr_rtx;
3112}
3113\f
c3c637e3
GS
3114/* Return nonzero if OP is either a i387 or SSE fp register. */
3115int
3116any_fp_register_operand (op, mode)
3117 rtx op;
3118 enum machine_mode mode ATTRIBUTE_UNUSED;
3119{
3120 return ANY_FP_REG_P (op);
3121}
3122
3123/* Return nonzero if OP is an i387 fp register. */
3124int
3125fp_register_operand (op, mode)
3126 rtx op;
3127 enum machine_mode mode ATTRIBUTE_UNUSED;
3128{
3129 return FP_REG_P (op);
3130}
3131
3132/* Return nonzero if OP is a non-fp register_operand. */
3133int
3134register_and_not_any_fp_reg_operand (op, mode)
3135 rtx op;
3136 enum machine_mode mode;
3137{
3138 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3139}
3140
40b982a9 3141/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3142 i387 fp register. */
3143int
3144register_and_not_fp_reg_operand (op, mode)
3145 rtx op;
3146 enum machine_mode mode;
3147{
3148 return register_operand (op, mode) && !FP_REG_P (op);
3149}
3150
7dd4b4a3
JH
3151/* Return nonzero if OP is general operand representable on x86_64. */
3152
3153int
3154x86_64_general_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode;
3157{
3158 if (!TARGET_64BIT)
3159 return general_operand (op, mode);
3160 if (nonimmediate_operand (op, mode))
3161 return 1;
c05dbe81 3162 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3163}
3164
3165/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 3166 as either sign extended or zero extended constant. */
7dd4b4a3
JH
3167
3168int
3169x86_64_szext_general_operand (op, mode)
3170 rtx op;
3171 enum machine_mode mode;
3172{
3173 if (!TARGET_64BIT)
3174 return general_operand (op, mode);
3175 if (nonimmediate_operand (op, mode))
3176 return 1;
c05dbe81 3177 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3178}
3179
3180/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3181
3182int
3183x86_64_nonmemory_operand (op, mode)
3184 rtx op;
3185 enum machine_mode mode;
3186{
3187 if (!TARGET_64BIT)
3188 return nonmemory_operand (op, mode);
3189 if (register_operand (op, mode))
3190 return 1;
c05dbe81 3191 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3192}
3193
3194/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3195
3196int
3197x86_64_movabs_operand (op, mode)
3198 rtx op;
3199 enum machine_mode mode;
3200{
3201 if (!TARGET_64BIT || !flag_pic)
3202 return nonmemory_operand (op, mode);
c05dbe81 3203 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3204 return 1;
3205 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3206 return 1;
3207 return 0;
3208}
3209
3210/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3211
3212int
3213x86_64_szext_nonmemory_operand (op, mode)
3214 rtx op;
3215 enum machine_mode mode;
3216{
3217 if (!TARGET_64BIT)
3218 return nonmemory_operand (op, mode);
3219 if (register_operand (op, mode))
3220 return 1;
c05dbe81 3221 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3222}
3223
3224/* Return nonzero if OP is immediate operand representable on x86_64. */
3225
3226int
3227x86_64_immediate_operand (op, mode)
3228 rtx op;
3229 enum machine_mode mode;
3230{
3231 if (!TARGET_64BIT)
3232 return immediate_operand (op, mode);
c05dbe81 3233 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3234}
3235
3236/* Return nonzero if OP is immediate operand representable on x86_64. */
3237
3238int
3239x86_64_zext_immediate_operand (op, mode)
3240 rtx op;
3241 enum machine_mode mode ATTRIBUTE_UNUSED;
3242{
3243 return x86_64_zero_extended_value (op);
3244}
3245
8bad7136
JL
3246/* Return nonzero if OP is (const_int 1), else return zero. */
3247
3248int
3249const_int_1_operand (op, mode)
3250 rtx op;
3251 enum machine_mode mode ATTRIBUTE_UNUSED;
3252{
3253 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3254}
3255
794a292d
JJ
3256/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3257 for shift & compare patterns, as shifting by 0 does not change flags),
3258 else return zero. */
3259
3260int
3261const_int_1_31_operand (op, mode)
3262 rtx op;
3263 enum machine_mode mode ATTRIBUTE_UNUSED;
3264{
3265 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3266}
3267
e075ae69
RH
3268/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3269 reference and a constant. */
b08de47e
MM
3270
3271int
e075ae69
RH
3272symbolic_operand (op, mode)
3273 register rtx op;
3274 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3275{
e075ae69 3276 switch (GET_CODE (op))
2a2ab3f9 3277 {
e075ae69
RH
3278 case SYMBOL_REF:
3279 case LABEL_REF:
3280 return 1;
3281
3282 case CONST:
3283 op = XEXP (op, 0);
3284 if (GET_CODE (op) == SYMBOL_REF
3285 || GET_CODE (op) == LABEL_REF
3286 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
3287 && (XINT (op, 1) == UNSPEC_GOT
3288 || XINT (op, 1) == UNSPEC_GOTOFF
3289 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3290 return 1;
3291 if (GET_CODE (op) != PLUS
3292 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3293 return 0;
3294
3295 op = XEXP (op, 0);
3296 if (GET_CODE (op) == SYMBOL_REF
3297 || GET_CODE (op) == LABEL_REF)
3298 return 1;
3299 /* Only @GOTOFF gets offsets. */
3300 if (GET_CODE (op) != UNSPEC
8ee41eaf 3301 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3302 return 0;
3303
3304 op = XVECEXP (op, 0, 0);
3305 if (GET_CODE (op) == SYMBOL_REF
3306 || GET_CODE (op) == LABEL_REF)
3307 return 1;
3308 return 0;
3309
3310 default:
3311 return 0;
2a2ab3f9
JVA
3312 }
3313}
2a2ab3f9 3314
e075ae69 3315/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3316
e075ae69
RH
3317int
3318pic_symbolic_operand (op, mode)
3319 register rtx op;
3320 enum machine_mode mode ATTRIBUTE_UNUSED;
3321{
6eb791fc
JH
3322 if (GET_CODE (op) != CONST)
3323 return 0;
3324 op = XEXP (op, 0);
3325 if (TARGET_64BIT)
3326 {
3327 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3328 return 1;
3329 }
fce5a9f2 3330 else
2a2ab3f9 3331 {
e075ae69
RH
3332 if (GET_CODE (op) == UNSPEC)
3333 return 1;
3334 if (GET_CODE (op) != PLUS
3335 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3336 return 0;
3337 op = XEXP (op, 0);
3338 if (GET_CODE (op) == UNSPEC)
3339 return 1;
2a2ab3f9 3340 }
e075ae69 3341 return 0;
2a2ab3f9 3342}
2a2ab3f9 3343
623fe810
RH
3344/* Return true if OP is a symbolic operand that resolves locally. */
3345
3346static int
3347local_symbolic_operand (op, mode)
3348 rtx op;
3349 enum machine_mode mode ATTRIBUTE_UNUSED;
3350{
623fe810
RH
3351 if (GET_CODE (op) == CONST
3352 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3353 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
623fe810
RH
3354 op = XEXP (XEXP (op, 0), 0);
3355
8bfb45f8
JJ
3356 if (GET_CODE (op) == LABEL_REF)
3357 return 1;
3358
623fe810
RH
3359 if (GET_CODE (op) != SYMBOL_REF)
3360 return 0;
3361
3362 /* These we've been told are local by varasm and encode_section_info
3363 respectively. */
3364 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3365 return 1;
3366
3367 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3368 the compiler that assumes it can just stick the results of
623fe810
RH
3369 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3370 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3371 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
3372 if (strncmp (XSTR (op, 0), internal_label_prefix,
3373 internal_label_prefix_len) == 0)
3374 return 1;
3375
3376 return 0;
3377}
3378
f996902d
RH
3379/* Test for various thread-local symbols. See ix86_encode_section_info. */
3380
3381int
3382tls_symbolic_operand (op, mode)
3383 register rtx op;
3384 enum machine_mode mode ATTRIBUTE_UNUSED;
3385{
3386 const char *symbol_str;
3387
3388 if (GET_CODE (op) != SYMBOL_REF)
3389 return 0;
3390 symbol_str = XSTR (op, 0);
3391
3392 if (symbol_str[0] != '%')
3393 return 0;
755ac5d4 3394 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3395}
3396
3397static int
3398tls_symbolic_operand_1 (op, kind)
3399 rtx op;
3400 enum tls_model kind;
3401{
3402 const char *symbol_str;
3403
3404 if (GET_CODE (op) != SYMBOL_REF)
3405 return 0;
3406 symbol_str = XSTR (op, 0);
3407
3408 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3409}
3410
3411int
3412global_dynamic_symbolic_operand (op, mode)
3413 register rtx op;
3414 enum machine_mode mode ATTRIBUTE_UNUSED;
3415{
3416 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3417}
3418
3419int
3420local_dynamic_symbolic_operand (op, mode)
3421 register rtx op;
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3423{
3424 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3425}
3426
3427int
3428initial_exec_symbolic_operand (op, mode)
3429 register rtx op;
3430 enum machine_mode mode ATTRIBUTE_UNUSED;
3431{
3432 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3433}
3434
3435int
3436local_exec_symbolic_operand (op, mode)
3437 register rtx op;
3438 enum machine_mode mode ATTRIBUTE_UNUSED;
3439{
3440 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3441}
3442
28d52ffb
RH
3443/* Test for a valid operand for a call instruction. Don't allow the
3444 arg pointer register or virtual regs since they may decay into
3445 reg + const, which the patterns can't handle. */
2a2ab3f9 3446
e075ae69
RH
3447int
3448call_insn_operand (op, mode)
3449 rtx op;
3450 enum machine_mode mode ATTRIBUTE_UNUSED;
3451{
e075ae69
RH
3452 /* Disallow indirect through a virtual register. This leads to
3453 compiler aborts when trying to eliminate them. */
3454 if (GET_CODE (op) == REG
3455 && (op == arg_pointer_rtx
564d80f4 3456 || op == frame_pointer_rtx
e075ae69
RH
3457 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3458 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3459 return 0;
2a2ab3f9 3460
28d52ffb
RH
3461 /* Disallow `call 1234'. Due to varying assembler lameness this
3462 gets either rejected or translated to `call .+1234'. */
3463 if (GET_CODE (op) == CONST_INT)
3464 return 0;
3465
cbbf65e0
RH
3466 /* Explicitly allow SYMBOL_REF even if pic. */
3467 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3468 return 1;
2a2ab3f9 3469
cbbf65e0
RH
3470 /* Otherwise we can allow any general_operand in the address. */
3471 return general_operand (op, Pmode);
e075ae69 3472}
79325812 3473
4977bab6
ZW
3474/* Test for a valid operand for a call instruction. Don't allow the
3475 arg pointer register or virtual regs since they may decay into
3476 reg + const, which the patterns can't handle. */
3477
3478int
3479sibcall_insn_operand (op, mode)
3480 rtx op;
3481 enum machine_mode mode ATTRIBUTE_UNUSED;
3482{
3483 /* Disallow indirect through a virtual register. This leads to
3484 compiler aborts when trying to eliminate them. */
3485 if (GET_CODE (op) == REG
3486 && (op == arg_pointer_rtx
3487 || op == frame_pointer_rtx
3488 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3489 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3490 return 0;
3491
3492 /* Explicitly allow SYMBOL_REF even if pic. */
3493 if (GET_CODE (op) == SYMBOL_REF)
3494 return 1;
3495
3496 /* Otherwise we can only allow register operands. */
3497 return register_operand (op, Pmode);
3498}
3499
e075ae69
RH
3500int
3501constant_call_address_operand (op, mode)
3502 rtx op;
3503 enum machine_mode mode ATTRIBUTE_UNUSED;
3504{
eaf19aba
JJ
3505 if (GET_CODE (op) == CONST
3506 && GET_CODE (XEXP (op, 0)) == PLUS
3507 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3508 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3509 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3510}
2a2ab3f9 3511
e075ae69 3512/* Match exactly zero and one. */
e9a25f70 3513
0f290768 3514int
e075ae69
RH
3515const0_operand (op, mode)
3516 register rtx op;
3517 enum machine_mode mode;
3518{
3519 return op == CONST0_RTX (mode);
3520}
e9a25f70 3521
0f290768 3522int
e075ae69
RH
3523const1_operand (op, mode)
3524 register rtx op;
3525 enum machine_mode mode ATTRIBUTE_UNUSED;
3526{
3527 return op == const1_rtx;
3528}
2a2ab3f9 3529
e075ae69 3530/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3531
e075ae69
RH
3532int
3533const248_operand (op, mode)
3534 register rtx op;
3535 enum machine_mode mode ATTRIBUTE_UNUSED;
3536{
3537 return (GET_CODE (op) == CONST_INT
3538 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3539}
e9a25f70 3540
d1f87653 3541/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3542
e075ae69
RH
3543int
3544incdec_operand (op, mode)
3545 register rtx op;
0631e0bf 3546 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3547{
f5143c46 3548 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3549 registers, since carry flag is not set. */
3550 if (TARGET_PENTIUM4 && !optimize_size)
3551 return 0;
2b1c08f5 3552 return op == const1_rtx || op == constm1_rtx;
e075ae69 3553}
2a2ab3f9 3554
/* Return nonzero if OP is acceptable as operand of DImode shift
   expander.  In 64-bit mode any nonimmediate operand works; in 32-bit
   mode a DImode shift is split, so only registers are allowed.  */

int
shiftdi_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}
3568
0f290768 3569/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3570 register eliminable to the stack pointer. Otherwise, this is
3571 a register operand.
2a2ab3f9 3572
e075ae69
RH
3573 This is used to prevent esp from being used as an index reg.
3574 Which would only happen in pathological cases. */
5f1ec3e6 3575
e075ae69
RH
3576int
3577reg_no_sp_operand (op, mode)
3578 register rtx op;
3579 enum machine_mode mode;
3580{
3581 rtx t = op;
3582 if (GET_CODE (t) == SUBREG)
3583 t = SUBREG_REG (t);
564d80f4 3584 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3585 return 0;
2a2ab3f9 3586
e075ae69 3587 return register_operand (op, mode);
2a2ab3f9 3588}
b840bfb0 3589
/* Return true if OP is an MMX register.  */

int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}
3597
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  /* Also reject any remaining virtual register by register number.  */
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}
3620
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
3639
/* Return false if this is any eliminable register or stack register,
   otherwise work like register_operand.  */

int
index_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  /* NOTE(review): the head comment says "work like register_operand",
     but this calls general_operand; since T is already known to be a
     REG the practical difference is small — confirm intent.  */
  return general_operand (op, mode);
}
3663
e075ae69 3664/* Return true if op is a Q_REGS class register. */
b840bfb0 3665
e075ae69
RH
3666int
3667q_regs_operand (op, mode)
3668 register rtx op;
3669 enum machine_mode mode;
b840bfb0 3670{
e075ae69
RH
3671 if (mode != VOIDmode && GET_MODE (op) != mode)
3672 return 0;
3673 if (GET_CODE (op) == SUBREG)
3674 op = SUBREG_REG (op);
7799175f 3675 return ANY_QI_REG_P (op);
0f290768 3676}
b840bfb0 3677
/* Return true if op is the flags register (with a non-VOID mode).  */

int
flags_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}
3689
e075ae69 3690/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3691
e075ae69
RH
3692int
3693non_q_regs_operand (op, mode)
3694 register rtx op;
3695 enum machine_mode mode;
3696{
3697 if (mode != VOIDmode && GET_MODE (op) != mode)
3698 return 0;
3699 if (GET_CODE (op) == SUBREG)
3700 op = SUBREG_REG (op);
3701 return NON_QI_REG_P (op);
0f290768 3702}
b840bfb0 3703
/* Return 1 if OP is a constant-pool MEM holding a CONST_VECTOR whose
   elements other than element 0 are all zero — i.e. a vector load that
   behaves like a zero-extended scalar load.  Element 0 itself (the
   scalar) is deliberately not checked.  */
int
zero_extended_scalar_load_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Walk elements n_elts-1 down to 1; all must be the zero constant.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}
3728
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
9076b9c1 3761/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3762int
9076b9c1
JH
3763ix86_comparison_operator (op, mode)
3764 register rtx op;
3765 enum machine_mode mode;
e075ae69 3766{
9076b9c1 3767 enum machine_mode inmode;
9a915772 3768 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3769 if (mode != VOIDmode && GET_MODE (op) != mode)
3770 return 0;
9a915772
JH
3771 if (GET_RTX_CLASS (code) != '<')
3772 return 0;
3773 inmode = GET_MODE (XEXP (op, 0));
3774
3775 if (inmode == CCFPmode || inmode == CCFPUmode)
3776 {
3777 enum rtx_code second_code, bypass_code;
3778 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3779 return (bypass_code == NIL && second_code == NIL);
3780 }
3781 switch (code)
3a3677ff
RH
3782 {
3783 case EQ: case NE:
3a3677ff 3784 return 1;
9076b9c1 3785 case LT: case GE:
7e08e190 3786 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3787 || inmode == CCGOCmode || inmode == CCNOmode)
3788 return 1;
3789 return 0;
7e08e190 3790 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3791 if (inmode == CCmode)
9076b9c1
JH
3792 return 1;
3793 return 0;
3794 case GT: case LE:
7e08e190 3795 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3796 return 1;
3797 return 0;
3a3677ff
RH
3798 default:
3799 return 0;
3800 }
3801}
3802
e6e81735
JH
3803/* Return 1 if OP is a valid comparison operator testing carry flag
3804 to be set. */
3805int
3806ix86_carry_flag_operator (op, mode)
3807 register rtx op;
3808 enum machine_mode mode;
3809{
3810 enum machine_mode inmode;
3811 enum rtx_code code = GET_CODE (op);
3812
3813 if (mode != VOIDmode && GET_MODE (op) != mode)
3814 return 0;
3815 if (GET_RTX_CLASS (code) != '<')
3816 return 0;
3817 inmode = GET_MODE (XEXP (op, 0));
3818 if (GET_CODE (XEXP (op, 0)) != REG
3819 || REGNO (XEXP (op, 0)) != 17
3820 || XEXP (op, 1) != const0_rtx)
3821 return 0;
3822
3823 if (inmode == CCFPmode || inmode == CCFPUmode)
3824 {
3825 enum rtx_code second_code, bypass_code;
3826
3827 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3828 if (bypass_code != NIL || second_code != NIL)
3829 return 0;
3830 code = ix86_fp_compare_code_to_integer (code);
3831 }
3832 else if (inmode != CCmode)
3833 return 0;
3834 return code == LTU;
3835}
3836
9076b9c1 3837/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3838
9076b9c1
JH
3839int
3840fcmov_comparison_operator (op, mode)
3a3677ff
RH
3841 register rtx op;
3842 enum machine_mode mode;
3843{
b62d22a2 3844 enum machine_mode inmode;
9a915772 3845 enum rtx_code code = GET_CODE (op);
e6e81735 3846
3a3677ff
RH
3847 if (mode != VOIDmode && GET_MODE (op) != mode)
3848 return 0;
9a915772
JH
3849 if (GET_RTX_CLASS (code) != '<')
3850 return 0;
3851 inmode = GET_MODE (XEXP (op, 0));
3852 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3853 {
9a915772 3854 enum rtx_code second_code, bypass_code;
e6e81735 3855
9a915772
JH
3856 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3857 if (bypass_code != NIL || second_code != NIL)
3858 return 0;
3859 code = ix86_fp_compare_code_to_integer (code);
3860 }
3861 /* i387 supports just limited amount of conditional codes. */
3862 switch (code)
3863 {
3864 case LTU: case GTU: case LEU: case GEU:
3865 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3866 return 1;
3867 return 0;
9a915772
JH
3868 case ORDERED: case UNORDERED:
3869 case EQ: case NE:
3870 return 1;
3a3677ff
RH
3871 default:
3872 return 0;
3873 }
e075ae69 3874}
b840bfb0 3875
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
3899
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
3915
e075ae69 3916/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3917
3918int
e075ae69 3919ext_register_operand (op, mode)
2a2ab3f9 3920 register rtx op;
bb5177ac 3921 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3922{
3522082b 3923 int regno;
0d7d98ee
JH
3924 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3925 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3926 return 0;
3522082b
JH
3927
3928 if (!register_operand (op, VOIDmode))
3929 return 0;
3930
d1f87653 3931 /* Be careful to accept only registers having upper parts. */
3522082b
JH
3932 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3933 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3934}
3935
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
fee2770d 3959
e075ae69 3960int
b531087a 3961mult_operator (op, mode)
e075ae69
RH
3962 register rtx op;
3963 enum machine_mode mode ATTRIBUTE_UNUSED;
3964{
3965 return GET_CODE (op) == MULT;
3966}
3967
3968int
b531087a 3969div_operator (op, mode)
e075ae69
RH
3970 register rtx op;
3971 enum machine_mode mode ATTRIBUTE_UNUSED;
3972{
3973 return GET_CODE (op) == DIV;
3974}
0a726ef1
JL
3975
/* Return 1 if OP is a commutative ('c') or plain binary ('2') operator
   whose mode matches MODE (or MODE is VOIDmode).  */
int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
3985
e075ae69 3986/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3987
3988int
e075ae69
RH
3989memory_displacement_operand (op, mode)
3990 register rtx op;
3991 enum machine_mode mode;
4f2c8ebb 3992{
e075ae69 3993 struct ix86_address parts;
e9a25f70 3994
e075ae69
RH
3995 if (! memory_operand (op, mode))
3996 return 0;
3997
3998 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3999 abort ();
4000
4001 return parts.disp != NULL_RTX;
4f2c8ebb
RS
4002}
4003
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept the (and (zero_extract:SI x 8 8) (const_int)) form
     produced by the testqi_ext patterns.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 4030
/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array, i.e. whose address needs extra encoding bytes.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
2247f6ed
JH
4044
/* Return nonzero if the rtx is known aligned (to 32 bits).  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Strip SUBREGs so REGNO below sees the underlying registers.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scaled index with scale >= 4 contributes a multiple of 4
	 regardless of the index register's own alignment.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69
RH
4103\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  Returns -1 if X is not a floating CONST_DOUBLE at all,
   1 for 0.0, 2 for 1.0, and 0 for other constants.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that there are other constants, such as pi, that the 80387 can
     load and that we could support too.  On some machines these are much
     slower to load as a standard constant than from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}
4123
/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
 */
int
standard_sse_constant_p (x)
     rtx x;
{
  /* const0_rtx has VOIDmode, so check it explicitly before the
     mode-indexed CONST0_RTX lookup below.  */
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
4134
/* Returns 1 if OP contains a symbol reference (SYMBOL_REF or
   LABEL_REF) anywhere in its sub-expressions.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Recurse through every rtx operand ('e') and every vector of
     operands ('E') of OP.  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
e075ae69
RH
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow popping 32k or more bytes of arguments, since that's
     all we can do with one `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
4200\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  return 1;
	else
	  {
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return trunc_int_for_mode (val, SImode) == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
      case SYMBOL_REF:
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
		|| ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
      case CONST:
	/* TLS-related UNSPECs are known to expand to 32-bit quantities.  */
	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	  switch (XINT (XEXP (value, 0), 1))
	    {
	    case UNSPEC_GOTPCREL:
	    case UNSPEC_DTPOFF:
	    case UNSPEC_GOTNTPOFF:
	    case UNSPEC_NTPOFF:
	      return 1;
	    default:
	      break;
	    }
	if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
	      case SYMBOL_REF:
		/* For CM_SMALL assume that latest object is 16MB before
		   end of 31bits boundary.  We may also accept pretty
		   large negative constants knowing that all objects are
		   in the positive half of address space.  */
		if (ix86_cmodel == CM_SMALL
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		/* For CM_KERNEL we know that all objects reside in the
		   negative half of 32bits address space.  We may not
		   accept negative offsets, since they may be just off
		   and we may accept pretty large positive ones.  */
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case LABEL_REF:
		/* These conditions are similar to SYMBOL_REF ones, just the
		   constraints for code models differ.  */
		if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case UNSPEC:
		switch (XINT (op1, 1))
		  {
		  case UNSPEC_DTPOFF:
		  case UNSPEC_NTPOFF:
		    if (offset > 0
			&& trunc_int_for_mode (offset, SImode) == offset)
		      return 1;
		  }
		break;
	      default:
		return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}
4309
4310/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4311int
4312x86_64_zero_extended_value (value)
4313 rtx value;
4314{
4315 switch (GET_CODE (value))
4316 {
4317 case CONST_DOUBLE:
4318 if (HOST_BITS_PER_WIDE_INT == 32)
4319 return (GET_MODE (value) == VOIDmode
4320 && !CONST_DOUBLE_HIGH (value));
4321 else
4322 return 0;
4323 case CONST_INT:
4324 if (HOST_BITS_PER_WIDE_INT == 32)
4325 return INTVAL (value) >= 0;
4326 else
b531087a 4327 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
4328 break;
4329
4330 /* For certain code models, the symbolic references are known to fit. */
4331 case SYMBOL_REF:
4332 return ix86_cmodel == CM_SMALL;
4333
4334 /* For certain code models, the code is near as well. */
4335 case LABEL_REF:
4336 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4337
4338 /* We also may accept the offsetted memory references in certain special
4339 cases. */
4340 case CONST:
4341 if (GET_CODE (XEXP (value, 0)) == PLUS)
4342 {
4343 rtx op1 = XEXP (XEXP (value, 0), 0);
4344 rtx op2 = XEXP (XEXP (value, 0), 1);
4345
4346 if (ix86_cmodel == CM_LARGE)
4347 return 0;
4348 switch (GET_CODE (op1))
4349 {
4350 case SYMBOL_REF:
4351 return 0;
d6a7951f 4352 /* For small code model we may accept pretty large positive
6189a572
JH
4353 offsets, since one bit is available for free. Negative
4354 offsets are limited by the size of NULL pointer area
4355 specified by the ABI. */
4356 if (ix86_cmodel == CM_SMALL
4357 && GET_CODE (op2) == CONST_INT
4358 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4359 && (trunc_int_for_mode (INTVAL (op2), SImode)
4360 == INTVAL (op2)))
4361 return 1;
4362 /* ??? For the kernel, we may accept adjustment of
4363 -0x10000000, since we know that it will just convert
d6a7951f 4364 negative address space to positive, but perhaps this
6189a572
JH
4365 is not worthwhile. */
4366 break;
4367 case LABEL_REF:
4368 /* These conditions are similar to SYMBOL_REF ones, just the
4369 constraints for code models differ. */
4370 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4371 && GET_CODE (op2) == CONST_INT
4372 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4373 && (trunc_int_for_mode (INTVAL (op2), SImode)
4374 == INTVAL (op2)))
4375 return 1;
4376 break;
4377 default:
4378 return 0;
4379 }
4380 }
4381 return 0;
4382 default:
4383 return 0;
4384 }
4385}
6fca22eb
RH
4386
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  /* Profiling also requires a frame pointer.  */
  if (current_function_profile)
    return 1;

  return 0;
}
4416
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4424\f
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of hard register numbers for which a get_pc thunk is
   needed; bit N is set by output_set_got and read at file end.  */
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (name, regno)
     char name[32];
     unsigned int regno;
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
4446
4447
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  At the end of
   the assembly file, emit one pc thunk for every register recorded
   in pic_labels_used.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which no thunk was requested.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a hidden, one-only function so multiple
	     objects can share a single copy.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (file, name);
	  fputs ("\t.hidden\t", file);
	  assemble_name (file, name);
	  fputc ('\n', file);
	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (file, name);
	}

      /* The thunk body: load the return address (at the top of the
	 stack) into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
}
32b5b1aa 4498
c8c03509 4499/* Emit code for the SET_GOT patterns. */
32b5b1aa 4500
c8c03509
RH
4501const char *
4502output_set_got (dest)
4503 rtx dest;
4504{
4505 rtx xops[3];
0d7d98ee 4506
c8c03509 4507 xops[0] = dest;
5fc0e5df 4508 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4509
c8c03509 4510 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4511 {
c8c03509
RH
4512 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4513
4514 if (!flag_pic)
4515 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4516 else
4517 output_asm_insn ("call\t%a2", xops);
4518
b069de3b
SS
4519#if TARGET_MACHO
4520 /* Output the "canonical" label name ("Lxx$pb") here too. This
4521 is what will be referred to by the Mach-O PIC subsystem. */
4522 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4523#endif
4977bab6 4524 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4525 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4526
4527 if (flag_pic)
4528 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4529 }
e075ae69 4530 else
e5cb57e8 4531 {
145aacc2
RH
4532 char name[32];
4533 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4534 pic_labels_used |= 1 << REGNO (dest);
f996902d 4535
145aacc2 4536 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4537 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4538 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4539 }
e5cb57e8 4540
c8c03509
RH
4541 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4542 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4543 else if (!TARGET_MACHO)
8e9fadc3 4544 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4545
c8c03509 4546 return "";
e9a25f70 4547}
8dfe5673 4548
0d7d98ee 4549/* Generate an "push" pattern for input ARG. */
e9a25f70 4550
e075ae69
RH
4551static rtx
4552gen_push (arg)
4553 rtx arg;
e9a25f70 4554{
c5c76735 4555 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4556 gen_rtx_MEM (Pmode,
4557 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4558 stack_pointer_rtx)),
4559 arg);
e9a25f70
JL
4560}
4561
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum ()
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      /* Try hard registers 2, 1, 0 in turn, taking the first one that
	 is never live in this function.  */
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}
fce5a9f2 4578
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN nonzero means
   also count the EH return data registers when the function calls
   eh_return.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  /* The PIC register needs saving when it is actually used, unless an
     alternate call-clobbered register can hold the GOT pointer.  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      /* EH_RETURN_DATA_REGNO enumerates the EH data regs, terminated
	 by INVALID_REGNUM.  */
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
4614
0903fcab
JH
4615/* Return number of registers to be saved on the stack. */
4616
4617static int
4618ix86_nsaved_regs ()
4619{
4620 int nregs = 0;
0903fcab
JH
4621 int regno;
4622
4dd2ac2c 4623 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4624 if (ix86_save_reg (regno, true))
4dd2ac2c 4625 nregs++;
0903fcab
JH
4626 return nregs;
4627}
4628
4629/* Return the offset between two registers, one to be eliminated, and the other
4630 its replacement, at the start of a routine. */
4631
4632HOST_WIDE_INT
4633ix86_initial_elimination_offset (from, to)
4634 int from;
4635 int to;
4636{
4dd2ac2c
JH
4637 struct ix86_frame frame;
4638 ix86_compute_frame_layout (&frame);
564d80f4
JH
4639
4640 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4641 return frame.hard_frame_pointer_offset;
564d80f4
JH
4642 else if (from == FRAME_POINTER_REGNUM
4643 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4644 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4645 else
4646 {
564d80f4
JH
4647 if (to != STACK_POINTER_REGNUM)
4648 abort ();
4649 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4650 return frame.stack_pointer_offset;
564d80f4
JH
4651 else if (from != FRAME_POINTER_REGNUM)
4652 abort ();
0903fcab 4653 else
4dd2ac2c 4654 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4655 }
4656}
4657
4dd2ac2c 4658/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4659
4dd2ac2c
JH
4660static void
4661ix86_compute_frame_layout (frame)
4662 struct ix86_frame *frame;
65954bd8 4663{
65954bd8 4664 HOST_WIDE_INT total_size;
564d80f4 4665 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4666 int offset;
4667 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4668 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4669
4dd2ac2c 4670 frame->nregs = ix86_nsaved_regs ();
564d80f4 4671 total_size = size;
65954bd8 4672
9ba81eaa 4673 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4674 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4675
4676 frame->hard_frame_pointer_offset = offset;
564d80f4 4677
fcbfaa65
RK
4678 /* Do some sanity checking of stack_alignment_needed and
4679 preferred_alignment, since i386 port is the only using those features
f710504c 4680 that may break easily. */
564d80f4 4681
44affdae
JH
4682 if (size && !stack_alignment_needed)
4683 abort ();
44affdae
JH
4684 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4685 abort ();
4686 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4687 abort ();
4688 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4689 abort ();
564d80f4 4690
4dd2ac2c
JH
4691 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4692 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4693
4dd2ac2c
JH
4694 /* Register save area */
4695 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4696
8362f420
JH
4697 /* Va-arg area */
4698 if (ix86_save_varrargs_registers)
4699 {
4700 offset += X86_64_VARARGS_SIZE;
4701 frame->va_arg_size = X86_64_VARARGS_SIZE;
4702 }
4703 else
4704 frame->va_arg_size = 0;
4705
4dd2ac2c
JH
4706 /* Align start of frame for local function. */
4707 frame->padding1 = ((offset + stack_alignment_needed - 1)
4708 & -stack_alignment_needed) - offset;
f73ad30e 4709
4dd2ac2c 4710 offset += frame->padding1;
65954bd8 4711
4dd2ac2c
JH
4712 /* Frame pointer points here. */
4713 frame->frame_pointer_offset = offset;
54ff41b7 4714
4dd2ac2c 4715 offset += size;
65954bd8 4716
0b7ae565
RH
4717 /* Add outgoing arguments area. Can be skipped if we eliminated
4718 all the function calls as dead code. */
4719 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4720 {
4721 offset += current_function_outgoing_args_size;
4722 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4723 }
4724 else
4725 frame->outgoing_arguments_size = 0;
564d80f4 4726
002ff5bc
RH
4727 /* Align stack boundary. Only needed if we're calling another function
4728 or using alloca. */
4729 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4730 frame->padding2 = ((offset + preferred_alignment - 1)
4731 & -preferred_alignment) - offset;
4732 else
4733 frame->padding2 = 0;
4dd2ac2c
JH
4734
4735 offset += frame->padding2;
4736
4737 /* We've reached end of stack frame. */
4738 frame->stack_pointer_offset = offset;
4739
4740 /* Size prologue needs to allocate. */
4741 frame->to_allocate =
4742 (size + frame->padding1 + frame->padding2
8362f420 4743 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4744
8362f420
JH
4745 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4746 && current_function_is_leaf)
4747 {
4748 frame->red_zone_size = frame->to_allocate;
4749 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4750 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4751 }
4752 else
4753 frame->red_zone_size = 0;
4754 frame->to_allocate -= frame->red_zone_size;
4755 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4756#if 0
4757 fprintf (stderr, "nregs: %i\n", frame->nregs);
4758 fprintf (stderr, "size: %i\n", size);
4759 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4760 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4761 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4762 fprintf (stderr, "padding2: %i\n", frame->padding2);
4763 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4764 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4765 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4766 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4767 frame->hard_frame_pointer_offset);
4768 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4769#endif
65954bd8
JL
4770}
4771
0903fcab
JH
4772/* Emit code to save registers in the prologue. */
4773
4774static void
4775ix86_emit_save_regs ()
4776{
4777 register int regno;
0903fcab 4778 rtx insn;
0903fcab 4779
4dd2ac2c 4780 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4781 if (ix86_save_reg (regno, true))
0903fcab 4782 {
0d7d98ee 4783 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4784 RTX_FRAME_RELATED_P (insn) = 1;
4785 }
4786}
4787
c6036a37
JH
4788/* Emit code to save registers using MOV insns. First register
4789 is restored from POINTER + OFFSET. */
4790static void
4791ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4792 rtx pointer;
4793 HOST_WIDE_INT offset;
c6036a37
JH
4794{
4795 int regno;
4796 rtx insn;
4797
4798 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4799 if (ix86_save_reg (regno, true))
4800 {
b72f00af
RK
4801 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4802 Pmode, offset),
c6036a37
JH
4803 gen_rtx_REG (Pmode, regno));
4804 RTX_FRAME_RELATED_P (insn) = 1;
4805 offset += UNITS_PER_WORD;
4806 }
4807}
4808
/* Expand the prologue into a bunch of separate insns.

   Emits, in order: the frame pointer setup (if needed), the register
   saves (push- or mov-based), the stack allocation (direct adjustment
   or a probing _alloca call when TARGET_STACK_PROBE requires it), and
   finally the PIC register setup.  Insn order here is ABI-relevant;
   do not reorder.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);
  if (!optimize_size)
    {
      int count = frame.nregs;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	use_fast_prologue_epilogue = 0;
      else
	use_fast_prologue_epilogue = !expensive_function_p (count);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* The mov-based saves land inside the allocated area, so grow it.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the size
	 in %eax so every page gets touched.  ??? Is this only valid for
	 Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));

      /* Don't allow scheduling pass to move insns across __alloca
	 call.  */
      emit_insn (gen_blockage (const0_rtx));
    }
  if (use_mov)
    {
      /* Saves go below the stack pointer when no frame pointer is in
	 use (or nothing was allocated), else below the frame pointer.  */
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  /* Decide whether the GOT pointer must be materialized, possibly in an
     alternate call-clobbered register instead of %ebx.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
4947
da2d1d3a
JH
4948/* Emit code to restore saved registers using MOV insns. First register
4949 is restored from POINTER + OFFSET. */
4950static void
1020a5ab
RH
4951ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4952 rtx pointer;
4953 int offset;
37a58036 4954 int maybe_eh_return;
da2d1d3a
JH
4955{
4956 int regno;
da2d1d3a 4957
4dd2ac2c 4958 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4959 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4960 {
4dd2ac2c 4961 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4962 adjust_address (gen_rtx_MEM (Pmode, pointer),
4963 Pmode, offset));
4dd2ac2c 4964 offset += UNITS_PER_WORD;
da2d1d3a
JH
4965 }
4966}
4967
/* Restore function stack, frame, and registers.

   STYLE selects the epilogue flavor: 0 for a sibcall epilogue (no
   return insn emitted), 2 for an eh_return epilogue (stack pointer
   additionally adjusted by EH_RETURN_STACKADJ_RTX), any other value
   for a normal return.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Move the handler's stack adjustment past the saved
		 frame pointer, then reload %ebp and pop the frame.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      /* Pop in ascending order, matching the descending pushes made by
	 ix86_emit_save_regs.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5134
5135/* Reset from the function's potential modifications. */
5136
5137static void
5138ix86_output_function_epilogue (file, size)
5139 FILE *file ATTRIBUTE_UNUSED;
5140 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5141{
5142 if (pic_offset_table_rtx)
5143 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5144}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into OUT (base, index, displacement, scale).
   Return 0 if the structure of the address is grossly off.  Return -1
   if the address contains ASHIFT, so it is not strictly valid, but
   still used for computing length of lea instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);  /* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling;
     swap them into the base slot since they can't be encoded as index.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
/* Return cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  Lower return values mean a more desirable address.  */
static int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* Look through subregs so the register checks below see hard/pseudo
     registers directly.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address: penalize
     any base or index that is still a pseudo (or not a register).  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Penalize again when two distinct such registers are required.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
5338\f
b949ea8b
JW
5339/* If X is a machine specific address (i.e. a symbol or label being
5340 referenced as a displacement from the GOT implemented using an
5341 UNSPEC), then return the base term. Otherwise return X. */
5342
5343rtx
5344ix86_find_base_term (x)
5345 rtx x;
5346{
5347 rtx term;
5348
6eb791fc
JH
5349 if (TARGET_64BIT)
5350 {
5351 if (GET_CODE (x) != CONST)
5352 return x;
5353 term = XEXP (x, 0);
5354 if (GET_CODE (term) == PLUS
5355 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5356 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5357 term = XEXP (term, 0);
5358 if (GET_CODE (term) != UNSPEC
8ee41eaf 5359 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5360 return x;
5361
5362 term = XVECEXP (term, 0, 0);
5363
5364 if (GET_CODE (term) != SYMBOL_REF
5365 && GET_CODE (term) != LABEL_REF)
5366 return x;
5367
5368 return term;
5369 }
5370
b949ea8b
JW
5371 if (GET_CODE (x) != PLUS
5372 || XEXP (x, 0) != pic_offset_table_rtx
5373 || GET_CODE (XEXP (x, 1)) != CONST)
5374 return x;
5375
5376 term = XEXP (XEXP (x, 1), 0);
5377
5378 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5379 term = XEXP (term, 0);
5380
5381 if (GET_CODE (term) != UNSPEC
8ee41eaf 5382 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
5383 return x;
5384
5385 term = XVECEXP (term, 0, 0);
5386
5387 if (GET_CODE (term) != SYMBOL_REF
5388 && GET_CODE (term) != LABEL_REF)
5389 return x;
5390
5391 return term;
5392}
5393\f
f996902d
RH
5394/* Determine if a given RTX is a valid constant. We already know this
5395 satisfies CONSTANT_P. */
5396
5397bool
5398legitimate_constant_p (x)
5399 rtx x;
5400{
5401 rtx inner;
5402
5403 switch (GET_CODE (x))
5404 {
5405 case SYMBOL_REF:
5406 /* TLS symbols are not constant. */
5407 if (tls_symbolic_operand (x, Pmode))
5408 return false;
5409 break;
5410
5411 case CONST:
5412 inner = XEXP (x, 0);
5413
5414 /* Offsets of TLS symbols are never valid.
5415 Discourage CSE from creating them. */
5416 if (GET_CODE (inner) == PLUS
5417 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5418 return false;
5419
5420 /* Only some unspecs are valid as "constants". */
5421 if (GET_CODE (inner) == UNSPEC)
5422 switch (XINT (inner, 1))
5423 {
5424 case UNSPEC_TPOFF:
5425 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5426 default:
5427 return false;
5428 }
5429 break;
5430
5431 default:
5432 break;
5433 }
5434
5435 /* Otherwise we handle everything else in the move patterns. */
5436 return true;
5437}
5438
3a04ff64
RH
5439/* Determine if it's legal to put X into the constant pool. This
5440 is not possible for the address of thread-local symbols, which
5441 is checked above. */
5442
5443static bool
5444ix86_cannot_force_const_mem (x)
5445 rtx x;
5446{
5447 return !legitimate_constant_p (x);
5448}
5449
f996902d
RH
5450/* Determine if a given RTX is a valid constant address. */
5451
5452bool
5453constant_address_p (x)
5454 rtx x;
5455{
5456 switch (GET_CODE (x))
5457 {
5458 case LABEL_REF:
5459 case CONST_INT:
5460 return true;
5461
5462 case CONST_DOUBLE:
5463 return TARGET_64BIT;
5464
5465 case CONST:
b069de3b
SS
5466 /* For Mach-O, really believe the CONST. */
5467 if (TARGET_MACHO)
5468 return true;
5469 /* Otherwise fall through. */
f996902d
RH
5470 case SYMBOL_REF:
5471 return !flag_pic && legitimate_constant_p (x);
5472
5473 default:
5474 return false;
5475 }
5476}
5477
5478/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5479 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5480 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5481
5482bool
5483legitimate_pic_operand_p (x)
5484 rtx x;
5485{
5486 rtx inner;
5487
5488 switch (GET_CODE (x))
5489 {
5490 case CONST:
5491 inner = XEXP (x, 0);
5492
5493 /* Only some unspecs are valid as "constants". */
5494 if (GET_CODE (inner) == UNSPEC)
5495 switch (XINT (inner, 1))
5496 {
5497 case UNSPEC_TPOFF:
5498 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5499 default:
5500 return false;
5501 }
5502 /* FALLTHRU */
5503
5504 case SYMBOL_REF:
5505 case LABEL_REF:
5506 return legitimate_pic_address_disp_p (x);
5507
5508 default:
5509 return true;
5510 }
5511}
5512
e075ae69
RH
5513/* Determine if a given CONST RTX is a valid memory displacement
5514 in PIC mode. */
0f290768 5515
59be65f6 5516int
91bb873f
RH
5517legitimate_pic_address_disp_p (disp)
5518 register rtx disp;
5519{
f996902d
RH
5520 bool saw_plus;
5521
6eb791fc
JH
5522 /* In 64bit mode we can allow direct addresses of symbols and labels
5523 when they are not dynamic symbols. */
c05dbe81
JH
5524 if (TARGET_64BIT)
5525 {
5526 /* TLS references should always be enclosed in UNSPEC. */
5527 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5528 return 0;
5529 if (GET_CODE (disp) == SYMBOL_REF
5530 && ix86_cmodel == CM_SMALL_PIC
5531 && (CONSTANT_POOL_ADDRESS_P (disp)
5532 || SYMBOL_REF_FLAG (disp)))
5533 return 1;
5534 if (GET_CODE (disp) == LABEL_REF)
5535 return 1;
5536 if (GET_CODE (disp) == CONST
5537 && GET_CODE (XEXP (disp, 0)) == PLUS
5538 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5539 && ix86_cmodel == CM_SMALL_PIC
5540 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5541 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5542 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5543 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5544 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5545 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5546 return 1;
5547 }
91bb873f
RH
5548 if (GET_CODE (disp) != CONST)
5549 return 0;
5550 disp = XEXP (disp, 0);
5551
6eb791fc
JH
5552 if (TARGET_64BIT)
5553 {
5554 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5555 of GOT tables. We should not need these anyway. */
5556 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5557 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5558 return 0;
5559
5560 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5561 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5562 return 0;
5563 return 1;
5564 }
5565
f996902d 5566 saw_plus = false;
91bb873f
RH
5567 if (GET_CODE (disp) == PLUS)
5568 {
5569 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5570 return 0;
5571 disp = XEXP (disp, 0);
f996902d 5572 saw_plus = true;
91bb873f
RH
5573 }
5574
b069de3b
SS
5575 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5576 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5577 {
5578 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5579 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5580 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5581 {
5582 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5583 if (strstr (sym_name, "$pb") != 0)
5584 return 1;
5585 }
5586 }
5587
8ee41eaf 5588 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5589 return 0;
5590
623fe810
RH
5591 switch (XINT (disp, 1))
5592 {
8ee41eaf 5593 case UNSPEC_GOT:
f996902d
RH
5594 if (saw_plus)
5595 return false;
623fe810 5596 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5597 case UNSPEC_GOTOFF:
623fe810 5598 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d 5599 case UNSPEC_GOTTPOFF:
dea73790
JJ
5600 case UNSPEC_GOTNTPOFF:
5601 case UNSPEC_INDNTPOFF:
f996902d
RH
5602 if (saw_plus)
5603 return false;
5604 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5605 case UNSPEC_NTPOFF:
f996902d
RH
5606 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5607 case UNSPEC_DTPOFF:
f996902d 5608 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5609 }
fce5a9f2 5610
623fe810 5611 return 0;
91bb873f
RH
5612}
5613
e075ae69
RH
5614/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5615 memory address for an instruction. The MODE argument is the machine mode
5616 for the MEM expression that wants to use this address.
5617
5618 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5619 convert common non-canonical forms to canonical form so that they will
5620 be recognized. */
5621
3b3c6a3f
MM
5622int
5623legitimate_address_p (mode, addr, strict)
5624 enum machine_mode mode;
5625 register rtx addr;
5626 int strict;
5627{
e075ae69
RH
5628 struct ix86_address parts;
5629 rtx base, index, disp;
5630 HOST_WIDE_INT scale;
5631 const char *reason = NULL;
5632 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5633
5634 if (TARGET_DEBUG_ADDR)
5635 {
5636 fprintf (stderr,
e9a25f70 5637 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5638 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5639 debug_rtx (addr);
5640 }
5641
9e20be0c
JJ
5642 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5643 {
5644 if (TARGET_DEBUG_ADDR)
5645 fprintf (stderr, "Success.\n");
5646 return TRUE;
5647 }
5648
b446e5a2 5649 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5650 {
e075ae69 5651 reason = "decomposition failed";
50e60bc3 5652 goto report_error;
3b3c6a3f
MM
5653 }
5654
e075ae69
RH
5655 base = parts.base;
5656 index = parts.index;
5657 disp = parts.disp;
5658 scale = parts.scale;
91f0226f 5659
e075ae69 5660 /* Validate base register.
e9a25f70
JL
5661
5662 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5663 is one word out of a two word structure, which is represented internally
5664 as a DImode int. */
e9a25f70 5665
3b3c6a3f
MM
5666 if (base)
5667 {
1540f9eb 5668 rtx reg;
e075ae69
RH
5669 reason_rtx = base;
5670
1540f9eb
JH
5671 if (GET_CODE (base) == SUBREG)
5672 reg = SUBREG_REG (base);
5673 else
5674 reg = base;
5675
5676 if (GET_CODE (reg) != REG)
3b3c6a3f 5677 {
e075ae69 5678 reason = "base is not a register";
50e60bc3 5679 goto report_error;
3b3c6a3f
MM
5680 }
5681
c954bd01
RH
5682 if (GET_MODE (base) != Pmode)
5683 {
e075ae69 5684 reason = "base is not in Pmode";
50e60bc3 5685 goto report_error;
c954bd01
RH
5686 }
5687
1540f9eb
JH
5688 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5689 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5690 {
e075ae69 5691 reason = "base is not valid";
50e60bc3 5692 goto report_error;
3b3c6a3f
MM
5693 }
5694 }
5695
e075ae69 5696 /* Validate index register.
e9a25f70
JL
5697
5698 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5699 is one word out of a two word structure, which is represented internally
5700 as a DImode int. */
e075ae69
RH
5701
5702 if (index)
3b3c6a3f 5703 {
1540f9eb 5704 rtx reg;
e075ae69
RH
5705 reason_rtx = index;
5706
1540f9eb
JH
5707 if (GET_CODE (index) == SUBREG)
5708 reg = SUBREG_REG (index);
5709 else
5710 reg = index;
5711
5712 if (GET_CODE (reg) != REG)
3b3c6a3f 5713 {
e075ae69 5714 reason = "index is not a register";
50e60bc3 5715 goto report_error;
3b3c6a3f
MM
5716 }
5717
e075ae69 5718 if (GET_MODE (index) != Pmode)
c954bd01 5719 {
e075ae69 5720 reason = "index is not in Pmode";
50e60bc3 5721 goto report_error;
c954bd01
RH
5722 }
5723
1540f9eb
JH
5724 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5725 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5726 {
e075ae69 5727 reason = "index is not valid";
50e60bc3 5728 goto report_error;
3b3c6a3f
MM
5729 }
5730 }
3b3c6a3f 5731
e075ae69
RH
5732 /* Validate scale factor. */
5733 if (scale != 1)
3b3c6a3f 5734 {
e075ae69
RH
5735 reason_rtx = GEN_INT (scale);
5736 if (!index)
3b3c6a3f 5737 {
e075ae69 5738 reason = "scale without index";
50e60bc3 5739 goto report_error;
3b3c6a3f
MM
5740 }
5741
e075ae69 5742 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5743 {
e075ae69 5744 reason = "scale is not a valid multiplier";
50e60bc3 5745 goto report_error;
3b3c6a3f
MM
5746 }
5747 }
5748
91bb873f 5749 /* Validate displacement. */
3b3c6a3f
MM
5750 if (disp)
5751 {
e075ae69
RH
5752 reason_rtx = disp;
5753
f996902d
RH
5754 if (GET_CODE (disp) == CONST
5755 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5756 switch (XINT (XEXP (disp, 0), 1))
5757 {
5758 case UNSPEC_GOT:
5759 case UNSPEC_GOTOFF:
5760 case UNSPEC_GOTPCREL:
5761 if (!flag_pic)
5762 abort ();
5763 goto is_legitimate_pic;
5764
5765 case UNSPEC_GOTTPOFF:
dea73790
JJ
5766 case UNSPEC_GOTNTPOFF:
5767 case UNSPEC_INDNTPOFF:
f996902d
RH
5768 case UNSPEC_NTPOFF:
5769 case UNSPEC_DTPOFF:
5770 break;
5771
5772 default:
5773 reason = "invalid address unspec";
5774 goto report_error;
5775 }
5776
b069de3b
SS
5777 else if (flag_pic && (SYMBOLIC_CONST (disp)
5778#if TARGET_MACHO
5779 && !machopic_operand_p (disp)
5780#endif
5781 ))
3b3c6a3f 5782 {
f996902d 5783 is_legitimate_pic:
0d7d98ee
JH
5784 if (TARGET_64BIT && (index || base))
5785 {
75d38379
JJ
5786 /* foo@dtpoff(%rX) is ok. */
5787 if (GET_CODE (disp) != CONST
5788 || GET_CODE (XEXP (disp, 0)) != PLUS
5789 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5790 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5791 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5792 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5793 {
5794 reason = "non-constant pic memory reference";
5795 goto report_error;
5796 }
0d7d98ee 5797 }
75d38379 5798 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5799 {
e075ae69 5800 reason = "displacement is an invalid pic construct";
50e60bc3 5801 goto report_error;
91bb873f
RH
5802 }
5803
4e9efe54 5804 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5805 includes the pic_offset_table_rtx register.
5806
4e9efe54
JH
5807 While this is good idea, unfortunately these constructs may
5808 be created by "adds using lea" optimization for incorrect
5809 code like:
5810
5811 int a;
5812 int foo(int i)
5813 {
5814 return *(&a+i);
5815 }
5816
50e60bc3 5817 This code is nonsensical, but results in addressing
4e9efe54 5818 GOT table with pic_offset_table_rtx base. We can't
f710504c 5819 just refuse it easily, since it gets matched by
4e9efe54
JH
5820 "addsi3" pattern, that later gets split to lea in the
5821 case output register differs from input. While this
5822 can be handled by separate addsi pattern for this case
5823 that never results in lea, this seems to be easier and
5824 correct fix for crash to disable this test. */
3b3c6a3f 5825 }
f996902d
RH
5826 else if (!CONSTANT_ADDRESS_P (disp))
5827 {
5828 reason = "displacement is not constant";
5829 goto report_error;
5830 }
c05dbe81
JH
5831 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5832 {
5833 reason = "displacement is out of range";
5834 goto report_error;
5835 }
5836 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5837 {
5838 reason = "displacement is a const_double";
5839 goto report_error;
5840 }
3b3c6a3f
MM
5841 }
5842
e075ae69 5843 /* Everything looks valid. */
3b3c6a3f 5844 if (TARGET_DEBUG_ADDR)
e075ae69 5845 fprintf (stderr, "Success.\n");
3b3c6a3f 5846 return TRUE;
e075ae69 5847
5bf0ebab 5848 report_error:
e075ae69
RH
5849 if (TARGET_DEBUG_ADDR)
5850 {
5851 fprintf (stderr, "Error: %s\n", reason);
5852 debug_rtx (reason_rtx);
5853 }
5854 return FALSE;
3b3c6a3f 5855}
3b3c6a3f 5856\f
55efb413
JW
5857/* Return an unique alias set for the GOT. */
5858
0f290768 5859static HOST_WIDE_INT
55efb413
JW
5860ix86_GOT_alias_set ()
5861{
5bf0ebab
RH
5862 static HOST_WIDE_INT set = -1;
5863 if (set == -1)
5864 set = new_alias_set ();
5865 return set;
0f290768 5866}
55efb413 5867
3b3c6a3f
MM
5868/* Return a legitimate reference for ORIG (an address) using the
5869 register REG. If REG is 0, a new pseudo is generated.
5870
91bb873f 5871 There are two types of references that must be handled:
3b3c6a3f
MM
5872
5873 1. Global data references must load the address from the GOT, via
5874 the PIC reg. An insn is emitted to do this load, and the reg is
5875 returned.
5876
91bb873f
RH
5877 2. Static data references, constant pool addresses, and code labels
5878 compute the address as an offset from the GOT, whose base is in
5879 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5880 differentiate them from global data objects. The returned
5881 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5882
5883 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5884 reg also appears in the address. */
3b3c6a3f
MM
5885
5886rtx
5887legitimize_pic_address (orig, reg)
5888 rtx orig;
5889 rtx reg;
5890{
5891 rtx addr = orig;
5892 rtx new = orig;
91bb873f 5893 rtx base;
3b3c6a3f 5894
b069de3b
SS
5895#if TARGET_MACHO
5896 if (reg == 0)
5897 reg = gen_reg_rtx (Pmode);
5898 /* Use the generic Mach-O PIC machinery. */
5899 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5900#endif
5901
c05dbe81
JH
5902 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5903 new = addr;
5904 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 5905 {
c05dbe81
JH
5906 /* This symbol may be referenced via a displacement from the PIC
5907 base address (@GOTOFF). */
3b3c6a3f 5908
c05dbe81
JH
5909 if (reload_in_progress)
5910 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5911 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5912 new = gen_rtx_CONST (Pmode, new);
5913 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5914
c05dbe81
JH
5915 if (reg != 0)
5916 {
5917 emit_move_insn (reg, new);
5918 new = reg;
5919 }
3b3c6a3f 5920 }
91bb873f 5921 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5922 {
14f73b5a
JH
5923 if (TARGET_64BIT)
5924 {
8ee41eaf 5925 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5926 new = gen_rtx_CONST (Pmode, new);
5927 new = gen_rtx_MEM (Pmode, new);
5928 RTX_UNCHANGING_P (new) = 1;
5929 set_mem_alias_set (new, ix86_GOT_alias_set ());
5930
5931 if (reg == 0)
5932 reg = gen_reg_rtx (Pmode);
5933 /* Use directly gen_movsi, otherwise the address is loaded
5934 into register for CSE. We don't want to CSE this addresses,
5935 instead we CSE addresses from the GOT table, so skip this. */
5936 emit_insn (gen_movsi (reg, new));
5937 new = reg;
5938 }
5939 else
5940 {
5941 /* This symbol must be referenced via a load from the
5942 Global Offset Table (@GOT). */
3b3c6a3f 5943
66edd3b4
RH
5944 if (reload_in_progress)
5945 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5946 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5947 new = gen_rtx_CONST (Pmode, new);
5948 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5949 new = gen_rtx_MEM (Pmode, new);
5950 RTX_UNCHANGING_P (new) = 1;
5951 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5952
14f73b5a
JH
5953 if (reg == 0)
5954 reg = gen_reg_rtx (Pmode);
5955 emit_move_insn (reg, new);
5956 new = reg;
5957 }
0f290768 5958 }
91bb873f
RH
5959 else
5960 {
5961 if (GET_CODE (addr) == CONST)
3b3c6a3f 5962 {
91bb873f 5963 addr = XEXP (addr, 0);
e3c8ea67
RH
5964
5965 /* We must match stuff we generate before. Assume the only
5966 unspecs that can get here are ours. Not that we could do
5967 anything with them anyway... */
5968 if (GET_CODE (addr) == UNSPEC
5969 || (GET_CODE (addr) == PLUS
5970 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5971 return orig;
5972 if (GET_CODE (addr) != PLUS)
564d80f4 5973 abort ();
3b3c6a3f 5974 }
91bb873f
RH
5975 if (GET_CODE (addr) == PLUS)
5976 {
5977 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5978
91bb873f
RH
5979 /* Check first to see if this is a constant offset from a @GOTOFF
5980 symbol reference. */
623fe810 5981 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5982 && GET_CODE (op1) == CONST_INT)
5983 {
6eb791fc
JH
5984 if (!TARGET_64BIT)
5985 {
66edd3b4
RH
5986 if (reload_in_progress)
5987 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5988 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5989 UNSPEC_GOTOFF);
6eb791fc
JH
5990 new = gen_rtx_PLUS (Pmode, new, op1);
5991 new = gen_rtx_CONST (Pmode, new);
5992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5993
6eb791fc
JH
5994 if (reg != 0)
5995 {
5996 emit_move_insn (reg, new);
5997 new = reg;
5998 }
5999 }
6000 else
91bb873f 6001 {
75d38379
JJ
6002 if (INTVAL (op1) < -16*1024*1024
6003 || INTVAL (op1) >= 16*1024*1024)
6004 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
91bb873f
RH
6005 }
6006 }
6007 else
6008 {
6009 base = legitimize_pic_address (XEXP (addr, 0), reg);
6010 new = legitimize_pic_address (XEXP (addr, 1),
6011 base == reg ? NULL_RTX : reg);
6012
6013 if (GET_CODE (new) == CONST_INT)
6014 new = plus_constant (base, INTVAL (new));
6015 else
6016 {
6017 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6018 {
6019 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6020 new = XEXP (new, 1);
6021 }
6022 new = gen_rtx_PLUS (Pmode, base, new);
6023 }
6024 }
6025 }
3b3c6a3f
MM
6026 }
6027 return new;
6028}
fb49053f 6029
fb49053f 6030static void
f996902d 6031ix86_encode_section_info (decl, first)
fb49053f
RH
6032 tree decl;
6033 int first ATTRIBUTE_UNUSED;
6034{
f996902d
RH
6035 bool local_p = (*targetm.binds_local_p) (decl);
6036 rtx rtl, symbol;
6037
6038 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6039 if (GET_CODE (rtl) != MEM)
6040 return;
6041 symbol = XEXP (rtl, 0);
6042 if (GET_CODE (symbol) != SYMBOL_REF)
6043 return;
6044
6045 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6046 symbol so that we may access it directly in the GOT. */
6047
fb49053f 6048 if (flag_pic)
f996902d
RH
6049 SYMBOL_REF_FLAG (symbol) = local_p;
6050
6051 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6052 "local dynamic", "initial exec" or "local exec" TLS models
6053 respectively. */
6054
6055 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 6056 {
f996902d
RH
6057 const char *symbol_str;
6058 char *newstr;
6059 size_t len;
dce81a1a 6060 enum tls_model kind = decl_tls_model (decl);
f996902d 6061
75d38379
JJ
6062 if (TARGET_64BIT && ! flag_pic)
6063 {
6064 /* x86-64 doesn't allow non-pic code for shared libraries,
6065 so don't generate GD/LD TLS models for non-pic code. */
6066 switch (kind)
6067 {
6068 case TLS_MODEL_GLOBAL_DYNAMIC:
6069 kind = TLS_MODEL_INITIAL_EXEC; break;
6070 case TLS_MODEL_LOCAL_DYNAMIC:
6071 kind = TLS_MODEL_LOCAL_EXEC; break;
6072 default:
6073 break;
6074 }
6075 }
6076
f996902d 6077 symbol_str = XSTR (symbol, 0);
fb49053f 6078
f996902d
RH
6079 if (symbol_str[0] == '%')
6080 {
6081 if (symbol_str[1] == tls_model_chars[kind])
6082 return;
6083 symbol_str += 2;
6084 }
6085 len = strlen (symbol_str) + 1;
6086 newstr = alloca (len + 2);
6087
6088 newstr[0] = '%';
6089 newstr[1] = tls_model_chars[kind];
6090 memcpy (newstr + 2, symbol_str, len);
6091
6092 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
6093 }
6094}
f996902d
RH
6095
6096/* Undo the above when printing symbol names. */
6097
/* Undo the above when printing symbol names: skip a two-character
   "%<model>" TLS prefix and a leading "*" no-prefix marker, if any.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (*str == '%')
    str += 2;
  if (*str == '*')
    str++;
  return str;
}
3b3c6a3f 6108\f
f996902d
RH
6109/* Load the thread pointer into a register. */
6110
6111static rtx
6112get_thread_pointer ()
6113{
6114 rtx tp;
6115
6116 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9e20be0c
JJ
6117 tp = gen_rtx_MEM (Pmode, tp);
6118 RTX_UNCHANGING_P (tp) = 1;
6119 set_mem_alias_set (tp, ix86_GOT_alias_set ());
f996902d
RH
6120 tp = force_reg (Pmode, tp);
6121
6122 return tp;
6123}
fce5a9f2 6124
3b3c6a3f
MM
6125/* Try machine-dependent ways of modifying an illegitimate address
6126 to be legitimate. If we find one, return the new, valid address.
6127 This macro is used in only one place: `memory_address' in explow.c.
6128
6129 OLDX is the address as it was before break_out_memory_refs was called.
6130 In some cases it is useful to look at this to decide what needs to be done.
6131
6132 MODE and WIN are passed so that this macro can use
6133 GO_IF_LEGITIMATE_ADDRESS.
6134
6135 It is always safe for this macro to do nothing. It exists to recognize
6136 opportunities to optimize the output.
6137
6138 For the 80386, we handle X+REG by loading X into a register R and
6139 using R+REG. R will go in a general reg and indexing will be used.
6140 However, if REG is a broken-out memory address or multiplication,
6141 nothing needs to be done because REG can certainly go in a general reg.
6142
6143 When -fpic is used, special handling is needed for symbolic references.
6144 See comments by legitimize_pic_address in i386.c for details. */
6145
6146rtx
6147legitimize_address (x, oldx, mode)
6148 register rtx x;
bb5177ac 6149 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
6150 enum machine_mode mode;
6151{
6152 int changed = 0;
6153 unsigned log;
6154
6155 if (TARGET_DEBUG_ADDR)
6156 {
e9a25f70
JL
6157 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6158 GET_MODE_NAME (mode));
3b3c6a3f
MM
6159 debug_rtx (x);
6160 }
6161
f996902d
RH
6162 log = tls_symbolic_operand (x, mode);
6163 if (log)
6164 {
6165 rtx dest, base, off, pic;
75d38379 6166 int type;
f996902d 6167
755ac5d4 6168 switch (log)
f996902d
RH
6169 {
6170 case TLS_MODEL_GLOBAL_DYNAMIC:
6171 dest = gen_reg_rtx (Pmode);
75d38379
JJ
6172 if (TARGET_64BIT)
6173 {
6174 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6175
6176 start_sequence ();
6177 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6178 insns = get_insns ();
6179 end_sequence ();
6180
6181 emit_libcall_block (insns, dest, rax, x);
6182 }
6183 else
6184 emit_insn (gen_tls_global_dynamic_32 (dest, x));
f996902d
RH
6185 break;
6186
6187 case TLS_MODEL_LOCAL_DYNAMIC:
6188 base = gen_reg_rtx (Pmode);
75d38379
JJ
6189 if (TARGET_64BIT)
6190 {
6191 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6192
6193 start_sequence ();
6194 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6195 insns = get_insns ();
6196 end_sequence ();
6197
6198 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6199 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6200 emit_libcall_block (insns, base, rax, note);
6201 }
6202 else
6203 emit_insn (gen_tls_local_dynamic_base_32 (base));
f996902d
RH
6204
6205 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6206 off = gen_rtx_CONST (Pmode, off);
6207
6208 return gen_rtx_PLUS (Pmode, base, off);
6209
6210 case TLS_MODEL_INITIAL_EXEC:
75d38379
JJ
6211 if (TARGET_64BIT)
6212 {
6213 pic = NULL;
6214 type = UNSPEC_GOTNTPOFF;
6215 }
6216 else if (flag_pic)
f996902d 6217 {
66edd3b4
RH
6218 if (reload_in_progress)
6219 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d 6220 pic = pic_offset_table_rtx;
75d38379 6221 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
f996902d 6222 }
dea73790 6223 else if (!TARGET_GNU_TLS)
f996902d
RH
6224 {
6225 pic = gen_reg_rtx (Pmode);
6226 emit_insn (gen_set_got (pic));
75d38379 6227 type = UNSPEC_GOTTPOFF;
f996902d 6228 }
dea73790 6229 else
75d38379
JJ
6230 {
6231 pic = NULL;
6232 type = UNSPEC_INDNTPOFF;
6233 }
f996902d
RH
6234
6235 base = get_thread_pointer ();
6236
75d38379 6237 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
f996902d 6238 off = gen_rtx_CONST (Pmode, off);
75d38379 6239 if (pic)
dea73790 6240 off = gen_rtx_PLUS (Pmode, pic, off);
f996902d
RH
6241 off = gen_rtx_MEM (Pmode, off);
6242 RTX_UNCHANGING_P (off) = 1;
6243 set_mem_alias_set (off, ix86_GOT_alias_set ());
f996902d 6244 dest = gen_reg_rtx (Pmode);
dea73790 6245
75d38379 6246 if (TARGET_64BIT || TARGET_GNU_TLS)
dea73790
JJ
6247 {
6248 emit_move_insn (dest, off);
6249 return gen_rtx_PLUS (Pmode, base, dest);
6250 }
6251 else
6252 emit_insn (gen_subsi3 (dest, base, off));
f996902d
RH
6253 break;
6254
6255 case TLS_MODEL_LOCAL_EXEC:
6256 base = get_thread_pointer ();
6257
6258 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
75d38379
JJ
6259 (TARGET_64BIT || TARGET_GNU_TLS)
6260 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
f996902d
RH
6261 off = gen_rtx_CONST (Pmode, off);
6262
75d38379 6263 if (TARGET_64BIT || TARGET_GNU_TLS)
f996902d
RH
6264 return gen_rtx_PLUS (Pmode, base, off);
6265 else
6266 {
6267 dest = gen_reg_rtx (Pmode);
6268 emit_insn (gen_subsi3 (dest, base, off));
6269 }
6270 break;
6271
6272 default:
6273 abort ();
6274 }
6275
6276 return dest;
6277 }
6278
3b3c6a3f
MM
6279 if (flag_pic && SYMBOLIC_CONST (x))
6280 return legitimize_pic_address (x, 0);
6281
6282 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6283 if (GET_CODE (x) == ASHIFT
6284 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 6285 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
6286 {
6287 changed = 1;
a269a03c
JC
6288 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6289 GEN_INT (1 << log));
3b3c6a3f
MM
6290 }
6291
6292 if (GET_CODE (x) == PLUS)
6293 {
0f290768 6294 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 6295
3b3c6a3f
MM
6296 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6297 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 6298 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
6299 {
6300 changed = 1;
c5c76735
JL
6301 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6302 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6303 GEN_INT (1 << log));
3b3c6a3f
MM
6304 }
6305
6306 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6307 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 6308 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
6309 {
6310 changed = 1;
c5c76735
JL
6311 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6312 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6313 GEN_INT (1 << log));
3b3c6a3f
MM
6314 }
6315
0f290768 6316 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
6317 if (GET_CODE (XEXP (x, 1)) == MULT)
6318 {
6319 rtx tmp = XEXP (x, 0);
6320 XEXP (x, 0) = XEXP (x, 1);
6321 XEXP (x, 1) = tmp;
6322 changed = 1;
6323 }
6324
6325 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6326 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6327 created by virtual register instantiation, register elimination, and
6328 similar optimizations. */
6329 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6330 {
6331 changed = 1;
c5c76735
JL
6332 x = gen_rtx_PLUS (Pmode,
6333 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6334 XEXP (XEXP (x, 1), 0)),
6335 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
6336 }
6337
e9a25f70
JL
6338 /* Canonicalize
6339 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
6340 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6341 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6342 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6343 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6344 && CONSTANT_P (XEXP (x, 1)))
6345 {
00c79232
ML
6346 rtx constant;
6347 rtx other = NULL_RTX;
3b3c6a3f
MM
6348
6349 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6350 {
6351 constant = XEXP (x, 1);
6352 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6353 }
6354 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6355 {
6356 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6357 other = XEXP (x, 1);
6358 }
6359 else
6360 constant = 0;
6361
6362 if (constant)
6363 {
6364 changed = 1;
c5c76735
JL
6365 x = gen_rtx_PLUS (Pmode,
6366 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6367 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6368 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
6369 }
6370 }
6371
6372 if (changed && legitimate_address_p (mode, x, FALSE))
6373 return x;
6374
6375 if (GET_CODE (XEXP (x, 0)) == MULT)
6376 {
6377 changed = 1;
6378 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6379 }
6380
6381 if (GET_CODE (XEXP (x, 1)) == MULT)
6382 {
6383 changed = 1;
6384 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6385 }
6386
6387 if (changed
6388 && GET_CODE (XEXP (x, 1)) == REG
6389 && GET_CODE (XEXP (x, 0)) == REG)
6390 return x;
6391
6392 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6393 {
6394 changed = 1;
6395 x = legitimize_pic_address (x, 0);
6396 }
6397
6398 if (changed && legitimate_address_p (mode, x, FALSE))
6399 return x;
6400
6401 if (GET_CODE (XEXP (x, 0)) == REG)
6402 {
6403 register rtx temp = gen_reg_rtx (Pmode);
6404 register rtx val = force_operand (XEXP (x, 1), temp);
6405 if (val != temp)
6406 emit_move_insn (temp, val);
6407
6408 XEXP (x, 1) = temp;
6409 return x;
6410 }
6411
6412 else if (GET_CODE (XEXP (x, 1)) == REG)
6413 {
6414 register rtx temp = gen_reg_rtx (Pmode);
6415 register rtx val = force_operand (XEXP (x, 0), temp);
6416 if (val != temp)
6417 emit_move_insn (temp, val);
6418
6419 XEXP (x, 0) = temp;
6420 return x;
6421 }
6422 }
6423
6424 return x;
6425}
2a2ab3f9
JVA
6426\f
6427/* Print an integer constant expression in assembler syntax. Addition
6428 and subtraction are the only arithmetic that may appear in these
6429 expressions. FILE is the stdio stream to write to, X is the rtx, and
6430 CODE is the operand print code from the output string. */
6431
6432static void
6433output_pic_addr_const (file, x, code)
6434 FILE *file;
6435 rtx x;
6436 int code;
6437{
6438 char buf[256];
6439
6440 switch (GET_CODE (x))
6441 {
6442 case PC:
6443 if (flag_pic)
6444 putc ('.', file);
6445 else
6446 abort ();
6447 break;
6448
6449 case SYMBOL_REF:
91bb873f 6450 assemble_name (file, XSTR (x, 0));
b069de3b 6451 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
91bb873f 6452 fputs ("@PLT", file);
2a2ab3f9
JVA
6453 break;
6454
91bb873f
RH
6455 case LABEL_REF:
6456 x = XEXP (x, 0);
6457 /* FALLTHRU */
2a2ab3f9
JVA
6458 case CODE_LABEL:
6459 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6460 assemble_name (asm_out_file, buf);
6461 break;
6462
6463 case CONST_INT:
f64cecad 6464 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
6465 break;
6466
6467 case CONST:
6468 /* This used to output parentheses around the expression,
6469 but that does not work on the 386 (either ATT or BSD assembler). */
6470 output_pic_addr_const (file, XEXP (x, 0), code);
6471 break;
6472
6473 case CONST_DOUBLE:
6474 if (GET_MODE (x) == VOIDmode)
6475 {
6476 /* We can use %d if the number is <32 bits and positive. */
6477 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
6478 fprintf (file, "0x%lx%08lx",
6479 (unsigned long) CONST_DOUBLE_HIGH (x),
6480 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6481 else
f64cecad 6482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
6483 }
6484 else
6485 /* We can't handle floating point constants;
6486 PRINT_OPERAND must handle them. */
6487 output_operand_lossage ("floating constant misused");
6488 break;
6489
6490 case PLUS:
e9a25f70 6491 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6492 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6493 {
2a2ab3f9 6494 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6495 putc ('+', file);
e9a25f70 6496 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6497 }
91bb873f 6498 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6499 {
2a2ab3f9 6500 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6501 putc ('+', file);
e9a25f70 6502 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6503 }
91bb873f
RH
6504 else
6505 abort ();
2a2ab3f9
JVA
6506 break;
6507
6508 case MINUS:
b069de3b
SS
6509 if (!TARGET_MACHO)
6510 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6511 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6512 putc ('-', file);
2a2ab3f9 6513 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6514 if (!TARGET_MACHO)
6515 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6516 break;
6517
91bb873f
RH
6518 case UNSPEC:
6519 if (XVECLEN (x, 0) != 1)
5bf0ebab 6520 abort ();
91bb873f
RH
6521 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6522 switch (XINT (x, 1))
77ebd435 6523 {
8ee41eaf 6524 case UNSPEC_GOT:
77ebd435
AJ
6525 fputs ("@GOT", file);
6526 break;
8ee41eaf 6527 case UNSPEC_GOTOFF:
77ebd435
AJ
6528 fputs ("@GOTOFF", file);
6529 break;
8ee41eaf 6530 case UNSPEC_GOTPCREL:
edfe8595 6531 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6532 break;
f996902d 6533 case UNSPEC_GOTTPOFF:
dea73790 6534 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6535 fputs ("@GOTTPOFF", file);
6536 break;
6537 case UNSPEC_TPOFF:
6538 fputs ("@TPOFF", file);
6539 break;
6540 case UNSPEC_NTPOFF:
75d38379
JJ
6541 if (TARGET_64BIT)
6542 fputs ("@TPOFF", file);
6543 else
6544 fputs ("@NTPOFF", file);
f996902d
RH
6545 break;
6546 case UNSPEC_DTPOFF:
6547 fputs ("@DTPOFF", file);
6548 break;
dea73790 6549 case UNSPEC_GOTNTPOFF:
75d38379
JJ
6550 if (TARGET_64BIT)
6551 fputs ("@GOTTPOFF(%rip)", file);
6552 else
6553 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6554 break;
6555 case UNSPEC_INDNTPOFF:
6556 fputs ("@INDNTPOFF", file);
6557 break;
77ebd435
AJ
6558 default:
6559 output_operand_lossage ("invalid UNSPEC as operand");
6560 break;
6561 }
91bb873f
RH
6562 break;
6563
2a2ab3f9
JVA
6564 default:
6565 output_operand_lossage ("invalid expression as operand");
6566 }
6567}
1865dbb5 6568
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
  /* Emit a pointer-sized data directive: .quad in 64-bit mode when the
     assembler supports it, otherwise .long (64-bit without ASM_QUAD is
     unsupported).  */
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  /* Under PIC the address may need @GOT/@GOTOFF-style decoration.  */
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
6590
/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      /* A single .long suffices for a 4-byte DTPREL value.  */
      break;
    case 8:
      /* 8-byte value: pad the .long with a zero upper half.  */
      fputs (", 0", file);
      break;
    default:
      abort ();
   }
}
6614
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   Returns the simplified address, or ORIG_X unchanged when the
   expression does not match one of the recognized PIC forms.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  /* Look through a memory reference to the address inside it.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit PIC only uses (mem (const (unspec GOTPCREL))).  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF.  Y is set to whichever
	 addend is not the PIC register; it is re-attached below.  */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* GOT references are loads (MEM); GOTOFF references are not.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Same, but with a constant offset folded into the UNSPEC's PLUS.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 6691\f
/* Emit the instruction suffix ("e", "ne", "g", ...) corresponding to
   comparison CODE interpreted in flags mode MODE.  If REVERSE is
   nonzero, the reversed condition's suffix is printed.  FP nonzero
   selects the fcmov spellings ("nbe"/"nb"/"u"/"nu") where they differ
   from the setcc/integer-cmov ones.  The abort calls enforce which CC
   modes each comparison is valid in.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons must already be reduced to a single integer
	 condition (no bypass/second code needed).  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";		/* only the sign flag is valid */
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
6781
/* Print the assembler name of register X to FILE.  CODE is the operand
   modifier letter selecting the size variant ('b' = QImode, 'w' =
   HImode, 'k' = SImode, 'q' = DImode, 'h' = high byte, 'y' = st(0)
   spelling); 0 means use the register's natural mode.  Internally CODE
   is normalized to a byte size (or 0 for high-byte, 3 for st(0)).  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  /* These registers never appear in generated assembly.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the modifier letter into an operand size in bytes
     (with 0 and 3 as special markers, see above).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers get an 'e' (32-bit) or 'r' (64-bit) prefix
	 on their two-letter base name; FP/SSE registers do not.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
6870
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  The result is cached in cfun->machine->some_ld_name;
   aborts if no such symbol exists (the caller guarantees one does).  */

static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  /* Walk the insn stream; the helper stores the first match into
     cfun->machine->some_ld_name as a side effect.  */
  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}
6890
6891static int
6892get_some_local_dynamic_name_1 (px, data)
6893 rtx *px;
6894 void *data ATTRIBUTE_UNUSED;
6895{
6896 rtx x = *px;
6897
6898 if (GET_CODE (x) == SYMBOL_REF
6899 && local_dynamic_symbolic_operand (x, Pmode))
6900 {
6901 cfun->machine->some_ld_name = XSTR (x, 0);
6902 return 1;
6903 }
6904
6905 return 0;
6906}
6907
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
	nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */

/* Print operand X to FILE, applying the modifier letter CODE described
   in the table above (0 means no modifier).  Size-prefix modifiers fall
   through the switch via `break' and are handled by the generic
   REG/MEM/constant printing below; all other modifiers `return'.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without 'q' spell it "ll".  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  /* Size/PIC modifiers: fall through to generic printing.  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* Constants and symbols: emit the immediate marker ('$' in AT&T,
	 "OFFSET FLAT:" for symbols in Intel syntax), except for 'P'
	 (call operands take no marker).  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
7306\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Thread-pointer reference: %fs:0 in 64-bit mode, %gs:0 in 32-bit.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      if (TARGET_64BIT)
	fputs ("fs:0", file);
      else
	fputs ("gs:0", file);
      return;
    }

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (addr) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (addr) == SYMBOL_REF
	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
	      || GET_CODE (addr) == LABEL_REF
	      || (GET_CODE (addr) == CONST
		  && GET_CODE (XEXP (addr, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+offset+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Target hook for output_addr_const: emit the TLS relocation decoration
   for UNSPEC constant X (e.g. "sym@GOTTPOFF").  Returns true if X was
   handled here, false to let the generic code report an error.  */

bool
output_addr_const_extra (file, x)
     FILE *file;
     rtx x;
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
2a2ab3f9
JVA
7501\f
7502/* Split one or more DImode RTL references into pairs of SImode
7503 references. The RTL can be REG, offsettable MEM, integer constant, or
7504 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7505 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7506 that parallel "operands". */
2a2ab3f9
JVA
7507
7508void
7509split_di (operands, num, lo_half, hi_half)
7510 rtx operands[];
7511 int num;
7512 rtx lo_half[], hi_half[];
7513{
7514 while (num--)
7515 {
57dbca5e 7516 rtx op = operands[num];
b932f770
JH
7517
7518 /* simplify_subreg refuse to split volatile memory addresses,
7519 but we still have to handle it. */
7520 if (GET_CODE (op) == MEM)
2a2ab3f9 7521 {
f4ef873c 7522 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7523 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7524 }
7525 else
b932f770 7526 {
38ca929b
JH
7527 lo_half[num] = simplify_gen_subreg (SImode, op,
7528 GET_MODE (op) == VOIDmode
7529 ? DImode : GET_MODE (op), 0);
7530 hi_half[num] = simplify_gen_subreg (SImode, op,
7531 GET_MODE (op) == VOIDmode
7532 ? DImode : GET_MODE (op), 4);
b932f770 7533 }
2a2ab3f9
JVA
7534 }
7535}
44cf5b6a
JH
7536/* Split one or more TImode RTL references into pairs of SImode
7537 references. The RTL can be REG, offsettable MEM, integer constant, or
7538 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7539 split and "num" is its length. lo_half and hi_half are output arrays
7540 that parallel "operands". */
7541
7542void
7543split_ti (operands, num, lo_half, hi_half)
7544 rtx operands[];
7545 int num;
7546 rtx lo_half[], hi_half[];
7547{
7548 while (num--)
7549 {
7550 rtx op = operands[num];
b932f770
JH
7551
7552 /* simplify_subreg refuse to split volatile memory addresses, but we
7553 still have to handle it. */
7554 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7555 {
7556 lo_half[num] = adjust_address (op, DImode, 0);
7557 hi_half[num] = adjust_address (op, DImode, 8);
7558 }
7559 else
b932f770
JH
7560 {
7561 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7562 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7563 }
44cf5b6a
JH
7564 }
7565}
2a2ab3f9 7566\f
2a2ab3f9
JVA
7567/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7568 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7569 is the expression of the binary operation. The output may either be
7570 emitted here, or returned to the caller, like all output_* functions.
7571
7572 There is no guarantee that the operands are the same mode, as they
0f290768 7573 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7574
e3c2afab
AM
7575#ifndef SYSV386_COMPAT
7576/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7577 wants to fix the assemblers because that causes incompatibility
7578 with gcc. No-one wants to fix gcc because that causes
7579 incompatibility with assemblers... You can use the option of
7580 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7581#define SYSV386_COMPAT 1
7582#endif
7583
69ddee61 7584const char *
2a2ab3f9
JVA
7585output_387_binary_op (insn, operands)
7586 rtx insn;
7587 rtx *operands;
7588{
e3c2afab 7589 static char buf[30];
69ddee61 7590 const char *p;
1deaa899
JH
7591 const char *ssep;
7592 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7593
e3c2afab
AM
7594#ifdef ENABLE_CHECKING
7595 /* Even if we do not want to check the inputs, this documents input
7596 constraints. Which helps in understanding the following code. */
7597 if (STACK_REG_P (operands[0])
7598 && ((REG_P (operands[1])
7599 && REGNO (operands[0]) == REGNO (operands[1])
7600 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7601 || (REG_P (operands[2])
7602 && REGNO (operands[0]) == REGNO (operands[2])
7603 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7604 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7605 ; /* ok */
1deaa899 7606 else if (!is_sse)
e3c2afab
AM
7607 abort ();
7608#endif
7609
2a2ab3f9
JVA
7610 switch (GET_CODE (operands[3]))
7611 {
7612 case PLUS:
e075ae69
RH
7613 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7614 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7615 p = "fiadd";
7616 else
7617 p = "fadd";
1deaa899 7618 ssep = "add";
2a2ab3f9
JVA
7619 break;
7620
7621 case MINUS:
e075ae69
RH
7622 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7623 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7624 p = "fisub";
7625 else
7626 p = "fsub";
1deaa899 7627 ssep = "sub";
2a2ab3f9
JVA
7628 break;
7629
7630 case MULT:
e075ae69
RH
7631 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7632 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7633 p = "fimul";
7634 else
7635 p = "fmul";
1deaa899 7636 ssep = "mul";
2a2ab3f9
JVA
7637 break;
7638
7639 case DIV:
e075ae69
RH
7640 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7641 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7642 p = "fidiv";
7643 else
7644 p = "fdiv";
1deaa899 7645 ssep = "div";
2a2ab3f9
JVA
7646 break;
7647
7648 default:
7649 abort ();
7650 }
7651
1deaa899
JH
7652 if (is_sse)
7653 {
7654 strcpy (buf, ssep);
7655 if (GET_MODE (operands[0]) == SFmode)
7656 strcat (buf, "ss\t{%2, %0|%0, %2}");
7657 else
7658 strcat (buf, "sd\t{%2, %0|%0, %2}");
7659 return buf;
7660 }
e075ae69 7661 strcpy (buf, p);
2a2ab3f9
JVA
7662
7663 switch (GET_CODE (operands[3]))
7664 {
7665 case MULT:
7666 case PLUS:
7667 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7668 {
e3c2afab 7669 rtx temp = operands[2];
2a2ab3f9
JVA
7670 operands[2] = operands[1];
7671 operands[1] = temp;
7672 }
7673
e3c2afab
AM
7674 /* know operands[0] == operands[1]. */
7675
2a2ab3f9 7676 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7677 {
7678 p = "%z2\t%2";
7679 break;
7680 }
2a2ab3f9
JVA
7681
7682 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7683 {
7684 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7685 /* How is it that we are storing to a dead operand[2]?
7686 Well, presumably operands[1] is dead too. We can't
7687 store the result to st(0) as st(0) gets popped on this
7688 instruction. Instead store to operands[2] (which I
7689 think has to be st(1)). st(1) will be popped later.
7690 gcc <= 2.8.1 didn't have this check and generated
7691 assembly code that the Unixware assembler rejected. */
7692 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7693 else
e3c2afab 7694 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7695 break;
6b28fd63 7696 }
2a2ab3f9
JVA
7697
7698 if (STACK_TOP_P (operands[0]))
e3c2afab 7699 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7700 else
e3c2afab 7701 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7702 break;
2a2ab3f9
JVA
7703
7704 case MINUS:
7705 case DIV:
7706 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7707 {
7708 p = "r%z1\t%1";
7709 break;
7710 }
2a2ab3f9
JVA
7711
7712 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7713 {
7714 p = "%z2\t%2";
7715 break;
7716 }
2a2ab3f9 7717
2a2ab3f9 7718 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7719 {
e3c2afab
AM
7720#if SYSV386_COMPAT
7721 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7722 derived assemblers, confusingly reverse the direction of
7723 the operation for fsub{r} and fdiv{r} when the
7724 destination register is not st(0). The Intel assembler
7725 doesn't have this brain damage. Read !SYSV386_COMPAT to
7726 figure out what the hardware really does. */
7727 if (STACK_TOP_P (operands[0]))
7728 p = "{p\t%0, %2|rp\t%2, %0}";
7729 else
7730 p = "{rp\t%2, %0|p\t%0, %2}";
7731#else
6b28fd63 7732 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7733 /* As above for fmul/fadd, we can't store to st(0). */
7734 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7735 else
e3c2afab
AM
7736 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7737#endif
e075ae69 7738 break;
6b28fd63 7739 }
2a2ab3f9
JVA
7740
7741 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7742 {
e3c2afab 7743#if SYSV386_COMPAT
6b28fd63 7744 if (STACK_TOP_P (operands[0]))
e3c2afab 7745 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7746 else
e3c2afab
AM
7747 p = "{p\t%1, %0|rp\t%0, %1}";
7748#else
7749 if (STACK_TOP_P (operands[0]))
7750 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7751 else
7752 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7753#endif
e075ae69 7754 break;
6b28fd63 7755 }
2a2ab3f9
JVA
7756
7757 if (STACK_TOP_P (operands[0]))
7758 {
7759 if (STACK_TOP_P (operands[1]))
e3c2afab 7760 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7761 else
e3c2afab 7762 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7763 break;
2a2ab3f9
JVA
7764 }
7765 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7766 {
7767#if SYSV386_COMPAT
7768 p = "{\t%1, %0|r\t%0, %1}";
7769#else
7770 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7771#endif
7772 }
2a2ab3f9 7773 else
e3c2afab
AM
7774 {
7775#if SYSV386_COMPAT
7776 p = "{r\t%2, %0|\t%0, %2}";
7777#else
7778 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7779#endif
7780 }
e075ae69 7781 break;
2a2ab3f9
JVA
7782
7783 default:
7784 abort ();
7785 }
e075ae69
RH
7786
7787 strcat (buf, p);
7788 return buf;
2a2ab3f9 7789}
e075ae69 7790
/* Output code to initialize control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word,
   while ROUND_DOWN is set to a control word with both rounding-control
   bits forced on.  NOTE(review): RC=11 (0xc00) is round-toward-zero
   (truncation), which is what fix_trunc needs; the name "round_down"
   appears to be historical -- confirm against the trunc patterns.  */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  /* Scratch pseudo so NORMAL itself keeps the unmodified control word.  */
  rtx reg = gen_reg_rtx (HImode);

  /* Store the live x87 control word into NORMAL, then copy it.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  /* Set bits 10 and 11 (0xc00).  When partial register stalls are not a
     concern, insert 0xc into the high byte via insv, avoiding the 16-bit
     operand-size prefix that iorhi3 would emit.  */
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
7809
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  operands[2]/operands[3] are the saved and
   truncating control words set up by emit_i387_cw_initialization.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.
     Duplicate st(0) so the mandatory fistp below pops the copy.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  /* The value to convert must be in st(0).  */
  if (!STACK_TOP_P (operands[1]))
    abort ();

  /* fist/fistp only store to memory.  */
  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word, store, then restore.  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
cda749b1 7843
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For fnstsw, operands[0] is the status-word destination; the values
     being compared shift down one position.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  /* SSE compares are handled by [u]comis[sd] directly.  */
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  /* x87 compares require the first operand in st(0).  */
  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
	 The mask built below indexes this table; NULL entries are
	 combinations that should never be requested.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Bit 0: pop st(0); bit 1: unordered; bit 2: integer memory
	 operand; bits 3-4: eflags_p selector.  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 7974
f88c65f7
RH
7975void
7976ix86_output_addr_vec_elt (file, value)
7977 FILE *file;
7978 int value;
7979{
7980 const char *directive = ASM_LONG;
7981
7982 if (TARGET_64BIT)
7983 {
7984#ifdef ASM_QUAD
7985 directive = ASM_QUAD;
7986#else
7987 abort ();
7988#endif
7989 }
7990
7991 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7992}
7993
/* Emit one element of an ADDR_DIFF_VEC jump table to FILE: the offset
   of local label VALUE, expressed relative to label REL (64-bit), to
   the GOT (via @GOTOFF or an explicit _GLOBAL_OFFSET_TABLE_ expression),
   or to the Mach-O function base.  */
void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    /* +1 skips the leading character of the function base name;
       NOTE(review): presumably strips the '&' or '*' prefix -- confirm
       against machopic_function_base_name.  */
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
	     machopic_function_base_name () + 1);
#endif
  else
    /* Assembler lacks @GOTOFF in data; spell out the GOT-relative sum.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 8013\f
a8bac9ab
RH
8014/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8015 for the target. */
8016
8017void
8018ix86_expand_clear (dest)
8019 rtx dest;
8020{
8021 rtx tmp;
8022
8023 /* We play register width games, which are only valid after reload. */
8024 if (!reload_completed)
8025 abort ();
8026
8027 /* Avoid HImode and its attendant prefix byte. */
8028 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8029 dest = gen_rtx_REG (SImode, REGNO (dest));
8030
8031 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8032
8033 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8034 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8035 {
8036 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8037 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8038 }
8039
8040 emit_insn (tmp);
8041}
8042
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (x)
     rtx x;
{
  /* Examine the address inside the MEM.  */
  x = XEXP (x, 0);

  /* With 32-bit PIC a pool reference looks like
       (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)));
     peel each wrapper, bailing out if the shape does not match.  */
  if (flag_pic && ! TARGET_64BIT)
    {
      if (GET_CODE (x) != PLUS)
	return NULL_RTX;
      if (XEXP (x, 0) != pic_offset_table_rtx)
	return NULL_RTX;
      x = XEXP (x, 1);
      if (GET_CODE (x) != CONST)
	return NULL_RTX;
      x = XEXP (x, 0);
      if (GET_CODE (x) != UNSPEC)
	return NULL_RTX;
      if (XINT (x, 1) != UNSPEC_GOTOFF)
	return NULL_RTX;
      x = XVECEXP (x, 0, 0);
    }

  /* Only a SYMBOL_REF that addresses the constant pool qualifies.  */
  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
8074
/* Expand a scalar move of mode MODE between operands[0] (dest) and
   operands[1] (source), massaging the operands as required by the
   target (TLS and PIC legitimization, no mem-to-mem moves, constants
   forced where profitable) and emitting the final SET.  */
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn, op0, op1, tmp;

  op0 = operands[0];
  op1 = operands[1];

  if (tls_symbolic_operand (op1, Pmode))
    {
      /* TLS references must be legitimized; if storing to memory, go
	 through a fresh register first.  */
      op1 = legitimize_address (op1, op1, VOIDmode);
      if (GET_CODE (op0) == MEM)
	{
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
	  op1 = tmp;
	}
    }
  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else
	{
	  if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	}
      if (op0 != op1)
	{
	  insn = gen_rtx_SET (VOIDmode, op0, op1);
	  emit_insn (insn);
	}
      return;
#endif /* TARGET_MACHO */
      /* Symbolic PIC source: legitimize, reusing op0 as the scratch
	 when it is itself a register.  */
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  /* Legitimization may already have produced the move into op0.  */
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
    }
  else
    {
      /* No mem-to-mem moves, except that a push straight from memory
	 is representable when no rounding padding is involved.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE
		   && register_operand (op0, mode))
	    op1 = validize_mem (force_const_mem (mode, op1));
	}
    }

  insn = gen_rtx_SET (VOIDmode, op0, op1);

  emit_insn (insn);
}
e9a25f70 8173
/* Expand a vector-mode move between operands[0] and operands[1],
   forcing constants into memory and breaking mem-to-mem moves.  */
void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  Guarded so we only
     create a pseudo when both sides are non-registers (mem-to-mem).  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 8200
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) equals the memory destination.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination, force the other into a register.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8290
8291/* Return TRUE or FALSE depending on whether the binary operator meets the
8292 appropriate constraints. */
8293
8294int
8295ix86_binary_operator_ok (code, mode, operands)
8296 enum rtx_code code;
8297 enum machine_mode mode ATTRIBUTE_UNUSED;
8298 rtx operands[3];
8299{
8300 /* Both source operands cannot be in memory. */
8301 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8302 return 0;
8303 /* If the operation is not commutable, source 1 cannot be a constant. */
8304 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8305 return 0;
8306 /* If the destination is memory, we must have a matching source operand. */
8307 if (GET_CODE (operands[0]) == MEM
8308 && ! (rtx_equal_p (operands[0], operands[1])
8309 || (GET_RTX_CLASS (code) == 'c'
8310 && rtx_equal_p (operands[0], operands[2]))))
8311 return 0;
06a964de 8312 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8313 have a matching destination. */
06a964de
JH
8314 if (GET_CODE (operands[1]) == MEM
8315 && GET_RTX_CLASS (code) != 'c'
8316 && ! rtx_equal_p (operands[0], operands[1]))
8317 return 0;
e075ae69
RH
8318 return 1;
8319}
8320
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  NOT does not modify the flags, so it needs
     no clobber; everything else gets the FLAGS_REG clobber attached.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8382
8383/* Return TRUE or FALSE depending on whether the unary operator meets the
8384 appropriate constraints. */
8385
8386int
8387ix86_unary_operator_ok (code, mode, operands)
8388 enum rtx_code code ATTRIBUTE_UNUSED;
8389 enum machine_mode mode ATTRIBUTE_UNUSED;
8390 rtx operands[2] ATTRIBUTE_UNUSED;
8391{
06a964de
JH
8392 /* If one of operands is memory, source and destination must match. */
8393 if ((GET_CODE (operands[0]) == MEM
8394 || GET_CODE (operands[1]) == MEM)
8395 && ! rtx_equal_p (operands[0], operands[1]))
8396 return FALSE;
e075ae69
RH
8397 return TRUE;
8398}
8399
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  /* The fall-through cascade below encodes the strength ordering of
     the CC modes: each case rejects the requests it cannot satisfy and
     falls into the weaker mode's checks.  */
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode satisfies CCNOmode, or CCmode for a compare against
	 zero (where the overflow flag is known clear).  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally, source and destination must agree on the mode.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8450
e075ae69
RH
8451/* Generate insn patterns to do an integer compare of OPERANDS. */
8452
8453static rtx
8454ix86_expand_int_compare (code, op0, op1)
8455 enum rtx_code code;
8456 rtx op0, op1;
8457{
8458 enum machine_mode cmpmode;
8459 rtx tmp, flags;
8460
8461 cmpmode = SELECT_CC_MODE (code, op0, op1);
8462 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8463
8464 /* This is very simple, but making the interface the same as in the
8465 FP case makes the rest of the code easier. */
8466 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8467 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8468
8469 /* Return the test that should be put into the flags user, i.e.
8470 the bcc, scc, or cmov instruction. */
8471 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8472}
8473
3a3677ff
RH
8474/* Figure out whether to use ordered or unordered fp comparisons.
8475 Return the appropriate mode to use. */
e075ae69 8476
b1cdafbb 8477enum machine_mode
3a3677ff 8478ix86_fp_compare_mode (code)
8752c357 8479 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 8480{
9e7adcb3
JH
8481 /* ??? In order to make all comparisons reversible, we do all comparisons
8482 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8483 all forms trapping and nontrapping comparisons, we can make inequality
8484 comparisons trapping again, since it results in better code when using
8485 FCOM based compares. */
8486 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8487}
8488
9076b9c1
JH
8489enum machine_mode
8490ix86_cc_mode (code, op0, op1)
8491 enum rtx_code code;
8492 rtx op0, op1;
8493{
8494 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8495 return ix86_fp_compare_mode (code);
8496 switch (code)
8497 {
8498 /* Only zero flag is needed. */
8499 case EQ: /* ZF=0 */
8500 case NE: /* ZF!=0 */
8501 return CCZmode;
8502 /* Codes needing carry flag. */
265dab10
JH
8503 case GEU: /* CF=0 */
8504 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8505 case LTU: /* CF=1 */
8506 case LEU: /* CF=1 | ZF=1 */
265dab10 8507 return CCmode;
9076b9c1
JH
8508 /* Codes possibly doable only with sign flag when
8509 comparing against zero. */
8510 case GE: /* SF=OF or SF=0 */
7e08e190 8511 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8512 if (op1 == const0_rtx)
8513 return CCGOCmode;
8514 else
8515 /* For other cases Carry flag is not required. */
8516 return CCGCmode;
8517 /* Codes doable only with sign flag when comparing
8518 against zero, but we miss jump instruction for it
4aae8a9a 8519 so we need to use relational tests against overflow
9076b9c1
JH
8520 that thus needs to be zero. */
8521 case GT: /* ZF=0 & SF=OF */
8522 case LE: /* ZF=1 | SF<>OF */
8523 if (op1 == const0_rtx)
8524 return CCNOmode;
8525 else
8526 return CCGCmode;
7fcd7218
JH
8527 /* strcmp pattern do (use flags) and combine may ask us for proper
8528 mode. */
8529 case USE:
8530 return CCmode;
9076b9c1 8531 default:
0f290768 8532 abort ();
9076b9c1
JH
8533 }
8534}
8535
3a3677ff
RH
8536/* Return true if we should use an FCOMI instruction for this fp comparison. */
8537
a940d8bd 8538int
3a3677ff 8539ix86_use_fcomi_compare (code)
9e7adcb3 8540 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8541{
9e7adcb3
JH
8542 enum rtx_code swapped_code = swap_condition (code);
8543 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8544 || (ix86_fp_comparison_cost (swapped_code)
8545 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8546}
8547
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants loadable by the 387 (0.0, 1.0, ...) go into a
	     register; anything else comes from the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8619
c0c102a9
JH
8620/* Convert comparison codes we use to represent FP comparison to integer
8621 code that will result in proper branch. Return UNKNOWN if no such code
8622 is available. */
8623static enum rtx_code
8624ix86_fp_compare_code_to_integer (code)
8625 enum rtx_code code;
8626{
8627 switch (code)
8628 {
8629 case GT:
8630 return GTU;
8631 case GE:
8632 return GEU;
8633 case ORDERED:
8634 case UNORDERED:
8635 return code;
8636 break;
8637 case UNEQ:
8638 return EQ;
8639 break;
8640 case UNLT:
8641 return LTU;
8642 break;
8643 case UNLE:
8644 return LEU;
8645 break;
8646 case LTGT:
8647 return NE;
8648 break;
8649 default:
8650 return UNKNOWN;
8651 }
8652}
8653
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These tests are directly expressible as one flag condition
	 and behave correctly for unordered operands.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These would give the wrong answer on unordered operands, so
	 branch around them when PF signals unordered.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* These must additionally succeed on unordered operands, so a
	 second branch on PF is taken as well.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance unordered operands need not be handled.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
8719
9e7adcb3 8720/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 8721 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
8722 In future this should be tweaked to compute bytes for optimize_size and
8723 take into account performance of various instructions on various CPUs. */
8724static int
8725ix86_fp_comparison_arithmetics_cost (code)
8726 enum rtx_code code;
8727{
8728 if (!TARGET_IEEE_FP)
8729 return 4;
8730 /* The cost of code output by ix86_expand_fp_compare. */
8731 switch (code)
8732 {
8733 case UNLE:
8734 case UNLT:
8735 case LTGT:
8736 case GT:
8737 case GE:
8738 case UNORDERED:
8739 case ORDERED:
8740 case UNEQ:
8741 return 4;
8742 break;
8743 case LT:
8744 case NE:
8745 case EQ:
8746 case UNGE:
8747 return 5;
8748 break;
8749 case LE:
8750 case UNGT:
8751 return 6;
8752 break;
8753 default:
8754 abort ();
8755 }
8756}
8757
8758/* Return cost of comparison done using fcomi operation.
8759 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8760static int
8761ix86_fp_comparison_fcomi_cost (code)
8762 enum rtx_code code;
8763{
8764 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8765 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
8766 prevents gcc from using it. */
8767 if (!TARGET_CMOVE)
8768 return 1024;
8769 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8770 return (bypass_code != NIL || second_code != NIL) + 2;
8771}
8772
8773/* Return cost of comparison done using sahf operation.
8774 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8775static int
8776ix86_fp_comparison_sahf_cost (code)
8777 enum rtx_code code;
8778{
8779 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8780 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
8781 avoids gcc from using it. */
8782 if (!TARGET_USE_SAHF && !optimize_size)
8783 return 1024;
8784 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8785 return (bypass_code != NIL || second_code != NIL) + 3;
8786}
8787
8788/* Compute cost of the comparison done using any method.
8789 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8790static int
8791ix86_fp_comparison_cost (code)
8792 enum rtx_code code;
8793{
8794 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8795 int min;
8796
8797 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8798 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8799
8800 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8801 if (min > sahf_cost)
8802 min = sahf_cost;
8803 if (min > fcomi_cost)
8804 min = fcomi_cost;
8805 return min;
8806}
c0c102a9 8807
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE is the comparison; OP0/OP1 the operands.  SCRATCH, when non-NULL,
   is a HImode register used to hold the fnstsw result (a fresh pseudo is
   allocated otherwise).  When SECOND_TEST/BYPASS_TEST are non-NULL they
   receive the extra flag tests required by ix86_fp_comparison_codes, or
   NULL_RTX when none are needed.  Returns the comparison rtx to put in
   the flags user (bcc, setcc or cmov).  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only usable when the
     caller can accept the extra tests the split may require.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fcom + fnstsw into SCRATCH, then sahf to move AH into flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select x87 condition bits in AH after fnstsw:
	 0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3 (per the
	 Intel SDM; C0/C2/C3 map to CF/PF/ZF under sahf).  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      /* a > b (or unordered ok): C0|C2|C3 all clear.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      /* IEEE UNGT: (C0|C2|C3) - 1 compared against 0x44 wraps the
		 all-zero case to 0xff; GEU then accepts C2/C3 patterns.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      /* Exactly C0 set, C2/C3 clear: true less-than.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* C0 set suffices (unordered also sets C0).  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      /* C0 and C2 both clear.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      /* IEEE UNGE: flip C0; nonzero xor result means not-less.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      /* (C0|C2|C3) - 1 < 0x40 only for the C0-only and C3-only
		 patterns, i.e. true < or =.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      /* Any of C0/C2/C3 set.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      /* Exactly C3 set among C0/C2/C3: equal and ordered.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* C3 set (unordered also sets C3, which UNEQ accepts).  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      /* C3 xor'd over C0|C2|C3: nonzero unless ordered-equal.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      /* C3 clear: not equal (LTGT treats unordered as equal).  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  /* C2 set signals NaN operands.  */
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
8997
9e3e266c 8998rtx
a1b8572c 8999ix86_expand_compare (code, second_test, bypass_test)
e075ae69 9000 enum rtx_code code;
a1b8572c 9001 rtx *second_test, *bypass_test;
e075ae69
RH
9002{
9003 rtx op0, op1, ret;
9004 op0 = ix86_compare_op0;
9005 op1 = ix86_compare_op1;
9006
a1b8572c
JH
9007 if (second_test)
9008 *second_test = NULL_RTX;
9009 if (bypass_test)
9010 *bypass_test = NULL_RTX;
9011
e075ae69 9012 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9013 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9014 second_test, bypass_test);
32b5b1aa 9015 else
e075ae69
RH
9016 ret = ix86_expand_int_compare (code, op0, op1);
9017
9018 return ret;
9019}
9020
03598dea
JH
9021/* Return true if the CODE will result in nontrivial jump sequence. */
9022bool
9023ix86_fp_jump_nontrivial_p (code)
9024 enum rtx_code code;
9025{
9026 enum rtx_code bypass_code, first_code, second_code;
9027 if (!TARGET_CMOVE)
9028 return true;
9029 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9030 return bypass_code != NIL || second_code != NIL;
9031}
9032
/* Emit a conditional jump to LABEL on comparison CODE of the global
   operands ix86_compare_op0/ix86_compare_op1.  Integer modes emit one
   compare+branch; FP modes go through the fcomi/sahf machinery; 32-bit
   DImode is split into word-sized compares (recursing through this
   function, which mutates the global compare operands).  */
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* One compare, one conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    /* Clobber the FP condition regs (17/18 in this port's
	       numbering), plus a scratch for fnstsw when not fcomi.  */
	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant into op1 so the splitters below can
	   rely on op0 being a register.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a simple SImode compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	/* Low words are compared unsigned regardless of CODE.  */
	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 9219
/* Split branch based on floating point condition.

   Emits up to three jumps: an optional bypass jump (around the main
   test, for the unordered case), the main conditional jump to TARGET1,
   and an optional second jump that also accepts TARGET1.  TMP is an
   optional HImode scratch passed down to ix86_expand_fp_compare.
   Branch probabilities from split_branch_probability are attached as
   REG_BR_PROB notes when available.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that the fall-through target is TARGET2.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* Bypass jump skips the main test entirely.  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      /* Second jump also goes to TARGET1 (typically the UNORDERED test).  */
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  /* Bypass label lands after all the jumps.  */
  if (label != NULL_RTX)
    emit_label (label);
}
9298
32b5b1aa 9299int
3a3677ff 9300ix86_expand_setcc (code, dest)
e075ae69 9301 enum rtx_code code;
e075ae69 9302 rtx dest;
32b5b1aa 9303{
a1b8572c
JH
9304 rtx ret, tmp, tmpreg;
9305 rtx second_test, bypass_test;
e075ae69 9306
885a70fd
JH
9307 if (GET_MODE (ix86_compare_op0) == DImode
9308 && !TARGET_64BIT)
e075ae69
RH
9309 return 0; /* FAIL */
9310
b932f770
JH
9311 if (GET_MODE (dest) != QImode)
9312 abort ();
e075ae69 9313
a1b8572c 9314 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9315 PUT_MODE (ret, QImode);
9316
9317 tmp = dest;
a1b8572c 9318 tmpreg = dest;
32b5b1aa 9319
e075ae69 9320 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9321 if (bypass_test || second_test)
9322 {
9323 rtx test = second_test;
9324 int bypass = 0;
9325 rtx tmp2 = gen_reg_rtx (QImode);
9326 if (bypass_test)
9327 {
9328 if (second_test)
b531087a 9329 abort ();
a1b8572c
JH
9330 test = bypass_test;
9331 bypass = 1;
9332 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9333 }
9334 PUT_MODE (test, QImode);
9335 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9336
9337 if (bypass)
9338 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9339 else
9340 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9341 }
e075ae69 9342
e075ae69 9343 return 1; /* DONE */
32b5b1aa 9344}
e075ae69 9345
d1f87653 9346/* Expand comparison setting or clearing carry flag. Return true when successful
4977bab6
ZW
9347 and set pop for the operation. */
9348bool
9349ix86_expand_carry_flag_compare (code, op0, op1, pop)
9350 rtx op0, op1, *pop;
9351 enum rtx_code code;
9352{
9353 enum machine_mode mode =
9354 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9355
9356 /* Do not handle DImode compares that go trought special path. Also we can't
9357 deal with FP compares yet. This is possible to add. */
e6e81735
JH
9358 if ((mode == DImode && !TARGET_64BIT))
9359 return false;
9360 if (FLOAT_MODE_P (mode))
9361 {
9362 rtx second_test = NULL, bypass_test = NULL;
9363 rtx compare_op, compare_seq;
9364
9365 /* Shortcut: following common codes never translate into carry flag compares. */
9366 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9367 || code == ORDERED || code == UNORDERED)
9368 return false;
9369
9370 /* These comparisons require zero flag; swap operands so they won't. */
9371 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9372 && !TARGET_IEEE_FP)
9373 {
9374 rtx tmp = op0;
9375 op0 = op1;
9376 op1 = tmp;
9377 code = swap_condition (code);
9378 }
9379
9380 /* Try to expand the comparsion and verify that we end up with carry flag
9381 based comparsion. This is fails to be true only when we decide to expand
9382 comparsion using arithmetic that is not too common scenario. */
9383 start_sequence ();
9384 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9385 &second_test, &bypass_test);
9386 compare_seq = get_insns ();
9387 end_sequence ();
9388
9389 if (second_test || bypass_test)
9390 return false;
9391 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9392 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9393 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9394 else
9395 code = GET_CODE (compare_op);
9396 if (code != LTU && code != GEU)
9397 return false;
9398 emit_insn (compare_seq);
9399 *pop = compare_op;
9400 return true;
9401 }
9402 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9403 return false;
9404 switch (code)
9405 {
9406 case LTU:
9407 case GEU:
9408 break;
9409
9410 /* Convert a==0 into (unsigned)a<1. */
9411 case EQ:
9412 case NE:
9413 if (op1 != const0_rtx)
9414 return false;
9415 op1 = const1_rtx;
9416 code = (code == EQ ? LTU : GEU);
9417 break;
9418
9419 /* Convert a>b into b<a or a>=b-1. */
9420 case GTU:
9421 case LEU:
9422 if (GET_CODE (op1) == CONST_INT)
9423 {
9424 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9425 /* Bail out on overflow. We still can swap operands but that
9426 would force loading of the constant into register. */
9427 if (op1 == const0_rtx
9428 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9429 return false;
9430 code = (code == GTU ? GEU : LTU);
9431 }
9432 else
9433 {
9434 rtx tmp = op1;
9435 op1 = op0;
9436 op0 = tmp;
9437 code = (code == GTU ? LTU : GEU);
9438 }
9439 break;
9440
9441 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9442 case LT:
9443 case GE:
9444 if (mode == DImode || op1 != const0_rtx)
9445 return false;
9446 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9447 code = (code == LT ? GEU : LTU);
9448 break;
9449 case LE:
9450 case GT:
9451 if (mode == DImode || op1 != constm1_rtx)
9452 return false;
9453 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9454 code = (code == LE ? GEU : LTU);
9455 break;
9456
9457 default:
9458 return false;
9459 }
9460 ix86_compare_op0 = op0;
9461 ix86_compare_op1 = op1;
9462 *pop = ix86_expand_compare (code, NULL, NULL);
9463 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9464 abort ();
9465 return true;
9466}
9467
32b5b1aa 9468int
e075ae69
RH
9469ix86_expand_int_movcc (operands)
9470 rtx operands[];
32b5b1aa 9471{
e075ae69
RH
9472 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9473 rtx compare_seq, compare_op;
a1b8572c 9474 rtx second_test, bypass_test;
635559ab 9475 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9476 bool sign_bit_compare_p = false;;
3a3677ff 9477
e075ae69 9478 start_sequence ();
a1b8572c 9479 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9480 compare_seq = get_insns ();
e075ae69
RH
9481 end_sequence ();
9482
9483 compare_code = GET_CODE (compare_op);
9484
4977bab6
ZW
9485 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9486 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9487 sign_bit_compare_p = true;
9488
e075ae69
RH
9489 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9490 HImode insns, we'd be swallowed in word prefix ops. */
9491
4977bab6 9492 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9493 && (mode != DImode || TARGET_64BIT)
0f290768 9494 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9495 && GET_CODE (operands[3]) == CONST_INT)
9496 {
9497 rtx out = operands[0];
9498 HOST_WIDE_INT ct = INTVAL (operands[2]);
9499 HOST_WIDE_INT cf = INTVAL (operands[3]);
9500 HOST_WIDE_INT diff;
9501
4977bab6
ZW
9502 diff = ct - cf;
9503 /* Sign bit compares are better done using shifts than we do by using
9504 sbb. */
9505 if (sign_bit_compare_p
9506 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9507 ix86_compare_op1, &compare_op))
e075ae69 9508 {
e075ae69
RH
9509 /* Detect overlap between destination and compare sources. */
9510 rtx tmp = out;
9511
4977bab6 9512 if (!sign_bit_compare_p)
36583fea 9513 {
e6e81735
JH
9514 bool fpcmp = false;
9515
4977bab6
ZW
9516 compare_code = GET_CODE (compare_op);
9517
e6e81735
JH
9518 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9519 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9520 {
9521 fpcmp = true;
9522 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9523 }
9524
4977bab6
ZW
9525 /* To simplify rest of code, restrict to the GEU case. */
9526 if (compare_code == LTU)
9527 {
9528 HOST_WIDE_INT tmp = ct;
9529 ct = cf;
9530 cf = tmp;
9531 compare_code = reverse_condition (compare_code);
9532 code = reverse_condition (code);
9533 }
e6e81735
JH
9534 else
9535 {
9536 if (fpcmp)
9537 PUT_CODE (compare_op,
9538 reverse_condition_maybe_unordered
9539 (GET_CODE (compare_op)));
9540 else
9541 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9542 }
4977bab6 9543 diff = ct - cf;
36583fea 9544
4977bab6
ZW
9545 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9546 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9547 tmp = gen_reg_rtx (mode);
e075ae69 9548
4977bab6 9549 if (mode == DImode)
e6e81735 9550 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9551 else
e6e81735 9552 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9553 }
14f73b5a 9554 else
4977bab6
ZW
9555 {
9556 if (code == GT || code == GE)
9557 code = reverse_condition (code);
9558 else
9559 {
9560 HOST_WIDE_INT tmp = ct;
9561 ct = cf;
9562 cf = tmp;
9563 }
9564 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9565 ix86_compare_op1, VOIDmode, 0, -1);
9566 }
e075ae69 9567
36583fea
JH
9568 if (diff == 1)
9569 {
9570 /*
9571 * cmpl op0,op1
9572 * sbbl dest,dest
9573 * [addl dest, ct]
9574 *
9575 * Size 5 - 8.
9576 */
9577 if (ct)
635559ab
JH
9578 tmp = expand_simple_binop (mode, PLUS,
9579 tmp, GEN_INT (ct),
4977bab6 9580 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9581 }
9582 else if (cf == -1)
9583 {
9584 /*
9585 * cmpl op0,op1
9586 * sbbl dest,dest
9587 * orl $ct, dest
9588 *
9589 * Size 8.
9590 */
635559ab
JH
9591 tmp = expand_simple_binop (mode, IOR,
9592 tmp, GEN_INT (ct),
4977bab6 9593 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9594 }
9595 else if (diff == -1 && ct)
9596 {
9597 /*
9598 * cmpl op0,op1
9599 * sbbl dest,dest
06ec023f 9600 * notl dest
36583fea
JH
9601 * [addl dest, cf]
9602 *
9603 * Size 8 - 11.
9604 */
4977bab6 9605 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab
JH
9606 if (cf)
9607 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9608 copy_rtx (tmp), GEN_INT (cf),
9609 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9610 }
9611 else
9612 {
9613 /*
9614 * cmpl op0,op1
9615 * sbbl dest,dest
06ec023f 9616 * [notl dest]
36583fea
JH
9617 * andl cf - ct, dest
9618 * [addl dest, ct]
9619 *
9620 * Size 8 - 11.
9621 */
06ec023f
RB
9622
9623 if (cf == 0)
9624 {
9625 cf = ct;
9626 ct = 0;
4977bab6 9627 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9628 }
9629
635559ab 9630 tmp = expand_simple_binop (mode, AND,
4977bab6 9631 copy_rtx (tmp),
d8bf17f9 9632 gen_int_mode (cf - ct, mode),
4977bab6 9633 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab
JH
9634 if (ct)
9635 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9636 copy_rtx (tmp), GEN_INT (ct),
9637 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9638 }
e075ae69 9639
4977bab6
ZW
9640 if (!rtx_equal_p (tmp, out))
9641 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9642
9643 return 1; /* DONE */
9644 }
9645
e075ae69
RH
9646 if (diff < 0)
9647 {
9648 HOST_WIDE_INT tmp;
9649 tmp = ct, ct = cf, cf = tmp;
9650 diff = -diff;
734dba19
JH
9651 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9652 {
9653 /* We may be reversing unordered compare to normal compare, that
9654 is not valid in general (we may convert non-trapping condition
9655 to trapping one), however on i386 we currently emit all
9656 comparisons unordered. */
9657 compare_code = reverse_condition_maybe_unordered (compare_code);
9658 code = reverse_condition_maybe_unordered (code);
9659 }
9660 else
9661 {
9662 compare_code = reverse_condition (compare_code);
9663 code = reverse_condition (code);
9664 }
e075ae69 9665 }
0f2a3457
JJ
9666
9667 compare_code = NIL;
9668 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9669 && GET_CODE (ix86_compare_op1) == CONST_INT)
9670 {
9671 if (ix86_compare_op1 == const0_rtx
9672 && (code == LT || code == GE))
9673 compare_code = code;
9674 else if (ix86_compare_op1 == constm1_rtx)
9675 {
9676 if (code == LE)
9677 compare_code = LT;
9678 else if (code == GT)
9679 compare_code = GE;
9680 }
9681 }
9682
9683 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9684 if (compare_code != NIL
9685 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9686 && (cf == -1 || ct == -1))
9687 {
9688 /* If lea code below could be used, only optimize
9689 if it results in a 2 insn sequence. */
9690
9691 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9692 || diff == 3 || diff == 5 || diff == 9)
9693 || (compare_code == LT && ct == -1)
9694 || (compare_code == GE && cf == -1))
9695 {
9696 /*
9697 * notl op1 (if necessary)
9698 * sarl $31, op1
9699 * orl cf, op1
9700 */
9701 if (ct != -1)
9702 {
9703 cf = ct;
9704 ct = -1;
9705 code = reverse_condition (code);
9706 }
9707
9708 out = emit_store_flag (out, code, ix86_compare_op0,
9709 ix86_compare_op1, VOIDmode, 0, -1);
9710
9711 out = expand_simple_binop (mode, IOR,
9712 out, GEN_INT (cf),
9713 out, 1, OPTAB_DIRECT);
9714 if (out != operands[0])
9715 emit_move_insn (operands[0], out);
9716
9717 return 1; /* DONE */
9718 }
9719 }
9720
4977bab6 9721
635559ab
JH
9722 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9723 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9724 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 9725 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9726 {
9727 /*
9728 * xorl dest,dest
9729 * cmpl op1,op2
9730 * setcc dest
9731 * lea cf(dest*(ct-cf)),dest
9732 *
9733 * Size 14.
9734 *
9735 * This also catches the degenerate setcc-only case.
9736 */
9737
9738 rtx tmp;
9739 int nops;
9740
9741 out = emit_store_flag (out, code, ix86_compare_op0,
9742 ix86_compare_op1, VOIDmode, 0, 1);
9743
9744 nops = 0;
97f51ac4
RB
9745 /* On x86_64 the lea instruction operates on Pmode, so we need
9746 to get arithmetics done in proper mode to match. */
e075ae69 9747 if (diff == 1)
068f5dea 9748 tmp = copy_rtx (out);
e075ae69
RH
9749 else
9750 {
885a70fd 9751 rtx out1;
068f5dea 9752 out1 = copy_rtx (out);
635559ab 9753 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9754 nops++;
9755 if (diff & 1)
9756 {
635559ab 9757 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9758 nops++;
9759 }
9760 }
9761 if (cf != 0)
9762 {
635559ab 9763 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9764 nops++;
9765 }
4977bab6 9766 if (!rtx_equal_p (tmp, out))
e075ae69 9767 {
14f73b5a 9768 if (nops == 1)
a5cf80f0 9769 out = force_operand (tmp, copy_rtx (out));
e075ae69 9770 else
4977bab6 9771 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9772 }
4977bab6 9773 if (!rtx_equal_p (out, operands[0]))
1985ef90 9774 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9775
9776 return 1; /* DONE */
9777 }
9778
9779 /*
9780 * General case: Jumpful:
9781 * xorl dest,dest cmpl op1, op2
9782 * cmpl op1, op2 movl ct, dest
9783 * setcc dest jcc 1f
9784 * decl dest movl cf, dest
9785 * andl (cf-ct),dest 1:
9786 * addl ct,dest
0f290768 9787 *
e075ae69
RH
9788 * Size 20. Size 14.
9789 *
9790 * This is reasonably steep, but branch mispredict costs are
9791 * high on modern cpus, so consider failing only if optimizing
9792 * for space.
e075ae69
RH
9793 */
9794
4977bab6
ZW
9795 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9796 && BRANCH_COST >= 2)
e075ae69 9797 {
97f51ac4 9798 if (cf == 0)
e075ae69 9799 {
97f51ac4
RB
9800 cf = ct;
9801 ct = 0;
734dba19 9802 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9803 /* We may be reversing unordered compare to normal compare,
9804 that is not valid in general (we may convert non-trapping
9805 condition to trapping one), however on i386 we currently
9806 emit all comparisons unordered. */
9807 code = reverse_condition_maybe_unordered (code);
9808 else
9809 {
9810 code = reverse_condition (code);
9811 if (compare_code != NIL)
9812 compare_code = reverse_condition (compare_code);
9813 }
9814 }
9815
9816 if (compare_code != NIL)
9817 {
9818 /* notl op1 (if needed)
9819 sarl $31, op1
9820 andl (cf-ct), op1
9821 addl ct, op1
9822
9823 For x < 0 (resp. x <= -1) there will be no notl,
9824 so if possible swap the constants to get rid of the
9825 complement.
9826 True/false will be -1/0 while code below (store flag
9827 followed by decrement) is 0/-1, so the constants need
9828 to be exchanged once more. */
9829
9830 if (compare_code == GE || !cf)
734dba19 9831 {
0f2a3457
JJ
9832 code = reverse_condition (code);
9833 compare_code = LT;
734dba19
JH
9834 }
9835 else
9836 {
0f2a3457
JJ
9837 HOST_WIDE_INT tmp = cf;
9838 cf = ct;
9839 ct = tmp;
734dba19 9840 }
0f2a3457
JJ
9841
9842 out = emit_store_flag (out, code, ix86_compare_op0,
9843 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9844 }
0f2a3457
JJ
9845 else
9846 {
9847 out = emit_store_flag (out, code, ix86_compare_op0,
9848 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9849
4977bab6
ZW
9850 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9851 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 9852 }
e075ae69 9853
4977bab6 9854 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 9855 gen_int_mode (cf - ct, mode),
4977bab6 9856 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 9857 if (ct)
4977bab6
ZW
9858 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9859 copy_rtx (out), 1, OPTAB_DIRECT);
9860 if (!rtx_equal_p (out, operands[0]))
9861 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9862
9863 return 1; /* DONE */
9864 }
9865 }
9866
4977bab6 9867 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
9868 {
9869 /* Try a few things more with specific constants and a variable. */
9870
78a0d70c 9871 optab op;
e075ae69
RH
9872 rtx var, orig_out, out, tmp;
9873
4977bab6 9874 if (BRANCH_COST <= 2)
e075ae69
RH
9875 return 0; /* FAIL */
9876
0f290768 9877 /* If one of the two operands is an interesting constant, load a
e075ae69 9878 constant with the above and mask it in with a logical operation. */
0f290768 9879
e075ae69
RH
9880 if (GET_CODE (operands[2]) == CONST_INT)
9881 {
9882 var = operands[3];
4977bab6 9883 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 9884 operands[3] = constm1_rtx, op = and_optab;
4977bab6 9885 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 9886 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9887 else
9888 return 0; /* FAIL */
e075ae69
RH
9889 }
9890 else if (GET_CODE (operands[3]) == CONST_INT)
9891 {
9892 var = operands[2];
4977bab6 9893 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 9894 operands[2] = constm1_rtx, op = and_optab;
4977bab6 9895 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 9896 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9897 else
9898 return 0; /* FAIL */
e075ae69 9899 }
78a0d70c 9900 else
e075ae69
RH
9901 return 0; /* FAIL */
9902
9903 orig_out = operands[0];
635559ab 9904 tmp = gen_reg_rtx (mode);
e075ae69
RH
9905 operands[0] = tmp;
9906
9907 /* Recurse to get the constant loaded. */
9908 if (ix86_expand_int_movcc (operands) == 0)
9909 return 0; /* FAIL */
9910
9911 /* Mask in the interesting variable. */
635559ab 9912 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 9913 OPTAB_WIDEN);
4977bab6
ZW
9914 if (!rtx_equal_p (out, orig_out))
9915 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
9916
9917 return 1; /* DONE */
9918 }
9919
9920 /*
9921 * For comparison with above,
9922 *
9923 * movl cf,dest
9924 * movl ct,tmp
9925 * cmpl op1,op2
9926 * cmovcc tmp,dest
9927 *
9928 * Size 15.
9929 */
9930
635559ab
JH
9931 if (! nonimmediate_operand (operands[2], mode))
9932 operands[2] = force_reg (mode, operands[2]);
9933 if (! nonimmediate_operand (operands[3], mode))
9934 operands[3] = force_reg (mode, operands[3]);
e075ae69 9935
a1b8572c
JH
9936 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9937 {
635559ab 9938 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9939 emit_move_insn (tmp, operands[3]);
9940 operands[3] = tmp;
9941 }
9942 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9943 {
635559ab 9944 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9945 emit_move_insn (tmp, operands[2]);
9946 operands[2] = tmp;
9947 }
4977bab6 9948
c9682caf 9949 if (! register_operand (operands[2], VOIDmode)
4977bab6
ZW
9950 && (mode == QImode
9951 || ! register_operand (operands[3], VOIDmode)))
635559ab 9952 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9953
4977bab6
ZW
9954 if (mode == QImode
9955 && ! register_operand (operands[3], VOIDmode))
9956 operands[3] = force_reg (mode, operands[3]);
9957
e075ae69
RH
9958 emit_insn (compare_seq);
9959 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9960 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9961 compare_op, operands[2],
9962 operands[3])));
a1b8572c 9963 if (bypass_test)
4977bab6 9964 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9965 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9966 bypass_test,
4977bab6
ZW
9967 copy_rtx (operands[3]),
9968 copy_rtx (operands[0]))));
a1b8572c 9969 if (second_test)
4977bab6 9970 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9971 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9972 second_test,
4977bab6
ZW
9973 copy_rtx (operands[2]),
9974 copy_rtx (operands[0]))));
e075ae69
RH
9975
9976 return 1; /* DONE */
e9a25f70 9977}
e075ae69 9978
/* Expand a floating point conditional move: operands[0] receives
   operands[2] when the comparison operands[1] holds, else operands[3].
   The comparison arms live in the globals ix86_compare_op0 and
   ix86_compare_op1.  Every path below emits the complete insn sequence
   and returns 1 (DONE).  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  /* Swap the comparison arms and reverse the condition so the
	     "true" arm of the move lines up with op0.  */
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT || code == UNLE)
	    {
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  /* NOTE: this swaps the global comparison operands as a side
	     effect; operands[1] is rebuilt to match.  */
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* NOTE(review): this path presumably only triggers for integer
	 comparisons, which never need the extra tests — confirm.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      /* Materialize the condition as a QImode 0/1 value via setcc, then
	 re-compare that value against zero, which fcmov can handle.  */
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* If a follow-up test will read a move arm that overlaps the
     destination, preserve that arm in a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  /* Emit the conditional move itself, then patch the result with the
     bypass/second tests when one flag check was not enough.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
10144
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.

   operands[0] is the result, operands[1] the condition (its arms live
   in the globals ix86_compare_op0/ix86_compare_op1), operands[2] the
   input value and operands[3] the increment, which must be const1_rtx
   or constm1_rtx.  Returns 0 (FAIL) when the pattern does not apply,
   1 (DONE) after emitting the carry-based sequence.  */
int
ix86_expand_int_addcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  /* Explicit second addend: 0 or -1; the carry flag supplies the
     remaining +/-1.  */
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only increments of +1/-1 can be folded into adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The condition must be expressible as a carry-flag test.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU (carry set).  Otherwise reverse the condition
     in place and compensate by starting from -1 instead of 0.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  return 1; /* DONE */
}
10228
10229
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the pieces (lowest
   part first), MODE is the mode to split as.  Returns the number of
   parts (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Part count: 32bit targets split into SImode words (TFmode uses
     only three of its four words); 64bit targets into DImode words.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be addressed piecewise.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* For a push, all parts are the same pre-dec address; the caller
	 relies on emitting them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard registers only: consecutive regnos form the parts.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose the FP constant into target-format words.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      /* 64bit target: only TImode and the 80bit FP modes need
	 splitting; everything else fits one DImode word.  */
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = gen_int_mode (l[2], SImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
10365
/* Emit insns to perform a move or push of DI, DF, and XF values.
   All required insns are emitted here (the function returns nothing).
   As scratch space, operands 2-4 receive the destination parts and
   operands 5-7 the source parts, in copy order, for the final
   non-push move sequence at the bottom.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = source parts.  */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so re-address the later source
     parts relative to the next part's address.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go from the highest part down; the pre-dec addresses in
	 part[0][] make each push land in the right slot.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy highest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy lowest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 10553
e075ae69
RH
10554void
10555ix86_split_ashldi (operands, scratch)
10556 rtx *operands, scratch;
32b5b1aa 10557{
e075ae69
RH
10558 rtx low[2], high[2];
10559 int count;
b985a30f 10560
e075ae69
RH
10561 if (GET_CODE (operands[2]) == CONST_INT)
10562 {
10563 split_di (operands, 2, low, high);
10564 count = INTVAL (operands[2]) & 63;
32b5b1aa 10565
e075ae69
RH
10566 if (count >= 32)
10567 {
10568 emit_move_insn (high[0], low[1]);
10569 emit_move_insn (low[0], const0_rtx);
b985a30f 10570
e075ae69
RH
10571 if (count > 32)
10572 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10573 }
10574 else
10575 {
10576 if (!rtx_equal_p (operands[0], operands[1]))
10577 emit_move_insn (operands[0], operands[1]);
10578 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10579 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10580 }
10581 }
10582 else
10583 {
10584 if (!rtx_equal_p (operands[0], operands[1]))
10585 emit_move_insn (operands[0], operands[1]);
b985a30f 10586
e075ae69 10587 split_di (operands, 1, low, high);
b985a30f 10588
e075ae69
RH
10589 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10590 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10591
fe577e58 10592 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10593 {
fe577e58 10594 if (! no_new_pseudos)
e075ae69
RH
10595 scratch = force_reg (SImode, const0_rtx);
10596 else
10597 emit_move_insn (scratch, const0_rtx);
10598
10599 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10600 scratch));
10601 }
10602 else
10603 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10604 }
e9a25f70 10605}
32b5b1aa 10606
e075ae69
RH
10607void
10608ix86_split_ashrdi (operands, scratch)
10609 rtx *operands, scratch;
32b5b1aa 10610{
e075ae69
RH
10611 rtx low[2], high[2];
10612 int count;
32b5b1aa 10613
e075ae69
RH
10614 if (GET_CODE (operands[2]) == CONST_INT)
10615 {
10616 split_di (operands, 2, low, high);
10617 count = INTVAL (operands[2]) & 63;
32b5b1aa 10618
e075ae69
RH
10619 if (count >= 32)
10620 {
10621 emit_move_insn (low[0], high[1]);
32b5b1aa 10622
e075ae69
RH
10623 if (! reload_completed)
10624 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10625 else
10626 {
10627 emit_move_insn (high[0], low[0]);
10628 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10629 }
10630
10631 if (count > 32)
10632 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10633 }
10634 else
10635 {
10636 if (!rtx_equal_p (operands[0], operands[1]))
10637 emit_move_insn (operands[0], operands[1]);
10638 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10639 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10640 }
10641 }
10642 else
32b5b1aa 10643 {
e075ae69
RH
10644 if (!rtx_equal_p (operands[0], operands[1]))
10645 emit_move_insn (operands[0], operands[1]);
10646
10647 split_di (operands, 1, low, high);
10648
10649 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10650 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10651
fe577e58 10652 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10653 {
fe577e58 10654 if (! no_new_pseudos)
e075ae69
RH
10655 scratch = gen_reg_rtx (SImode);
10656 emit_move_insn (scratch, high[0]);
10657 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10658 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10659 scratch));
10660 }
10661 else
10662 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10663 }
e075ae69 10664}
32b5b1aa 10665
e075ae69
RH
/* Split a DImode logical right shift into SImode operations.
   OPERANDS[0] = destination, OPERANDS[1] = source, OPERANDS[2] = shift
   count.  SCRATCH is an optional SImode scratch register used when no
   new pseudos may be created (e.g. after reload); it may be NULL when
   new pseudos are still allowed.  */
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant shift count: split both source and destination and
	 emit the minimal fixed sequence.  Count is taken mod 64.  */
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Shifting by >= 32: low word becomes the (shifted) high
	     word of the source, high word becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  /* Shifting by < 32: shrd feeds high-word bits into the low
	     word, then the high word is shifted logically.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable shift count: emit shrd + shr, then fix up the case
	 where bit 5 of the count was set (i.e. count >= 32).  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.
	 For lshr the fill value is zero, so a zeroed scratch register
	 plays the role the sign-bit register plays for ashr.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	/* No cmove (or no scratch available): branch-based fixup.  */
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 10719
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  Emits (VARIABLE & VALUE) and a compare
   against zero, and returns a fresh label that is branched to when the
   tested bits are all clear (i.e. when VARIABLE is already aligned).
   The caller emits its unaligned-fixup code and then places the label
   after it, so the fixup is skipped in the aligned case.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  /* Scratch for the masked value, in VARIABLE's own mode.  */
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  /* Branch to LABEL when the alignment bits are zero.  */
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
10737
10738/* Adjust COUNTER by the VALUE. */
10739static void
10740ix86_adjust_counter (countreg, value)
10741 rtx countreg;
10742 HOST_WIDE_INT value;
10743{
10744 if (GET_MODE (countreg) == DImode)
10745 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10746 else
10747 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10748}
10749
10750/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10751rtx
0945b39d
JH
10752ix86_zero_extend_to_Pmode (exp)
10753 rtx exp;
10754{
10755 rtx r;
10756 if (GET_MODE (exp) == VOIDmode)
10757 return force_reg (Pmode, exp);
10758 if (GET_MODE (exp) == Pmode)
10759 return copy_to_mode_reg (Pmode, exp);
10760 r = gen_reg_rtx (Pmode);
10761 emit_insn (gen_zero_extendsidi2 (r, exp));
10762 return r;
10763}
10764
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.

   DST and SRC are the destination/source MEMs, COUNT_EXP the byte count,
   ALIGN_EXP the known alignment.  Returns 1 when an inline expansion was
   emitted, 0 to tell the caller to fall back to a library call.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;


  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      /* Large constant copies go to the library unless inlining of all
	 string operations was requested.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  /* Collect the emitted insns so memory attributes can be set on them
     before they are added to the insn stream (see end of function).  */
  start_sequence ();

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  /* String instructions depend on the direction flag being clear.  */
  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size for the bulk copy: 8 bytes on 64-bit unless sizing.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  /* Emit rep movs{l,q} for the word-sized portion.  The mask
	     keeps the 32-bit count representable on 32-bit hosts.  */
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining 0..size-1 tail bytes with single movs.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  /* Runtime count: skip the alignment prologue entirely for
	     counts too small to need it.  */
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: copy 1, 2, then 4 bytes as needed until the
	 destination reaches desired_alignment.  Each aligntest branches
	 over its fixup when that alignment step is unnecessary.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Bulk copy: shift the byte count down to a word count and emit
	 rep movs{l,q}.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the 0..word-1 leftover bytes, testing the count
	 register at runtime where the count is not a compile-time
	 constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  /* Attach memory attributes derived from DST/SRC to the generated
     insns, then emit the whole sequence.  */
  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}
11000
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.

   SRC is the destination MEM (the block being cleared), COUNT_EXP the
   byte count, ALIGN_EXP the known alignment.  Returns 1 when an inline
   expansion was emitted, 0 to request a library call instead.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      /* Large constant clears go to the library unless inlining of all
	 string operations was requested.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  /* String instructions depend on the direction flag being clear.  */
  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  /* For constant aligned (or small unaligned) clears, use rep stos{l,q}
     for the bulk and explicit stores for the tail.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size for the bulk clear: 8 bytes on 64-bit unless sizing.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      /* Store the remaining 0..size-1 tail bytes; narrower stores reuse
	 the zero register through SUBREGs.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
	{
	  /* Runtime count: skip the alignment prologue for counts too
	     small to need it.  */
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: store 1, 2, then 4 bytes as needed until the
	 destination reaches desired_alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  /* zeroreg is Pmode; on 64-bit a SUBREG extracts its low word.  */
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Bulk clear: shift the byte count down to a word count and emit
	 rep stos{l,q}.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: store the 0..word-1 leftover bytes, testing the count
	 register at runtime where the count is not constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.

   OUT is the result register, SRC the source MEM, EOSCHAR the terminator
   byte (const0_rtx for real strlen), ALIGN the known alignment of SRC.
   Returns 1 when an inline expansion was emitted, 0 to fall back to the
   library.  Two strategies are used: an unrolled word-at-a-time scan
   (ix86_expand_strlensi_unroll_1) when profitable, otherwise
   repnz scasb.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz scasb path: scan with an all-ones (i.e. maximal) count,
	 then OUT = ~remaining_count - 1 gives the length.  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
11282
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (addr & 3): 0 -> word loop directly; 2 -> need
	     two byte checks; 3 -> need one byte check; 1 -> fall
	     through and do all three.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (word - 0x01010101) & ~word & 0x80808080 sets a byte's high bit
     exactly when that byte was zero.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
       /* Branch-free selection of which half-word held the zero byte,
	  using conditional moves keyed off one test insn.  */
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
       else
	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The low byte of tmpreg is 0x80
     iff the zero was in the earlier of the remaining two bytes; adding
     it to itself sets carry accordingly, and the subtract-with-carry
     turns OUT - 3 into OUT - 2/-3 as appropriate.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3
RH
11474
/* Emit a call (or sibling call when SIBCALL is nonzero) to FNADDR.
   RETVAL, when non-null, receives the call's value.  CALLARG1 is the
   bytes-of-arguments operand of the CALL rtx; CALLARG2 is (64-bit only)
   the number of SSE registers used for a varargs call, loaded into %al.
   POP, when nonzero, is the byte count the callee pops from the stack
   (callee-popped conventions; invalid on 64-bit).  */
void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
     rtx retval, fnaddr, callarg1, callarg2, pop;
     int sibcall;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* The 64-bit ABI has no callee-pop convention.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      /* %al tells a varargs callee how many SSE registers carry
	 arguments; record it as used by the call.  */
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Force an address that is not directly callable into a register.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit sibling calls through a register must use a call-clobbered
     register that is not used for argument passing (hard reg 40).  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, 40);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Model the callee's stack pop as a parallel stack adjustment.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 11534
e075ae69 11535\f
e075ae69
RH
11536/* Clear stack slot assignments remembered from previous functions.
11537 This is called from INIT_EXPANDERS once before RTL is emitted for each
11538 function. */
11539
e2500fed
GK
11540static struct machine_function *
11541ix86_init_machine_status ()
37b15744 11542{
e2500fed 11543 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
11544}
11545
e075ae69
RH
11546/* Return a MEM corresponding to a stack slot with mode MODE.
11547 Allocate a new slot if necessary.
11548
11549 The RTL for a function can have several slots available: N is
11550 which slot to use. */
11551
11552rtx
11553assign_386_stack_local (mode, n)
11554 enum machine_mode mode;
11555 int n;
11556{
11557 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11558 abort ();
11559
11560 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11561 ix86_stack_locals[(int) mode][n]
11562 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11563
11564 return ix86_stack_locals[(int) mode][n];
11565}
f996902d
RH
11566
11567/* Construct the SYMBOL_REF for the tls_get_addr function. */
11568
e2500fed 11569static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
11570rtx
11571ix86_tls_get_addr ()
11572{
f996902d 11573
e2500fed 11574 if (!ix86_tls_symbol)
f996902d 11575 {
75d38379
JJ
11576 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11577 (TARGET_GNU_TLS && !TARGET_64BIT)
11578 ? "___tls_get_addr"
11579 : "__tls_get_addr");
f996902d
RH
11580 }
11581
e2500fed 11582 return ix86_tls_symbol;
f996902d 11583}
e075ae69
RH
11584\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of displacement/SIB bytes the address ADDR will
   occupy; 0 for auto-inc/dec forms (their size is accounted elsewhere).  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.
	 (esp requires a SIB byte; ebp requires a disp8 of zero.)  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: 32-bit absolute displacement only.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit constants -> one-byte disp8.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form (SIB byte).  */
      if (index)
	len += 1;
    }

  return len;
}
79325812 11644
5bf0ebab
RH
11645/* Compute default value for "length_immediate" attribute. When SHORTFORM
11646 is set, expect that insn have 8bit immediate alternative. */
e075ae69 11647int
6ef67412 11648ix86_attr_length_immediate_default (insn, shortform)
e075ae69 11649 rtx insn;
6ef67412 11650 int shortform;
e075ae69 11651{
6ef67412
JH
11652 int len = 0;
11653 int i;
6c698a6d 11654 extract_insn_cached (insn);
6ef67412
JH
11655 for (i = recog_data.n_operands - 1; i >= 0; --i)
11656 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11657 {
6ef67412 11658 if (len)
3071fab5 11659 abort ();
6ef67412
JH
11660 if (shortform
11661 && GET_CODE (recog_data.operand[i]) == CONST_INT
11662 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11663 len = 1;
11664 else
11665 {
11666 switch (get_attr_mode (insn))
11667 {
11668 case MODE_QI:
11669 len+=1;
11670 break;
11671 case MODE_HI:
11672 len+=2;
11673 break;
11674 case MODE_SI:
11675 len+=4;
11676 break;
14f73b5a
JH
11677 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11678 case MODE_DI:
11679 len+=4;
11680 break;
6ef67412 11681 default:
c725bd79 11682 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11683 }
11684 }
3071fab5 11685 }
6ef67412
JH
11686 return len;
11687}
11688/* Compute default value for "length_address" attribute. */
11689int
11690ix86_attr_length_address_default (insn)
11691 rtx insn;
11692{
11693 int i;
6c698a6d 11694 extract_insn_cached (insn);
1ccbefce
RH
11695 for (i = recog_data.n_operands - 1; i >= 0; --i)
11696 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11697 {
6ef67412 11698 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11699 break;
11700 }
6ef67412 11701 return 0;
3f803cd9 11702}
e075ae69
RH
11703\f
11704/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11705
c237e94a 11706static int
e075ae69 11707ix86_issue_rate ()
b657fc39 11708{
e075ae69 11709 switch (ix86_cpu)
b657fc39 11710 {
e075ae69
RH
11711 case PROCESSOR_PENTIUM:
11712 case PROCESSOR_K6:
11713 return 2;
79325812 11714
e075ae69 11715 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11716 case PROCESSOR_PENTIUM4:
11717 case PROCESSOR_ATHLON:
4977bab6 11718 case PROCESSOR_K8:
e075ae69 11719 return 3;
b657fc39 11720
b657fc39 11721 default:
e075ae69 11722 return 1;
b657fc39 11723 }
b657fc39
L
11724}
11725
e075ae69
RH
11726/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11727 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 11728
e075ae69
RH
11729static int
11730ix86_flags_dependant (insn, dep_insn, insn_type)
11731 rtx insn, dep_insn;
11732 enum attr_type insn_type;
11733{
11734 rtx set, set2;
b657fc39 11735
e075ae69
RH
11736 /* Simplify the test for uninteresting insns. */
11737 if (insn_type != TYPE_SETCC
11738 && insn_type != TYPE_ICMOV
11739 && insn_type != TYPE_FCMOV
11740 && insn_type != TYPE_IBR)
11741 return 0;
b657fc39 11742
e075ae69
RH
11743 if ((set = single_set (dep_insn)) != 0)
11744 {
11745 set = SET_DEST (set);
11746 set2 = NULL_RTX;
11747 }
11748 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11749 && XVECLEN (PATTERN (dep_insn), 0) == 2
11750 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11751 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11752 {
11753 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11754 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11755 }
78a0d70c
ZW
11756 else
11757 return 0;
b657fc39 11758
78a0d70c
ZW
11759 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11760 return 0;
b657fc39 11761
f5143c46 11762 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11763 not any other potentially set register. */
11764 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11765 return 0;
11766
11767 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11768 return 0;
11769
11770 return 1;
e075ae69 11771}
b657fc39 11772
e075ae69
RH
11773/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11774 address with operands set by DEP_INSN. */
11775
11776static int
11777ix86_agi_dependant (insn, dep_insn, insn_type)
11778 rtx insn, dep_insn;
11779 enum attr_type insn_type;
11780{
11781 rtx addr;
11782
6ad48e84
JH
11783 if (insn_type == TYPE_LEA
11784 && TARGET_PENTIUM)
5fbdde42
RH
11785 {
11786 addr = PATTERN (insn);
11787 if (GET_CODE (addr) == SET)
11788 ;
11789 else if (GET_CODE (addr) == PARALLEL
11790 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11791 addr = XVECEXP (addr, 0, 0);
11792 else
11793 abort ();
11794 addr = SET_SRC (addr);
11795 }
e075ae69
RH
11796 else
11797 {
11798 int i;
6c698a6d 11799 extract_insn_cached (insn);
1ccbefce
RH
11800 for (i = recog_data.n_operands - 1; i >= 0; --i)
11801 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11802 {
1ccbefce 11803 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11804 goto found;
11805 }
11806 return 0;
11807 found:;
b657fc39
L
11808 }
11809
e075ae69 11810 return modified_in_p (addr, dep_insn);
b657fc39 11811}
a269a03c 11812
c237e94a 11813static int
e075ae69 11814ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11815 rtx insn, link, dep_insn;
11816 int cost;
11817{
e075ae69 11818 enum attr_type insn_type, dep_insn_type;
6ad48e84 11819 enum attr_memory memory, dep_memory;
e075ae69 11820 rtx set, set2;
9b00189f 11821 int dep_insn_code_number;
a269a03c 11822
d1f87653 11823 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11824 if (REG_NOTE_KIND (link) != 0)
309ada50 11825 return 0;
a269a03c 11826
9b00189f
JH
11827 dep_insn_code_number = recog_memoized (dep_insn);
11828
e075ae69 11829 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11830 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11831 return cost;
a269a03c 11832
1c71e60e
JH
11833 insn_type = get_attr_type (insn);
11834 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11835
a269a03c
JC
11836 switch (ix86_cpu)
11837 {
11838 case PROCESSOR_PENTIUM:
e075ae69
RH
11839 /* Address Generation Interlock adds a cycle of latency. */
11840 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11841 cost += 1;
11842
11843 /* ??? Compares pair with jump/setcc. */
11844 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11845 cost = 0;
11846
d1f87653 11847 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 11848 if (insn_type == TYPE_FMOV
e075ae69
RH
11849 && get_attr_memory (insn) == MEMORY_STORE
11850 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11851 cost += 1;
11852 break;
a269a03c 11853
e075ae69 11854 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11855 memory = get_attr_memory (insn);
11856 dep_memory = get_attr_memory (dep_insn);
11857
0f290768 11858 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11859 increase the cost here for non-imov insns. */
11860 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11861 && dep_insn_type != TYPE_FMOV
11862 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11863 cost += 1;
11864
11865 /* INT->FP conversion is expensive. */
11866 if (get_attr_fp_int_src (dep_insn))
11867 cost += 5;
11868
11869 /* There is one cycle extra latency between an FP op and a store. */
11870 if (insn_type == TYPE_FMOV
11871 && (set = single_set (dep_insn)) != NULL_RTX
11872 && (set2 = single_set (insn)) != NULL_RTX
11873 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11874 && GET_CODE (SET_DEST (set2)) == MEM)
11875 cost += 1;
6ad48e84
JH
11876
11877 /* Show ability of reorder buffer to hide latency of load by executing
11878 in parallel with previous instruction in case
11879 previous instruction is not needed to compute the address. */
11880 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11881 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11882 {
11883 /* Claim moves to take one cycle, as core can issue one load
11884 at time and the next load can start cycle later. */
11885 if (dep_insn_type == TYPE_IMOV
11886 || dep_insn_type == TYPE_FMOV)
11887 cost = 1;
11888 else if (cost > 1)
11889 cost--;
11890 }
e075ae69 11891 break;
a269a03c 11892
e075ae69 11893 case PROCESSOR_K6:
6ad48e84
JH
11894 memory = get_attr_memory (insn);
11895 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11896 /* The esp dependency is resolved before the instruction is really
11897 finished. */
11898 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11899 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11900 return 1;
a269a03c 11901
0f290768 11902 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11903 increase the cost here for non-imov insns. */
6ad48e84 11904 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11905 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11906
11907 /* INT->FP conversion is expensive. */
11908 if (get_attr_fp_int_src (dep_insn))
11909 cost += 5;
6ad48e84
JH
11910
11911 /* Show ability of reorder buffer to hide latency of load by executing
11912 in parallel with previous instruction in case
11913 previous instruction is not needed to compute the address. */
11914 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11915 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11916 {
11917 /* Claim moves to take one cycle, as core can issue one load
11918 at time and the next load can start cycle later. */
11919 if (dep_insn_type == TYPE_IMOV
11920 || dep_insn_type == TYPE_FMOV)
11921 cost = 1;
11922 else if (cost > 2)
11923 cost -= 2;
11924 else
11925 cost = 1;
11926 }
a14003ee 11927 break;
e075ae69 11928
309ada50 11929 case PROCESSOR_ATHLON:
4977bab6 11930 case PROCESSOR_K8:
6ad48e84
JH
11931 memory = get_attr_memory (insn);
11932 dep_memory = get_attr_memory (dep_insn);
11933
6ad48e84
JH
11934 /* Show ability of reorder buffer to hide latency of load by executing
11935 in parallel with previous instruction in case
11936 previous instruction is not needed to compute the address. */
11937 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11938 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11939 {
11940 /* Claim moves to take one cycle, as core can issue one load
11941 at time and the next load can start cycle later. */
11942 if (dep_insn_type == TYPE_IMOV
11943 || dep_insn_type == TYPE_FMOV)
11944 cost = 0;
11945 else if (cost >= 3)
11946 cost -= 3;
11947 else
11948 cost = 0;
11949 }
309ada50 11950
a269a03c 11951 default:
a269a03c
JC
11952 break;
11953 }
11954
11955 return cost;
11956}
0a726ef1 11957
e075ae69
RH
11958static union
11959{
11960 struct ppro_sched_data
11961 {
11962 rtx decode[3];
11963 int issued_this_cycle;
11964 } ppro;
11965} ix86_sched_data;
0a726ef1 11966
e075ae69
RH
11967static enum attr_ppro_uops
11968ix86_safe_ppro_uops (insn)
11969 rtx insn;
11970{
11971 if (recog_memoized (insn) >= 0)
11972 return get_attr_ppro_uops (insn);
11973 else
11974 return PPRO_UOPS_MANY;
11975}
0a726ef1 11976
e075ae69
RH
11977static void
11978ix86_dump_ppro_packet (dump)
11979 FILE *dump;
0a726ef1 11980{
e075ae69 11981 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11982 {
e075ae69
RH
11983 fprintf (dump, "PPRO packet: %d",
11984 INSN_UID (ix86_sched_data.ppro.decode[0]));
11985 if (ix86_sched_data.ppro.decode[1])
11986 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11987 if (ix86_sched_data.ppro.decode[2])
11988 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11989 fputc ('\n', dump);
11990 }
11991}
0a726ef1 11992
e075ae69 11993/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11994
c237e94a
ZW
11995static void
11996ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11997 FILE *dump ATTRIBUTE_UNUSED;
11998 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11999 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
12000{
12001 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12002}
12003
12004/* Shift INSN to SLOT, and shift everything else down. */
12005
12006static void
12007ix86_reorder_insn (insnp, slot)
12008 rtx *insnp, *slot;
12009{
12010 if (insnp != slot)
12011 {
12012 rtx insn = *insnp;
0f290768 12013 do
e075ae69
RH
12014 insnp[0] = insnp[1];
12015 while (++insnp != slot);
12016 *insnp = insn;
0a726ef1 12017 }
e075ae69
RH
12018}
12019
c6991660 12020static void
78a0d70c
ZW
12021ix86_sched_reorder_ppro (ready, e_ready)
12022 rtx *ready;
12023 rtx *e_ready;
12024{
12025 rtx decode[3];
12026 enum attr_ppro_uops cur_uops;
12027 int issued_this_cycle;
12028 rtx *insnp;
12029 int i;
e075ae69 12030
0f290768 12031 /* At this point .ppro.decode contains the state of the three
78a0d70c 12032 decoders from last "cycle". That is, those insns that were
0f290768 12033 actually independent. But here we're scheduling for the
78a0d70c
ZW
12034 decoder, and we may find things that are decodable in the
12035 same cycle. */
e075ae69 12036
0f290768 12037 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 12038 issued_this_cycle = 0;
e075ae69 12039
78a0d70c
ZW
12040 insnp = e_ready;
12041 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 12042
78a0d70c
ZW
12043 /* If the decoders are empty, and we've a complex insn at the
12044 head of the priority queue, let it issue without complaint. */
12045 if (decode[0] == NULL)
12046 {
12047 if (cur_uops == PPRO_UOPS_MANY)
12048 {
12049 decode[0] = *insnp;
12050 goto ppro_done;
12051 }
12052
12053 /* Otherwise, search for a 2-4 uop unsn to issue. */
12054 while (cur_uops != PPRO_UOPS_FEW)
12055 {
12056 if (insnp == ready)
12057 break;
12058 cur_uops = ix86_safe_ppro_uops (*--insnp);
12059 }
12060
12061 /* If so, move it to the head of the line. */
12062 if (cur_uops == PPRO_UOPS_FEW)
12063 ix86_reorder_insn (insnp, e_ready);
0a726ef1 12064
78a0d70c
ZW
12065 /* Issue the head of the queue. */
12066 issued_this_cycle = 1;
12067 decode[0] = *e_ready--;
12068 }
fb693d44 12069
78a0d70c
ZW
12070 /* Look for simple insns to fill in the other two slots. */
12071 for (i = 1; i < 3; ++i)
12072 if (decode[i] == NULL)
12073 {
a151daf0 12074 if (ready > e_ready)
78a0d70c 12075 goto ppro_done;
fb693d44 12076
e075ae69
RH
12077 insnp = e_ready;
12078 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
12079 while (cur_uops != PPRO_UOPS_ONE)
12080 {
12081 if (insnp == ready)
12082 break;
12083 cur_uops = ix86_safe_ppro_uops (*--insnp);
12084 }
fb693d44 12085
78a0d70c
ZW
12086 /* Found one. Move it to the head of the queue and issue it. */
12087 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 12088 {
78a0d70c
ZW
12089 ix86_reorder_insn (insnp, e_ready);
12090 decode[i] = *e_ready--;
12091 issued_this_cycle++;
12092 continue;
12093 }
fb693d44 12094
78a0d70c
ZW
12095 /* ??? Didn't find one. Ideally, here we would do a lazy split
12096 of 2-uop insns, issue one and queue the other. */
12097 }
fb693d44 12098
78a0d70c
ZW
12099 ppro_done:
12100 if (issued_this_cycle == 0)
12101 issued_this_cycle = 1;
12102 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12103}
fb693d44 12104
0f290768 12105/* We are about to being issuing insns for this clock cycle.
78a0d70c 12106 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
12107static int
12108ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
12109 FILE *dump ATTRIBUTE_UNUSED;
12110 int sched_verbose ATTRIBUTE_UNUSED;
12111 rtx *ready;
c237e94a 12112 int *n_readyp;
78a0d70c
ZW
12113 int clock_var ATTRIBUTE_UNUSED;
12114{
c237e94a 12115 int n_ready = *n_readyp;
78a0d70c 12116 rtx *e_ready = ready + n_ready - 1;
fb693d44 12117
fce5a9f2 12118 /* Make sure to go ahead and initialize key items in
a151daf0
JL
12119 ix86_sched_data if we are not going to bother trying to
12120 reorder the ready queue. */
78a0d70c 12121 if (n_ready < 2)
a151daf0
JL
12122 {
12123 ix86_sched_data.ppro.issued_this_cycle = 1;
12124 goto out;
12125 }
e075ae69 12126
78a0d70c
ZW
12127 switch (ix86_cpu)
12128 {
12129 default:
12130 break;
e075ae69 12131
78a0d70c
ZW
12132 case PROCESSOR_PENTIUMPRO:
12133 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 12134 break;
fb693d44
RH
12135 }
12136
e075ae69
RH
12137out:
12138 return ix86_issue_rate ();
12139}
fb693d44 12140
e075ae69
RH
12141/* We are about to issue INSN. Return the number of insns left on the
12142 ready queue that can be issued this cycle. */
b222082e 12143
c237e94a 12144static int
e075ae69
RH
12145ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12146 FILE *dump;
12147 int sched_verbose;
12148 rtx insn;
12149 int can_issue_more;
12150{
12151 int i;
12152 switch (ix86_cpu)
fb693d44 12153 {
e075ae69
RH
12154 default:
12155 return can_issue_more - 1;
fb693d44 12156
e075ae69
RH
12157 case PROCESSOR_PENTIUMPRO:
12158 {
12159 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 12160
e075ae69
RH
12161 if (uops == PPRO_UOPS_MANY)
12162 {
12163 if (sched_verbose)
12164 ix86_dump_ppro_packet (dump);
12165 ix86_sched_data.ppro.decode[0] = insn;
12166 ix86_sched_data.ppro.decode[1] = NULL;
12167 ix86_sched_data.ppro.decode[2] = NULL;
12168 if (sched_verbose)
12169 ix86_dump_ppro_packet (dump);
12170 ix86_sched_data.ppro.decode[0] = NULL;
12171 }
12172 else if (uops == PPRO_UOPS_FEW)
12173 {
12174 if (sched_verbose)
12175 ix86_dump_ppro_packet (dump);
12176 ix86_sched_data.ppro.decode[0] = insn;
12177 ix86_sched_data.ppro.decode[1] = NULL;
12178 ix86_sched_data.ppro.decode[2] = NULL;
12179 }
12180 else
12181 {
12182 for (i = 0; i < 3; ++i)
12183 if (ix86_sched_data.ppro.decode[i] == NULL)
12184 {
12185 ix86_sched_data.ppro.decode[i] = insn;
12186 break;
12187 }
12188 if (i == 3)
12189 abort ();
12190 if (i == 2)
12191 {
12192 if (sched_verbose)
12193 ix86_dump_ppro_packet (dump);
12194 ix86_sched_data.ppro.decode[0] = NULL;
12195 ix86_sched_data.ppro.decode[1] = NULL;
12196 ix86_sched_data.ppro.decode[2] = NULL;
12197 }
12198 }
12199 }
12200 return --ix86_sched_data.ppro.issued_this_cycle;
12201 }
fb693d44 12202}
9b690711
RH
12203
12204static int
12205ia32_use_dfa_pipeline_interface ()
12206{
4977bab6 12207 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
9b690711
RH
12208 return 1;
12209 return 0;
12210}
12211
12212/* How many alternative schedules to try. This should be as wide as the
12213 scheduling freedom in the DFA, but no wider. Making this value too
12214 large results extra work for the scheduler. */
12215
12216static int
12217ia32_multipass_dfa_lookahead ()
12218{
12219 if (ix86_cpu == PROCESSOR_PENTIUM)
12220 return 2;
12221 else
12222 return 0;
12223}
12224
a7180f70 12225\f
0e4970d7
RK
12226/* Walk through INSNS and look for MEM references whose address is DSTREG or
12227 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12228 appropriate. */
12229
12230void
12231ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12232 rtx insns;
12233 rtx dstref, srcref, dstreg, srcreg;
12234{
12235 rtx insn;
12236
12237 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12238 if (INSN_P (insn))
12239 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12240 dstreg, srcreg);
12241}
12242
12243/* Subroutine of above to actually do the updating by recursively walking
12244 the rtx. */
12245
12246static void
12247ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12248 rtx x;
12249 rtx dstref, srcref, dstreg, srcreg;
12250{
12251 enum rtx_code code = GET_CODE (x);
12252 const char *format_ptr = GET_RTX_FORMAT (code);
12253 int i, j;
12254
12255 if (code == MEM && XEXP (x, 0) == dstreg)
12256 MEM_COPY_ATTRIBUTES (x, dstref);
12257 else if (code == MEM && XEXP (x, 0) == srcreg)
12258 MEM_COPY_ATTRIBUTES (x, srcref);
12259
12260 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12261 {
12262 if (*format_ptr == 'e')
12263 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12264 dstreg, srcreg);
12265 else if (*format_ptr == 'E')
12266 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 12267 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
12268 dstreg, srcreg);
12269 }
12270}
12271\f
a7180f70
BS
12272/* Compute the alignment given to a constant that is being placed in memory.
12273 EXP is the constant and ALIGN is the alignment that the object would
12274 ordinarily have.
12275 The value of this function is used instead of that alignment to align
12276 the object. */
12277
12278int
12279ix86_constant_alignment (exp, align)
12280 tree exp;
12281 int align;
12282{
12283 if (TREE_CODE (exp) == REAL_CST)
12284 {
12285 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12286 return 64;
12287 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12288 return 128;
12289 }
12290 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12291 && align < 256)
12292 return 256;
12293
12294 return align;
12295}
12296
12297/* Compute the alignment for a static variable.
12298 TYPE is the data type, and ALIGN is the alignment that
12299 the object would ordinarily have. The value of this function is used
12300 instead of that alignment to align the object. */
12301
12302int
12303ix86_data_alignment (type, align)
12304 tree type;
12305 int align;
12306{
12307 if (AGGREGATE_TYPE_P (type)
12308 && TYPE_SIZE (type)
12309 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12310 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12311 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12312 return 256;
12313
0d7d98ee
JH
12314 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12315 to 16byte boundary. */
12316 if (TARGET_64BIT)
12317 {
12318 if (AGGREGATE_TYPE_P (type)
12319 && TYPE_SIZE (type)
12320 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12321 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12322 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12323 return 128;
12324 }
12325
a7180f70
BS
12326 if (TREE_CODE (type) == ARRAY_TYPE)
12327 {
12328 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12329 return 64;
12330 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12331 return 128;
12332 }
12333 else if (TREE_CODE (type) == COMPLEX_TYPE)
12334 {
0f290768 12335
a7180f70
BS
12336 if (TYPE_MODE (type) == DCmode && align < 64)
12337 return 64;
12338 if (TYPE_MODE (type) == XCmode && align < 128)
12339 return 128;
12340 }
12341 else if ((TREE_CODE (type) == RECORD_TYPE
12342 || TREE_CODE (type) == UNION_TYPE
12343 || TREE_CODE (type) == QUAL_UNION_TYPE)
12344 && TYPE_FIELDS (type))
12345 {
12346 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12347 return 64;
12348 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12349 return 128;
12350 }
12351 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12352 || TREE_CODE (type) == INTEGER_TYPE)
12353 {
12354 if (TYPE_MODE (type) == DFmode && align < 64)
12355 return 64;
12356 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12357 return 128;
12358 }
12359
12360 return align;
12361}
12362
12363/* Compute the alignment for a local variable.
12364 TYPE is the data type, and ALIGN is the alignment that
12365 the object would ordinarily have. The value of this macro is used
12366 instead of that alignment to align the object. */
12367
12368int
12369ix86_local_alignment (type, align)
12370 tree type;
12371 int align;
12372{
0d7d98ee
JH
12373 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12374 to 16byte boundary. */
12375 if (TARGET_64BIT)
12376 {
12377 if (AGGREGATE_TYPE_P (type)
12378 && TYPE_SIZE (type)
12379 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12380 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12381 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12382 return 128;
12383 }
a7180f70
BS
12384 if (TREE_CODE (type) == ARRAY_TYPE)
12385 {
12386 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12387 return 64;
12388 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12389 return 128;
12390 }
12391 else if (TREE_CODE (type) == COMPLEX_TYPE)
12392 {
12393 if (TYPE_MODE (type) == DCmode && align < 64)
12394 return 64;
12395 if (TYPE_MODE (type) == XCmode && align < 128)
12396 return 128;
12397 }
12398 else if ((TREE_CODE (type) == RECORD_TYPE
12399 || TREE_CODE (type) == UNION_TYPE
12400 || TREE_CODE (type) == QUAL_UNION_TYPE)
12401 && TYPE_FIELDS (type))
12402 {
12403 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12404 return 64;
12405 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12406 return 128;
12407 }
12408 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12409 || TREE_CODE (type) == INTEGER_TYPE)
12410 {
0f290768 12411
a7180f70
BS
12412 if (TYPE_MODE (type) == DFmode && align < 64)
12413 return 64;
12414 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12415 return 128;
12416 }
12417 return align;
12418}
0ed08620
JH
12419\f
12420/* Emit RTL insns to initialize the variable parts of a trampoline.
12421 FNADDR is an RTX for the address of the function's pure code.
12422 CXT is an RTX for the static chain value for the function. */
12423void
12424x86_initialize_trampoline (tramp, fnaddr, cxt)
12425 rtx tramp, fnaddr, cxt;
12426{
12427 if (!TARGET_64BIT)
12428 {
12429 /* Compute offset from the end of the jmp to the target function. */
12430 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12431 plus_constant (tramp, 10),
12432 NULL_RTX, 1, OPTAB_DIRECT);
12433 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 12434 gen_int_mode (0xb9, QImode));
0ed08620
JH
12435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12436 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 12437 gen_int_mode (0xe9, QImode));
0ed08620
JH
12438 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12439 }
12440 else
12441 {
12442 int offset = 0;
12443 /* Try to load address using shorter movl instead of movabs.
12444 We may want to support movq for kernel mode, but kernel does not use
12445 trampolines at the moment. */
12446 if (x86_64_zero_extended_value (fnaddr))
12447 {
12448 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12449 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12450 gen_int_mode (0xbb41, HImode));
0ed08620
JH
12451 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12452 gen_lowpart (SImode, fnaddr));
12453 offset += 6;
12454 }
12455 else
12456 {
12457 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12458 gen_int_mode (0xbb49, HImode));
0ed08620
JH
12459 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12460 fnaddr);
12461 offset += 10;
12462 }
12463 /* Load static chain using movabs to r10. */
12464 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12465 gen_int_mode (0xba49, HImode));
0ed08620
JH
12466 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12467 cxt);
12468 offset += 10;
12469 /* Jump to the r11 */
12470 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12471 gen_int_mode (0xff49, HImode));
0ed08620 12472 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 12473 gen_int_mode (0xe3, QImode));
0ed08620
JH
12474 offset += 3;
12475 if (offset > TRAMPOLINE_SIZE)
b531087a 12476 abort ();
0ed08620 12477 }
5791cc29
JT
12478
12479#ifdef TRANSFER_FROM_TRAMPOLINE
12480 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12481 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12482#endif
0ed08620 12483}
eeb06b1b 12484\f
6a2dd09a
RS
/* Register target builtin NAME of type TYPE under code CODE, but only
   when the MASK bit(s) are enabled in target_flags.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 12491
bd793c65
BS
12492struct builtin_description
12493{
8b60264b
KG
12494 const unsigned int mask;
12495 const enum insn_code icode;
12496 const char *const name;
12497 const enum ix86_builtins code;
12498 const enum rtx_code comparison;
12499 const unsigned int flag;
bd793c65
BS
12500};
12501
fbe5eb6d
BS
12502/* Used for builtins that are enabled both by -msse and -msse2. */
12503#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12504
8b60264b 12505static const struct builtin_description bdesc_comi[] =
bd793c65 12506{
1194ca05
JH
12507 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12512 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12513 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12517 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12518 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12519 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12524 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12525 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12529 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12530 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
12531};
12532
8b60264b 12533static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12534{
12535 /* SSE */
fbe5eb6d
BS
12536 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12542 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12543 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12544
12545 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12549 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12550 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12551 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12555 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12556 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12557 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12559 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
12560 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12561 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12563 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
12564 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12565
12566 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12567 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12568 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12569 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12570
1877be45
JH
12571 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12573 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12574 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12575
fbe5eb6d
BS
12576 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12577 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12578 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12579 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12580 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12581
12582 /* MMX */
eeb06b1b
BS
12583 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12584 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12585 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12586 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12587 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12588 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12589
12590 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12591 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12592 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12593 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12594 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12595 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12596 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12597 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12598
12599 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 12601 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12602
12603 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12605 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12606 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12607
fbe5eb6d
BS
12608 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12609 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12610
12611 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12612 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12613 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12614 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12615 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12616 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12617
fbe5eb6d
BS
12618 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12620 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12621 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12622
12623 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12627 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12628 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12629
12630 /* Special. */
eeb06b1b
BS
12631 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12632 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12633 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12634
fbe5eb6d
BS
12635 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12636 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
12637
12638 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12639 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12640 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12641 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12642 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12643 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12644
12645 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12646 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12647 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12648 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12649 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12650 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12651
12652 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12653 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12654 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12655 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12656
fbe5eb6d
BS
12657 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12658 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12659
12660 /* SSE2 */
12661 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12669
12670 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12674 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12675 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12676 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12680 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12681 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12682 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12684 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12685 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12686 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12688 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12689 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12690
12691 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12695
1877be45
JH
12696 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12700
12701 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12704
12705 /* SSE2 MMX */
12706 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12714
12715 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12716 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12717 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12718 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12719 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12720 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12721 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12723
12724 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12728
916b60b7
BS
12729 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12733
12734 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12736
12737 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12743
12744 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12748
12749 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12752 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12753 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12756 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12757
916b60b7
BS
12758 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12761
12762 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12764
12765 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12771
12772 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12776 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12778
12779 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12783
12784 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12785
fbe5eb6d
BS
12786 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
12789};
12790
8b60264b 12791static const struct builtin_description bdesc_1arg[] =
bd793c65 12792{
fbe5eb6d
BS
12793 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12794 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12795
12796 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12797 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12798 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12799
12800 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12802 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12803 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12804
12805 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12808 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12809
12810 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12811
12812 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12814
fbe5eb6d
BS
12815 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12820
fbe5eb6d 12821 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12822
fbe5eb6d
BS
12823 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12825
12826 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12828 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12829
12830 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
bd793c65
BS
12831};
12832
f6155fda
SS
12833void
12834ix86_init_builtins ()
12835{
12836 if (TARGET_MMX)
12837 ix86_init_mmx_sse_builtins ();
12838}
12839
12840/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12841 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12842 builtins. */
e37af218 12843static void
f6155fda 12844ix86_init_mmx_sse_builtins ()
bd793c65 12845{
8b60264b 12846 const struct builtin_description * d;
77ebd435 12847 size_t i;
bd793c65
BS
12848
12849 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12850 tree pcchar_type_node = build_pointer_type (
12851 build_type_variant (char_type_node, 1, 0));
bd793c65 12852 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12853 tree pcfloat_type_node = build_pointer_type (
12854 build_type_variant (float_type_node, 1, 0));
bd793c65 12855 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12856 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12857 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12858
12859 /* Comparisons. */
12860 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12861 = build_function_type_list (integer_type_node,
12862 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12863 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12864 = build_function_type_list (V4SI_type_node,
12865 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12866 /* MMX/SSE/integer conversions. */
bd793c65 12867 tree int_ftype_v4sf
b4de2f7d
AH
12868 = build_function_type_list (integer_type_node,
12869 V4SF_type_node, NULL_TREE);
bd793c65 12870 tree int_ftype_v8qi
b4de2f7d 12871 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12872 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12873 = build_function_type_list (V4SF_type_node,
12874 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12875 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12876 = build_function_type_list (V4SF_type_node,
12877 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12878 tree int_ftype_v4hi_int
b4de2f7d
AH
12879 = build_function_type_list (integer_type_node,
12880 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12881 tree v4hi_ftype_v4hi_int_int
e7a60f56 12882 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12883 integer_type_node, integer_type_node,
12884 NULL_TREE);
bd793c65
BS
12885 /* Miscellaneous. */
12886 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12887 = build_function_type_list (V8QI_type_node,
12888 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12889 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12890 = build_function_type_list (V4HI_type_node,
12891 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12892 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12893 = build_function_type_list (V4SF_type_node,
12894 V4SF_type_node, V4SF_type_node,
12895 integer_type_node, NULL_TREE);
bd793c65 12896 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12897 = build_function_type_list (V2SI_type_node,
12898 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12899 tree v4hi_ftype_v4hi_int
b4de2f7d 12900 = build_function_type_list (V4HI_type_node,
e7a60f56 12901 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12902 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12903 = build_function_type_list (V4HI_type_node,
12904 V4HI_type_node, long_long_unsigned_type_node,
12905 NULL_TREE);
bd793c65 12906 tree v2si_ftype_v2si_di
b4de2f7d
AH
12907 = build_function_type_list (V2SI_type_node,
12908 V2SI_type_node, long_long_unsigned_type_node,
12909 NULL_TREE);
bd793c65 12910 tree void_ftype_void
b4de2f7d 12911 = build_function_type (void_type_node, void_list_node);
bd793c65 12912 tree void_ftype_unsigned
b4de2f7d 12913 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12914 tree unsigned_ftype_void
b4de2f7d 12915 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12916 tree di_ftype_void
b4de2f7d 12917 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12918 tree v4sf_ftype_void
b4de2f7d 12919 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12920 tree v2si_ftype_v4sf
b4de2f7d 12921 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12922 /* Loads/stores. */
bd793c65 12923 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12924 = build_function_type_list (void_type_node,
12925 V8QI_type_node, V8QI_type_node,
12926 pchar_type_node, NULL_TREE);
068f5dea
JH
12927 tree v4sf_ftype_pcfloat
12928 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
12929 /* @@@ the type is bogus */
12930 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12931 = build_function_type_list (V4SF_type_node,
f8ca7923 12932 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12933 tree void_ftype_pv2si_v4sf
b4de2f7d 12934 = build_function_type_list (void_type_node,
f8ca7923 12935 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12936 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12937 = build_function_type_list (void_type_node,
12938 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12939 tree void_ftype_pdi_di
b4de2f7d
AH
12940 = build_function_type_list (void_type_node,
12941 pdi_type_node, long_long_unsigned_type_node,
12942 NULL_TREE);
916b60b7 12943 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12944 = build_function_type_list (void_type_node,
12945 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12946 /* Normal vector unops. */
12947 tree v4sf_ftype_v4sf
b4de2f7d 12948 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12949
bd793c65
BS
12950 /* Normal vector binops. */
12951 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12952 = build_function_type_list (V4SF_type_node,
12953 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12954 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12955 = build_function_type_list (V8QI_type_node,
12956 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12957 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12958 = build_function_type_list (V4HI_type_node,
12959 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12960 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12961 = build_function_type_list (V2SI_type_node,
12962 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12963 tree di_ftype_di_di
b4de2f7d
AH
12964 = build_function_type_list (long_long_unsigned_type_node,
12965 long_long_unsigned_type_node,
12966 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12967
47f339cf 12968 tree v2si_ftype_v2sf
ae3aa00d 12969 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12970 tree v2sf_ftype_v2si
b4de2f7d 12971 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12972 tree v2si_ftype_v2si
b4de2f7d 12973 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12974 tree v2sf_ftype_v2sf
b4de2f7d 12975 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12976 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12977 = build_function_type_list (V2SF_type_node,
12978 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12979 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12980 = build_function_type_list (V2SI_type_node,
12981 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 12982 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
12983 tree pcint_type_node = build_pointer_type (
12984 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 12985 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
12986 tree pcdouble_type_node = build_pointer_type (
12987 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 12988 tree int_ftype_v2df_v2df
b4de2f7d
AH
12989 = build_function_type_list (integer_type_node,
12990 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12991
12992 tree ti_ftype_void
b4de2f7d 12993 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
12994 tree v2di_ftype_void
12995 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12996 tree ti_ftype_ti_ti
b4de2f7d
AH
12997 = build_function_type_list (intTI_type_node,
12998 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
12999 tree void_ftype_pcvoid
13000 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13001 tree v2di_ftype_di
b4de2f7d
AH
13002 = build_function_type_list (V2DI_type_node,
13003 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13004 tree di_ftype_v2di
13005 = build_function_type_list (long_long_unsigned_type_node,
13006 V2DI_type_node, NULL_TREE);
fbe5eb6d 13007 tree v4sf_ftype_v4si
b4de2f7d 13008 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13009 tree v4si_ftype_v4sf
b4de2f7d 13010 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13011 tree v2df_ftype_v4si
b4de2f7d 13012 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13013 tree v4si_ftype_v2df
b4de2f7d 13014 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13015 tree v2si_ftype_v2df
b4de2f7d 13016 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13017 tree v4sf_ftype_v2df
b4de2f7d 13018 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13019 tree v2df_ftype_v2si
b4de2f7d 13020 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13021 tree v2df_ftype_v4sf
b4de2f7d 13022 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13023 tree int_ftype_v2df
b4de2f7d 13024 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13025 tree v2df_ftype_v2df_int
b4de2f7d
AH
13026 = build_function_type_list (V2DF_type_node,
13027 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13028 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13029 = build_function_type_list (V4SF_type_node,
13030 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13031 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13032 = build_function_type_list (V2DF_type_node,
13033 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13034 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13035 = build_function_type_list (V2DF_type_node,
13036 V2DF_type_node, V2DF_type_node,
13037 integer_type_node,
13038 NULL_TREE);
fbe5eb6d 13039 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13040 = build_function_type_list (V2DF_type_node,
13041 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13042 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13043 = build_function_type_list (void_type_node,
13044 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13045 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13046 = build_function_type_list (void_type_node,
13047 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13048 tree void_ftype_pint_int
b4de2f7d
AH
13049 = build_function_type_list (void_type_node,
13050 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13051 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13052 = build_function_type_list (void_type_node,
13053 V16QI_type_node, V16QI_type_node,
13054 pchar_type_node, NULL_TREE);
068f5dea
JH
13055 tree v2df_ftype_pcdouble
13056 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13057 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13058 = build_function_type_list (V2DF_type_node,
13059 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13060 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13061 = build_function_type_list (V16QI_type_node,
13062 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13063 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13064 = build_function_type_list (V8HI_type_node,
13065 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13066 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13067 = build_function_type_list (V4SI_type_node,
13068 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13069 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13070 = build_function_type_list (V2DI_type_node,
13071 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13072 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13073 = build_function_type_list (V2DI_type_node,
13074 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13075 tree v2df_ftype_v2df
b4de2f7d 13076 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13077 tree v2df_ftype_double
b4de2f7d 13078 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13079 tree v2df_ftype_double_double
b4de2f7d
AH
13080 = build_function_type_list (V2DF_type_node,
13081 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13082 tree int_ftype_v8hi_int
b4de2f7d
AH
13083 = build_function_type_list (integer_type_node,
13084 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13085 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13086 = build_function_type_list (V8HI_type_node,
13087 V8HI_type_node, integer_type_node,
13088 integer_type_node, NULL_TREE);
916b60b7 13089 tree v2di_ftype_v2di_int
b4de2f7d
AH
13090 = build_function_type_list (V2DI_type_node,
13091 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13092 tree v4si_ftype_v4si_int
b4de2f7d
AH
13093 = build_function_type_list (V4SI_type_node,
13094 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13095 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13096 = build_function_type_list (V8HI_type_node,
13097 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13098 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13099 = build_function_type_list (V8HI_type_node,
13100 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13101 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13102 = build_function_type_list (V4SI_type_node,
13103 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13104 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13105 = build_function_type_list (V4SI_type_node,
13106 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13107 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13108 = build_function_type_list (long_long_unsigned_type_node,
13109 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13110 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13111 = build_function_type_list (V2DI_type_node,
13112 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13113 tree int_ftype_v16qi
b4de2f7d 13114 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13115 tree v16qi_ftype_pcchar
13116 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13117 tree void_ftype_pchar_v16qi
13118 = build_function_type_list (void_type_node,
13119 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13120 tree v4si_ftype_pcint
13121 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13122 tree void_ftype_pcint_v4si
f02e1358 13123 = build_function_type_list (void_type_node,
068f5dea 13124 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13125 tree v2di_ftype_v2di
13126 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13127
bd793c65
BS
13128 /* Add all builtins that are more or less simple operations on two
13129 operands. */
ca7558fc 13130 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13131 {
13132 /* Use one of the operands; the target can have a different mode for
13133 mask-generating compares. */
13134 enum machine_mode mode;
13135 tree type;
13136
13137 if (d->name == 0)
13138 continue;
13139 mode = insn_data[d->icode].operand[1].mode;
13140
bd793c65
BS
13141 switch (mode)
13142 {
fbe5eb6d
BS
13143 case V16QImode:
13144 type = v16qi_ftype_v16qi_v16qi;
13145 break;
13146 case V8HImode:
13147 type = v8hi_ftype_v8hi_v8hi;
13148 break;
13149 case V4SImode:
13150 type = v4si_ftype_v4si_v4si;
13151 break;
13152 case V2DImode:
13153 type = v2di_ftype_v2di_v2di;
13154 break;
13155 case V2DFmode:
13156 type = v2df_ftype_v2df_v2df;
13157 break;
13158 case TImode:
13159 type = ti_ftype_ti_ti;
13160 break;
bd793c65
BS
13161 case V4SFmode:
13162 type = v4sf_ftype_v4sf_v4sf;
13163 break;
13164 case V8QImode:
13165 type = v8qi_ftype_v8qi_v8qi;
13166 break;
13167 case V4HImode:
13168 type = v4hi_ftype_v4hi_v4hi;
13169 break;
13170 case V2SImode:
13171 type = v2si_ftype_v2si_v2si;
13172 break;
bd793c65
BS
13173 case DImode:
13174 type = di_ftype_di_di;
13175 break;
13176
13177 default:
13178 abort ();
13179 }
0f290768 13180
bd793c65
BS
13181 /* Override for comparisons. */
13182 if (d->icode == CODE_FOR_maskcmpv4sf3
13183 || d->icode == CODE_FOR_maskncmpv4sf3
13184 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13185 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13186 type = v4si_ftype_v4sf_v4sf;
13187
fbe5eb6d
BS
13188 if (d->icode == CODE_FOR_maskcmpv2df3
13189 || d->icode == CODE_FOR_maskncmpv2df3
13190 || d->icode == CODE_FOR_vmmaskcmpv2df3
13191 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13192 type = v2di_ftype_v2df_v2df;
13193
eeb06b1b 13194 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13195 }
13196
13197 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13198 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13199 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13200 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13201 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13202 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13203
13204 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13205 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13206 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13207
13208 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13210
13211 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13212 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13213
bd793c65 13214 /* comi/ucomi insns. */
ca7558fc 13215 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13216 if (d->mask == MASK_SSE2)
13217 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13218 else
13219 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13220
1255c85c
BS
13221 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13222 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13223 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13224
36210500
SP
13225 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13226 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
fbe5eb6d
BS
13227 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13228 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13229 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13230 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13231 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13232 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 13233
fbe5eb6d
BS
13234 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13235 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 13236
fbe5eb6d 13237 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 13238
068f5dea
JH
13239 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13241 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
fbe5eb6d
BS
13242 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13243 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13244 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 13245
fbe5eb6d
BS
13246 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13247 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13248 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13249 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 13250
fbe5eb6d
BS
13251 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13252 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13253 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13254 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 13255
fbe5eb6d 13256 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 13257
916b60b7 13258 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 13259
fbe5eb6d
BS
13260 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13261 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13262 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13263 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13264 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13265 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 13266
fbe5eb6d 13267 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13268
47f339cf
BS
13269 /* Original 3DNow! */
13270 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13271 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13272 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13290
13291 /* 3DNow! extension as used in the Athlon CPU. */
13292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13296 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13297 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13298
fbe5eb6d
BS
13299 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13300
13301 /* SSE2 */
13302 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13303 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13304
13305 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13306 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13307 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13308
068f5dea
JH
13309 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13312 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13315
13316 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13318 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13320
13321 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13322 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13323 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13325 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13326
13327 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13330 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13331
13332 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13334
13335 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13336
13337 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13338 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13339
13340 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13343 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13345
13346 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13347
13348 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13350
13351 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13354
13355 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13357 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13358
13359 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13362 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13364 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13366
068f5dea 13367 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13368 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13370
068f5dea
JH
13371 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13374 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13376 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13377 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13378
13379 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13380
916b60b7
BS
13381 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13384
13385 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13388
13389 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13391
ab3146fd 13392 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13393 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13394 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13396
ab3146fd 13397 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13401
13402 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13404
13405 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
13406}
13407
13408/* Errors in the source file can cause expand_expr to return const0_rtx
13409 where we expect a vector. To avoid crashing, use one of the vector
13410 clear instructions. */
13411static rtx
13412safe_vector_operand (x, mode)
13413 rtx x;
13414 enum machine_mode mode;
13415{
13416 if (x != const0_rtx)
13417 return x;
13418 x = gen_reg_rtx (mode);
13419
47f339cf 13420 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13421 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13422 : gen_rtx_SUBREG (DImode, x, 0)));
13423 else
e37af218 13424 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13425 : gen_rtx_SUBREG (V4SFmode, x, 0),
13426 CONST0_RTX (V4SFmode)));
bd793c65
BS
13427 return x;
13428}
13429
13430/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13431
13432static rtx
13433ix86_expand_binop_builtin (icode, arglist, target)
13434 enum insn_code icode;
13435 tree arglist;
13436 rtx target;
13437{
13438 rtx pat;
13439 tree arg0 = TREE_VALUE (arglist);
13440 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13441 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13442 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13443 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13444 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13445 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13446
13447 if (VECTOR_MODE_P (mode0))
13448 op0 = safe_vector_operand (op0, mode0);
13449 if (VECTOR_MODE_P (mode1))
13450 op1 = safe_vector_operand (op1, mode1);
13451
13452 if (! target
13453 || GET_MODE (target) != tmode
13454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13455 target = gen_reg_rtx (tmode);
13456
13457 /* In case the insn wants input operands in modes different from
13458 the result, abort. */
13459 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13460 abort ();
13461
13462 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13463 op0 = copy_to_mode_reg (mode0, op0);
13464 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13465 op1 = copy_to_mode_reg (mode1, op1);
13466
59bef189
RH
13467 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13468 yet one of the two must not be a memory. This is normally enforced
13469 by expanders, but we didn't bother to create one here. */
13470 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13471 op0 = copy_to_mode_reg (mode0, op0);
13472
bd793c65
BS
13473 pat = GEN_FCN (icode) (target, op0, op1);
13474 if (! pat)
13475 return 0;
13476 emit_insn (pat);
13477 return target;
13478}
13479
13480/* Subroutine of ix86_expand_builtin to take care of stores. */
13481
13482static rtx
e37af218 13483ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
13484 enum insn_code icode;
13485 tree arglist;
bd793c65
BS
13486{
13487 rtx pat;
13488 tree arg0 = TREE_VALUE (arglist);
13489 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13490 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13491 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13492 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13493 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13494
13495 if (VECTOR_MODE_P (mode1))
13496 op1 = safe_vector_operand (op1, mode1);
13497
13498 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
13499
13500 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13501 op1 = copy_to_mode_reg (mode1, op1);
13502
bd793c65
BS
13503 pat = GEN_FCN (icode) (op0, op1);
13504 if (pat)
13505 emit_insn (pat);
13506 return 0;
13507}
13508
13509/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13510
13511static rtx
13512ix86_expand_unop_builtin (icode, arglist, target, do_load)
13513 enum insn_code icode;
13514 tree arglist;
13515 rtx target;
13516 int do_load;
13517{
13518 rtx pat;
13519 tree arg0 = TREE_VALUE (arglist);
13520 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13521 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13522 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13523
13524 if (! target
13525 || GET_MODE (target) != tmode
13526 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13527 target = gen_reg_rtx (tmode);
13528 if (do_load)
13529 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13530 else
13531 {
13532 if (VECTOR_MODE_P (mode0))
13533 op0 = safe_vector_operand (op0, mode0);
13534
13535 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13536 op0 = copy_to_mode_reg (mode0, op0);
13537 }
13538
13539 pat = GEN_FCN (icode) (target, op0);
13540 if (! pat)
13541 return 0;
13542 emit_insn (pat);
13543 return target;
13544}
13545
13546/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13547 sqrtss, rsqrtss, rcpss. */
13548
13549static rtx
13550ix86_expand_unop1_builtin (icode, arglist, target)
13551 enum insn_code icode;
13552 tree arglist;
13553 rtx target;
13554{
13555 rtx pat;
13556 tree arg0 = TREE_VALUE (arglist);
59bef189 13557 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13558 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13559 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13560
13561 if (! target
13562 || GET_MODE (target) != tmode
13563 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13564 target = gen_reg_rtx (tmode);
13565
13566 if (VECTOR_MODE_P (mode0))
13567 op0 = safe_vector_operand (op0, mode0);
13568
13569 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13570 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13571
59bef189
RH
13572 op1 = op0;
13573 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13574 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13575
59bef189 13576 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13577 if (! pat)
13578 return 0;
13579 emit_insn (pat);
13580 return target;
13581}
13582
13583/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13584
13585static rtx
13586ix86_expand_sse_compare (d, arglist, target)
8b60264b 13587 const struct builtin_description *d;
bd793c65
BS
13588 tree arglist;
13589 rtx target;
13590{
13591 rtx pat;
13592 tree arg0 = TREE_VALUE (arglist);
13593 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13594 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13595 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13596 rtx op2;
13597 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13598 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13599 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13600 enum rtx_code comparison = d->comparison;
13601
13602 if (VECTOR_MODE_P (mode0))
13603 op0 = safe_vector_operand (op0, mode0);
13604 if (VECTOR_MODE_P (mode1))
13605 op1 = safe_vector_operand (op1, mode1);
13606
13607 /* Swap operands if we have a comparison that isn't available in
13608 hardware. */
13609 if (d->flag)
13610 {
21e1b5f1
BS
13611 rtx tmp = gen_reg_rtx (mode1);
13612 emit_move_insn (tmp, op1);
bd793c65 13613 op1 = op0;
21e1b5f1 13614 op0 = tmp;
bd793c65 13615 }
21e1b5f1
BS
13616
13617 if (! target
13618 || GET_MODE (target) != tmode
13619 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13620 target = gen_reg_rtx (tmode);
13621
13622 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13623 op0 = copy_to_mode_reg (mode0, op0);
13624 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13625 op1 = copy_to_mode_reg (mode1, op1);
13626
13627 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13628 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13629 if (! pat)
13630 return 0;
13631 emit_insn (pat);
13632 return target;
13633}
13634
13635/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13636
13637static rtx
13638ix86_expand_sse_comi (d, arglist, target)
8b60264b 13639 const struct builtin_description *d;
bd793c65
BS
13640 tree arglist;
13641 rtx target;
13642{
13643 rtx pat;
13644 tree arg0 = TREE_VALUE (arglist);
13645 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13646 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13647 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13648 rtx op2;
13649 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13650 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13651 enum rtx_code comparison = d->comparison;
13652
13653 if (VECTOR_MODE_P (mode0))
13654 op0 = safe_vector_operand (op0, mode0);
13655 if (VECTOR_MODE_P (mode1))
13656 op1 = safe_vector_operand (op1, mode1);
13657
13658 /* Swap operands if we have a comparison that isn't available in
13659 hardware. */
13660 if (d->flag)
13661 {
13662 rtx tmp = op1;
13663 op1 = op0;
13664 op0 = tmp;
bd793c65
BS
13665 }
13666
13667 target = gen_reg_rtx (SImode);
13668 emit_move_insn (target, const0_rtx);
13669 target = gen_rtx_SUBREG (QImode, target, 0);
13670
13671 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13672 op0 = copy_to_mode_reg (mode0, op0);
13673 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13674 op1 = copy_to_mode_reg (mode1, op1);
13675
13676 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13677 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13678 if (! pat)
13679 return 0;
13680 emit_insn (pat);
29628f27
BS
13681 emit_insn (gen_rtx_SET (VOIDmode,
13682 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13683 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13684 SET_DEST (pat),
29628f27 13685 const0_rtx)));
bd793c65 13686
6f1a6c5b 13687 return SUBREG_REG (target);
bd793c65
BS
13688}
13689
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins needing special expansion are handled by this switch;
     anything not matched falls through to the table-driven code at
     the bottom.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* The selector operand must fold to a constant; diagnose and
	     return a harmless dummy so expansion can continue.  */
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      /* Masked store has no value.  */
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      /* The second argument is a pointer; dereference it.  */
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      /* First argument is the destination pointer; dereference it.  */
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The mov[hl]p[sd] patterns are two-input merges; use the
	 destination memory for both output and first input.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr loads from memory, so spill the value to a stack slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* stmxcsr stores to memory; read the result back from the slot.  */
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      /* Whole-register byte shifts are expanded as TImode shifts on a
	 V2DImode value, hence the subreg juggling below.  */
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins expand through the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Zeroing builtins materialize an all-zero register.  */
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      /* Spill the scalar to the stack, load it into the low half, then
	 duplicate it into both lanes with shufpd.  */
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      /* Build the vector in a stack slot, then load it whole.  */
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      /* Load, then swap the two lanes.  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      /* Load the scalar and broadcast it to both lanes.  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  /* Table-driven expansion for the remaining two-operand builtins.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  /* One-operand builtins.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  /* comi/ucomi flag-setting builtins.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
14242
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  /* Only valid post-reload; the stack slot is picked by hand.  */
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* With a red zone we can store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* No red zone: push the value.  Pushes are DImode-wide in 64-bit
	 mode, so widen narrower modes first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push the value; DImode takes two SImode pushes.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* operands[1] (presumably the high word - see split_di) is
	       pushed first so the full value ends up contiguous at the
	       final stack address.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
14324
14325/* Free operand from the memory. */
14326void
14327ix86_free_from_memory (mode)
14328 enum machine_mode mode;
14329{
898d374d
JH
14330 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14331 {
14332 int size;
14333
14334 if (mode == DImode || TARGET_64BIT)
14335 size = 8;
14336 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14337 size = 2;
14338 else
14339 size = 4;
14340 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14341 to pop or add instruction if registers are available. */
14342 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14343 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14344 GEN_INT (size))));
14345 }
4211a8fb 14346}
a946dd00 14347
f84aa48a
JH
14348/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14349 QImode must go into class Q_REGS.
14350 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14351 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
14352enum reg_class
14353ix86_preferred_reload_class (x, class)
14354 rtx x;
14355 enum reg_class class;
14356{
1877be45
JH
14357 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14358 return NO_REGS;
f84aa48a
JH
14359 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14360 {
14361 /* SSE can't load any constant directly yet. */
14362 if (SSE_CLASS_P (class))
14363 return NO_REGS;
14364 /* Floats can load 0 and 1. */
14365 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14366 {
14367 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14368 if (MAYBE_SSE_CLASS_P (class))
14369 return (reg_class_subset_p (class, GENERAL_REGS)
14370 ? GENERAL_REGS : FLOAT_REGS);
14371 else
14372 return class;
14373 }
14374 /* General regs can load everything. */
14375 if (reg_class_subset_p (class, GENERAL_REGS))
14376 return GENERAL_REGS;
14377 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14378 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14379 return NO_REGS;
14380 }
14381 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14382 return NO_REGS;
14383 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14384 return Q_REGS;
14385 return class;
14386}
14387
14388/* If we are copying between general and FP registers, we need a memory
14389 location. The same is true for SSE and MMX registers.
14390
14391 The macro can't work reliably when one of the CLASSES is class containing
14392 registers from multiple units (SSE, MMX, integer). We avoid this by never
14393 combining those units in single alternative in the machine description.
14394 Ensure that this constraint holds to avoid unexpected surprises.
14395
14396 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14397 enforce these sanity checks. */
14398int
14399ix86_secondary_memory_needed (class1, class2, mode, strict)
14400 enum reg_class class1, class2;
14401 enum machine_mode mode;
14402 int strict;
14403{
14404 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14405 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14406 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14407 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14408 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14409 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14410 {
14411 if (strict)
14412 abort ();
14413 else
14414 return 1;
14415 }
14416 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14417 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14418 && (mode) != SImode)
14419 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14420 && (mode) != SImode));
14421}
14422/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14423 one in class CLASS2.
f84aa48a
JH
14424
14425 It is not required that the cost always equal 2 when FROM is the same as TO;
14426 on some machines it is expensive to move between registers if they are not
14427 general registers. */
14428int
14429ix86_register_move_cost (mode, class1, class2)
14430 enum machine_mode mode;
14431 enum reg_class class1, class2;
14432{
14433 /* In case we require secondary memory, compute cost of the store followed
d631b80a
RH
14434 by load. In order to avoid bad register allocation choices, we need
14435 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14436
f84aa48a
JH
14437 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14438 {
d631b80a
RH
14439 int cost = 1;
14440
14441 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14442 MEMORY_MOVE_COST (mode, class1, 1));
14443 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14444 MEMORY_MOVE_COST (mode, class2, 1));
14445
14446 /* In case of copying from general_purpose_register we may emit multiple
14447 stores followed by single load causing memory size mismatch stall.
d1f87653 14448 Count this as arbitrarily high cost of 20. */
62415523 14449 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14450 cost += 20;
14451
14452 /* In the case of FP/MMX moves, the registers actually overlap, and we
14453 have to switch modes in order to treat them differently. */
14454 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14455 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14456 cost += 20;
14457
14458 return cost;
f84aa48a 14459 }
d631b80a 14460
92d0fb09 14461 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14462 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14463 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14464 return ix86_cost->mmxsse_to_integer;
14465 if (MAYBE_FLOAT_CLASS_P (class1))
14466 return ix86_cost->fp_move;
14467 if (MAYBE_SSE_CLASS_P (class1))
14468 return ix86_cost->sse_move;
14469 if (MAYBE_MMX_CLASS_P (class1))
14470 return ix86_cost->mmx_move;
f84aa48a
JH
14471 return 2;
14472}
14473
a946dd00
JH
14474/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14475int
14476ix86_hard_regno_mode_ok (regno, mode)
14477 int regno;
14478 enum machine_mode mode;
14479{
14480 /* Flags and only flags can only hold CCmode values. */
14481 if (CC_REGNO_P (regno))
14482 return GET_MODE_CLASS (mode) == MODE_CC;
14483 if (GET_MODE_CLASS (mode) == MODE_CC
14484 || GET_MODE_CLASS (mode) == MODE_RANDOM
14485 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14486 return 0;
14487 if (FP_REGNO_P (regno))
14488 return VALID_FP_MODE_P (mode);
14489 if (SSE_REGNO_P (regno))
14490 return VALID_SSE_REG_MODE (mode);
14491 if (MMX_REGNO_P (regno))
47f339cf 14492 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
14493 /* We handle both integer and floats in the general purpose registers.
14494 In future we should be able to handle vector modes as well. */
14495 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14496 return 0;
14497 /* Take care for QImode values - they can be in non-QI regs, but then
14498 they do cause partial register stalls. */
d2836273 14499 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14500 return 1;
14501 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14502}
fa79946e
JH
14503
14504/* Return the cost of moving data of mode M between a
14505 register and memory. A value of 2 is the default; this cost is
14506 relative to those in `REGISTER_MOVE_COST'.
14507
14508 If moving between registers and memory is more expensive than
14509 between two registers, you should define this macro to express the
a4f31c00
AJ
14510 relative cost.
14511
fa79946e
JH
14512 Model also increased moving costs of QImode registers in non
14513 Q_REGS classes.
14514 */
14515int
14516ix86_memory_move_cost (mode, class, in)
14517 enum machine_mode mode;
14518 enum reg_class class;
14519 int in;
14520{
14521 if (FLOAT_CLASS_P (class))
14522 {
14523 int index;
14524 switch (mode)
14525 {
14526 case SFmode:
14527 index = 0;
14528 break;
14529 case DFmode:
14530 index = 1;
14531 break;
14532 case XFmode:
14533 case TFmode:
14534 index = 2;
14535 break;
14536 default:
14537 return 100;
14538 }
14539 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14540 }
14541 if (SSE_CLASS_P (class))
14542 {
14543 int index;
14544 switch (GET_MODE_SIZE (mode))
14545 {
14546 case 4:
14547 index = 0;
14548 break;
14549 case 8:
14550 index = 1;
14551 break;
14552 case 16:
14553 index = 2;
14554 break;
14555 default:
14556 return 100;
14557 }
14558 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14559 }
14560 if (MMX_CLASS_P (class))
14561 {
14562 int index;
14563 switch (GET_MODE_SIZE (mode))
14564 {
14565 case 4:
14566 index = 0;
14567 break;
14568 case 8:
14569 index = 1;
14570 break;
14571 default:
14572 return 100;
14573 }
14574 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14575 }
14576 switch (GET_MODE_SIZE (mode))
14577 {
14578 case 1:
14579 if (in)
14580 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14581 : ix86_cost->movzbl_load);
14582 else
14583 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14584 : ix86_cost->int_store[0] + 4);
14585 break;
14586 case 2:
14587 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14588 default:
14589 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14590 if (mode == TFmode)
14591 mode = XFmode;
3bb7e126 14592 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
d09e61b9
JH
14593 * ((int) GET_MODE_SIZE (mode)
14594 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
fa79946e
JH
14595 }
14596}
0ecf09f9 14597
3c50106f
RH
14598/* Compute a (partial) cost for rtx X. Return true if the complete
14599 cost has been computed, and false if subexpressions should be
14600 scanned. In either case, *TOTAL contains the cost result. */
14601
14602static bool
14603ix86_rtx_costs (x, code, outer_code, total)
14604 rtx x;
14605 int code, outer_code;
14606 int *total;
14607{
14608 enum machine_mode mode = GET_MODE (x);
14609
14610 switch (code)
14611 {
14612 case CONST_INT:
14613 case CONST:
14614 case LABEL_REF:
14615 case SYMBOL_REF:
14616 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14617 *total = 3;
14618 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14619 *total = 2;
14620 else if (flag_pic && SYMBOLIC_CONST (x))
14621 *total = 1;
14622 else
14623 *total = 0;
14624 return true;
14625
14626 case CONST_DOUBLE:
14627 if (mode == VOIDmode)
14628 *total = 0;
14629 else
14630 switch (standard_80387_constant_p (x))
14631 {
14632 case 1: /* 0.0 */
14633 *total = 1;
14634 break;
14635 case 2: /* 1.0 */
14636 *total = 2;
14637 break;
14638 default:
14639 /* Start with (MEM (SYMBOL_REF)), since that's where
14640 it'll probably end up. Add a penalty for size. */
14641 *total = (COSTS_N_INSNS (1)
14642 + (flag_pic != 0)
14643 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14644 break;
14645 }
14646 return true;
14647
14648 case ZERO_EXTEND:
14649 /* The zero extensions is often completely free on x86_64, so make
14650 it as cheap as possible. */
14651 if (TARGET_64BIT && mode == DImode
14652 && GET_MODE (XEXP (x, 0)) == SImode)
14653 *total = 1;
14654 else if (TARGET_ZERO_EXTEND_WITH_AND)
14655 *total = COSTS_N_INSNS (ix86_cost->add);
14656 else
14657 *total = COSTS_N_INSNS (ix86_cost->movzx);
14658 return false;
14659
14660 case SIGN_EXTEND:
14661 *total = COSTS_N_INSNS (ix86_cost->movsx);
14662 return false;
14663
14664 case ASHIFT:
14665 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14666 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14667 {
14668 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14669 if (value == 1)
14670 {
14671 *total = COSTS_N_INSNS (ix86_cost->add);
14672 return false;
14673 }
14674 if ((value == 2 || value == 3)
14675 && !TARGET_DECOMPOSE_LEA
14676 && ix86_cost->lea <= ix86_cost->shift_const)
14677 {
14678 *total = COSTS_N_INSNS (ix86_cost->lea);
14679 return false;
14680 }
14681 }
14682 /* FALLTHRU */
14683
14684 case ROTATE:
14685 case ASHIFTRT:
14686 case LSHIFTRT:
14687 case ROTATERT:
14688 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14689 {
14690 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14691 {
14692 if (INTVAL (XEXP (x, 1)) > 32)
14693 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14694 else
14695 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14696 }
14697 else
14698 {
14699 if (GET_CODE (XEXP (x, 1)) == AND)
14700 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14701 else
14702 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14703 }
14704 }
14705 else
14706 {
14707 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14708 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14709 else
14710 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14711 }
14712 return false;
14713
14714 case MULT:
14715 if (FLOAT_MODE_P (mode))
14716 *total = COSTS_N_INSNS (ix86_cost->fmul);
14717 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14718 {
14719 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14720 int nbits;
14721
14722 for (nbits = 0; value != 0; value >>= 1)
14723 nbits++;
14724
14725 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14726 + nbits * ix86_cost->mult_bit);
14727 }
14728 else
14729 {
14730 /* This is arbitrary */
14731 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14732 + 7 * ix86_cost->mult_bit);
14733 }
14734 return false;
14735
14736 case DIV:
14737 case UDIV:
14738 case MOD:
14739 case UMOD:
14740 if (FLOAT_MODE_P (mode))
14741 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14742 else
14743 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14744 return false;
14745
14746 case PLUS:
14747 if (FLOAT_MODE_P (mode))
14748 *total = COSTS_N_INSNS (ix86_cost->fadd);
14749 else if (!TARGET_DECOMPOSE_LEA
14750 && GET_MODE_CLASS (mode) == MODE_INT
14751 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14752 {
14753 if (GET_CODE (XEXP (x, 0)) == PLUS
14754 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14755 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14756 && CONSTANT_P (XEXP (x, 1)))
14757 {
14758 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14759 if (val == 2 || val == 4 || val == 8)
14760 {
14761 *total = COSTS_N_INSNS (ix86_cost->lea);
14762 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14763 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14764 outer_code);
14765 *total += rtx_cost (XEXP (x, 1), outer_code);
14766 return true;
14767 }
14768 }
14769 else if (GET_CODE (XEXP (x, 0)) == MULT
14770 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14771 {
14772 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14773 if (val == 2 || val == 4 || val == 8)
14774 {
14775 *total = COSTS_N_INSNS (ix86_cost->lea);
14776 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14777 *total += rtx_cost (XEXP (x, 1), outer_code);
14778 return true;
14779 }
14780 }
14781 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14782 {
14783 *total = COSTS_N_INSNS (ix86_cost->lea);
14784 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14785 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14786 *total += rtx_cost (XEXP (x, 1), outer_code);
14787 return true;
14788 }
14789 }
14790 /* FALLTHRU */
14791
14792 case MINUS:
14793 if (FLOAT_MODE_P (mode))
14794 {
14795 *total = COSTS_N_INSNS (ix86_cost->fadd);
14796 return false;
14797 }
14798 /* FALLTHRU */
14799
14800 case AND:
14801 case IOR:
14802 case XOR:
14803 if (!TARGET_64BIT && mode == DImode)
14804 {
14805 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14806 + (rtx_cost (XEXP (x, 0), outer_code)
14807 << (GET_MODE (XEXP (x, 0)) != DImode))
14808 + (rtx_cost (XEXP (x, 1), outer_code)
14809 << (GET_MODE (XEXP (x, 1)) != DImode)));
14810 return true;
14811 }
14812 /* FALLTHRU */
14813
14814 case NEG:
14815 if (FLOAT_MODE_P (mode))
14816 {
14817 *total = COSTS_N_INSNS (ix86_cost->fchs);
14818 return false;
14819 }
14820 /* FALLTHRU */
14821
14822 case NOT:
14823 if (!TARGET_64BIT && mode == DImode)
14824 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14825 else
14826 *total = COSTS_N_INSNS (ix86_cost->add);
14827 return false;
14828
14829 case FLOAT_EXTEND:
14830 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14831 *total = 0;
14832 return false;
14833
14834 case ABS:
14835 if (FLOAT_MODE_P (mode))
14836 *total = COSTS_N_INSNS (ix86_cost->fabs);
14837 return false;
14838
14839 case SQRT:
14840 if (FLOAT_MODE_P (mode))
14841 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14842 return false;
14843
14844 default:
14845 return false;
14846 }
14847}
14848
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit a constructor reference for SVR3: push the symbol's address in
   the .init section; the startup code pops and calls each entry.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
#if TARGET_MACHO

/* Counter used to generate unique stub labels.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  /* Derive the binder and symbol helper names.  */
  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* The stub: jump indirect through the lazy pointer.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC: obtain our own address to reach the lazy pointer.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder: push the lazy pointer's address and enter dyld.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer itself, initially aimed at the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14926
162f023b
JH
14927/* Order the registers for register allocator. */
14928
14929void
14930x86_order_regs_for_local_alloc ()
14931{
14932 int pos = 0;
14933 int i;
14934
14935 /* First allocate the local general purpose registers. */
14936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14937 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14938 reg_alloc_order [pos++] = i;
14939
14940 /* Global general purpose registers. */
14941 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14942 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14943 reg_alloc_order [pos++] = i;
14944
14945 /* x87 registers come first in case we are doing FP math
14946 using them. */
14947 if (!TARGET_SSE_MATH)
14948 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14949 reg_alloc_order [pos++] = i;
fce5a9f2 14950
162f023b
JH
14951 /* SSE registers. */
14952 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14953 reg_alloc_order [pos++] = i;
14954 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14955 reg_alloc_order [pos++] = i;
14956
d1f87653 14957 /* x87 registers. */
162f023b
JH
14958 if (TARGET_SSE_MATH)
14959 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14960 reg_alloc_order [pos++] = i;
14961
14962 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14963 reg_alloc_order [pos++] = i;
14964
14965 /* Initialize the rest of array as we do not allocate some registers
14966 at all. */
14967 while (pos < FIRST_PSEUDO_REGISTER)
14968 reg_alloc_order [pos++] = 0;
14969}
194734e9 14970
4977bab6
ZW
14971#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14972#define TARGET_USE_MS_BITFIELD_LAYOUT 0
14973#endif
14974
fe77449a
DR
14975/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14976 struct attribute_spec.handler. */
14977static tree
14978ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14979 tree *node;
14980 tree name;
14981 tree args ATTRIBUTE_UNUSED;
14982 int flags ATTRIBUTE_UNUSED;
14983 bool *no_add_attrs;
14984{
14985 tree *type = NULL;
14986 if (DECL_P (*node))
14987 {
14988 if (TREE_CODE (*node) == TYPE_DECL)
14989 type = &TREE_TYPE (*node);
14990 }
14991 else
14992 type = node;
14993
14994 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14995 || TREE_CODE (*type) == UNION_TYPE)))
14996 {
14997 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14998 *no_add_attrs = true;
14999 }
15000
15001 else if ((is_attribute_p ("ms_struct", name)
15002 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15003 || ((is_attribute_p ("gcc_struct", name)
15004 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15005 {
15006 warning ("`%s' incompatible attribute ignored",
15007 IDENTIFIER_POINTER (name));
15008 *no_add_attrs = true;
15009 }
15010
15011 return NULL_TREE;
15012}
15013
4977bab6
ZW
15014static bool
15015ix86_ms_bitfield_layout_p (record_type)
fe77449a 15016 tree record_type;
4977bab6 15017{
fe77449a
DR
15018 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15019 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15020 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15021}
15022
483ab821
MM
15023/* Returns an expression indicating where the this parameter is
15024 located on entry to the FUNCTION. */
15025
15026static rtx
3961e8fe 15027x86_this_parameter (function)
483ab821
MM
15028 tree function;
15029{
15030 tree type = TREE_TYPE (function);
15031
3961e8fe
RH
15032 if (TARGET_64BIT)
15033 {
15034 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15035 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15036 }
15037
483ab821
MM
15038 if (ix86_fntype_regparm (type) > 0)
15039 {
15040 tree parm;
15041
15042 parm = TYPE_ARG_TYPES (type);
15043 /* Figure out whether or not the function has a variable number of
15044 arguments. */
3961e8fe 15045 for (; parm; parm = TREE_CHAIN (parm))
483ab821
MM
15046 if (TREE_VALUE (parm) == void_type_node)
15047 break;
15048 /* If not, the this parameter is in %eax. */
15049 if (parm)
15050 return gen_rtx_REG (SImode, 0);
15051 }
15052
15053 if (aggregate_value_p (TREE_TYPE (type)))
15054 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15055 else
15056 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15057}
15058
3961e8fe
RH
15059/* Determine whether x86_output_mi_thunk can succeed. */
15060
15061static bool
15062x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15063 tree thunk ATTRIBUTE_UNUSED;
15064 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15065 HOST_WIDE_INT vcall_offset;
15066 tree function;
15067{
15068 /* 64-bit can handle anything. */
15069 if (TARGET_64BIT)
15070 return true;
15071
15072 /* For 32-bit, everything's fine if we have one free register. */
15073 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15074 return true;
15075
15076 /* Need a free register for vcall_offset. */
15077 if (vcall_offset)
15078 return false;
15079
15080 /* Need a free register for GOT references. */
15081 if (flag_pic && !(*targetm.binds_local_p) (function))
15082 return false;
15083
15084 /* Otherwise ok. */
15085 return true;
15086}
15087
15088/* Output the assembler code for a thunk function. THUNK_DECL is the
15089 declaration for the thunk function itself, FUNCTION is the decl for
15090 the target function. DELTA is an immediate constant offset to be
272d0bee 15091 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 15092 *(*this + vcall_offset) should be added to THIS. */
483ab821 15093
c590b625 15094static void
3961e8fe
RH
15095x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15096 FILE *file ATTRIBUTE_UNUSED;
483ab821 15097 tree thunk ATTRIBUTE_UNUSED;
eb0424da 15098 HOST_WIDE_INT delta;
3961e8fe 15099 HOST_WIDE_INT vcall_offset;
194734e9
JH
15100 tree function;
15101{
194734e9 15102 rtx xops[3];
3961e8fe
RH
15103 rtx this = x86_this_parameter (function);
15104 rtx this_reg, tmp;
194734e9 15105
3961e8fe
RH
15106 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15107 pull it in now and let DELTA benefit. */
15108 if (REG_P (this))
15109 this_reg = this;
15110 else if (vcall_offset)
15111 {
15112 /* Put the this parameter into %eax. */
15113 xops[0] = this;
15114 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15115 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15116 }
15117 else
15118 this_reg = NULL_RTX;
15119
15120 /* Adjust the this parameter by a fixed constant. */
15121 if (delta)
194734e9 15122 {
483ab821 15123 xops[0] = GEN_INT (delta);
3961e8fe
RH
15124 xops[1] = this_reg ? this_reg : this;
15125 if (TARGET_64BIT)
194734e9 15126 {
3961e8fe
RH
15127 if (!x86_64_general_operand (xops[0], DImode))
15128 {
15129 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15130 xops[1] = tmp;
15131 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15132 xops[0] = tmp;
15133 xops[1] = this;
15134 }
15135 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
15136 }
15137 else
3961e8fe 15138 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 15139 }
3961e8fe
RH
15140
15141 /* Adjust the this parameter by a value stored in the vtable. */
15142 if (vcall_offset)
194734e9 15143 {
3961e8fe
RH
15144 if (TARGET_64BIT)
15145 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15146 else
15147 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
483ab821 15148
3961e8fe
RH
15149 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15150 xops[1] = tmp;
15151 if (TARGET_64BIT)
15152 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15153 else
15154 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 15155
3961e8fe
RH
15156 /* Adjust the this parameter. */
15157 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15158 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15159 {
15160 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15161 xops[0] = GEN_INT (vcall_offset);
15162 xops[1] = tmp2;
15163 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15164 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 15165 }
3961e8fe
RH
15166 xops[1] = this_reg;
15167 if (TARGET_64BIT)
15168 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15169 else
15170 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15171 }
194734e9 15172
3961e8fe
RH
15173 /* If necessary, drop THIS back to its stack slot. */
15174 if (this_reg && this_reg != this)
15175 {
15176 xops[0] = this_reg;
15177 xops[1] = this;
15178 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15179 }
194734e9 15180
3961e8fe
RH
15181 xops[0] = DECL_RTL (function);
15182 if (TARGET_64BIT)
15183 {
15184 if (!flag_pic || (*targetm.binds_local_p) (function))
15185 output_asm_insn ("jmp\t%P0", xops);
15186 else
fcbe3b89
RH
15187 {
15188 tmp = XEXP (xops[0], 0);
15189 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15190 tmp = gen_rtx_CONST (Pmode, tmp);
15191 tmp = gen_rtx_MEM (QImode, tmp);
15192 xops[0] = tmp;
15193 output_asm_insn ("jmp\t%A0", xops);
15194 }
3961e8fe
RH
15195 }
15196 else
15197 {
15198 if (!flag_pic || (*targetm.binds_local_p) (function))
15199 output_asm_insn ("jmp\t%P0", xops);
194734e9 15200 else
21ff35fb 15201#if TARGET_MACHO
095fa594
SH
15202 if (TARGET_MACHO)
15203 {
15204 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15205 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15206 tmp = gen_rtx_MEM (QImode, tmp);
15207 xops[0] = tmp;
15208 output_asm_insn ("jmp\t%0", xops);
15209 }
15210 else
15211#endif /* TARGET_MACHO */
194734e9 15212 {
3961e8fe
RH
15213 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15214 output_set_got (tmp);
15215
15216 xops[1] = tmp;
15217 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15218 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
15219 }
15220 }
15221}
e2500fed 15222
e932b21b
JH
15223int
15224x86_field_alignment (field, computed)
15225 tree field;
15226 int computed;
15227{
15228 enum machine_mode mode;
ad9335eb
JJ
15229 tree type = TREE_TYPE (field);
15230
15231 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15232 return computed;
ad9335eb
JJ
15233 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15234 ? get_inner_array_type (type) : type);
39e3a681
JJ
15235 if (mode == DFmode || mode == DCmode
15236 || GET_MODE_CLASS (mode) == MODE_INT
15237 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15238 return MIN (32, computed);
15239 return computed;
15240}
15241
a5fa1ecd
JH
15242/* Output assembler code to FILE to increment profiler label # LABELNO
15243 for profiling a function entry. */
15244void
15245x86_function_profiler (file, labelno)
15246 FILE *file;
b9b21a05 15247 int labelno ATTRIBUTE_UNUSED;
a5fa1ecd
JH
15248{
15249 if (TARGET_64BIT)
15250 if (flag_pic)
15251 {
15252#ifndef NO_PROFILE_COUNTERS
15253 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15254#endif
15255 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15256 }
15257 else
15258 {
15259#ifndef NO_PROFILE_COUNTERS
15260 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15261#endif
15262 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15263 }
15264 else if (flag_pic)
15265 {
15266#ifndef NO_PROFILE_COUNTERS
15267 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15268 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15269#endif
15270 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15271 }
15272 else
15273 {
15274#ifndef NO_PROFILE_COUNTERS
095f9093 15275 fprintf (file, "\tmovl\t$%sP%d,%%$%s\n", LPREFIX, labelno,
a5fa1ecd
JH
15276 PROFILE_COUNT_REGISTER);
15277#endif
15278 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15279 }
15280}
15281
2a500b9e
JH
15282/* Implement machine specific optimizations.
15283 At the moment we implement single transformation: AMD Athlon works faster
d1f87653 15284 when RET is not destination of conditional jump or directly preceded
2a500b9e
JH
15285 by other jump instruction. We avoid the penalty by inserting NOP just
15286 before the RET instructions in such cases. */
15287void
15288x86_machine_dependent_reorg (first)
15289 rtx first ATTRIBUTE_UNUSED;
15290{
15291 edge e;
15292
4977bab6 15293 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
2a500b9e
JH
15294 return;
15295 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15296 {
15297 basic_block bb = e->src;
15298 rtx ret = bb->end;
15299 rtx prev;
15300 bool insert = false;
15301
15302 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15303 continue;
4977bab6
ZW
15304 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15305 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15306 break;
2a500b9e
JH
15307 if (prev && GET_CODE (prev) == CODE_LABEL)
15308 {
15309 edge e;
15310 for (e = bb->pred; e; e = e->pred_next)
4977bab6 15311 if (EDGE_FREQUENCY (e) && e->src->index >= 0
2a500b9e
JH
15312 && !(e->flags & EDGE_FALLTHRU))
15313 insert = 1;
15314 }
15315 if (!insert)
15316 {
4977bab6 15317 prev = prev_active_insn (ret);
2a500b9e
JH
15318 if (prev && GET_CODE (prev) == JUMP_INSN
15319 && any_condjump_p (prev))
15320 insert = 1;
4977bab6
ZW
15321 /* Empty functions get branch misspredict even when the jump destination
15322 is not visible to us. */
15323 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15324 insert = 1;
2a500b9e
JH
15325 }
15326 if (insert)
15327 emit_insn_before (gen_nop (), ret);
15328 }
15329}
15330
4977bab6
ZW
15331/* Return nonzero when QImode register that must be represented via REX prefix
15332 is used. */
15333bool
15334x86_extended_QIreg_mentioned_p (insn)
15335 rtx insn;
15336{
15337 int i;
15338 extract_insn_cached (insn);
15339 for (i = 0; i < recog_data.n_operands; i++)
15340 if (REG_P (recog_data.operand[i])
15341 && REGNO (recog_data.operand[i]) >= 4)
15342 return true;
15343 return false;
15344}
15345
15346/* Return nonzero when P points to register encoded via REX prefix.
15347 Called via for_each_rtx. */
15348static int
15349extended_reg_mentioned_1 (p, data)
15350 rtx *p;
15351 void *data ATTRIBUTE_UNUSED;
15352{
15353 unsigned int regno;
15354 if (!REG_P (*p))
15355 return 0;
15356 regno = REGNO (*p);
15357 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15358}
15359
15360/* Return true when INSN mentions register that must be encoded using REX
15361 prefix. */
15362bool
15363x86_extended_reg_mentioned_p (insn)
15364 rtx insn;
15365{
15366 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15367}
15368
e2500fed 15369#include "gt-i386.h"
This page took 3.85338 seconds and 5 git commands to generate.