/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode)                    \
  ((mode) == QImode ? 0                     \
   : (mode) == HImode ? 1                   \
   : (mode) == SImode ? 2                   \
   : (mode) == DImode ? 3                   \
   : 4)

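/* Worked example (illustrative only): MODE_INDEX (QImode) == 0,
   MODE_INDEX (HImode) == 1, MODE_INDEX (SImode) == 2,
   MODE_INDEX (DImode) == 3, and any other mode falls into the last
   slot, 4.  So a lookup such as ix86_cost->mult_init[MODE_INDEX (SImode)]
   (assuming the field name used for these arrays in i386.h) picks the
   third entry of the "cost of starting a multiply" arrays below.  */
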
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  2,                                    /* cost of an add instruction */
  3,                                    /* cost of a lea instruction */
  2,                                    /* variable shift costs */
  3,                                    /* constant shift costs */
  {3, 3, 3, 3, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},                      /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  3,                                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  2,                                    /* cost of FADD and FSUB insns.  */
  2,                                    /* cost of FMUL instruction.  */
  2,                                    /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  2,                                    /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  3,                                    /* variable shift costs */
  2,                                    /* constant shift costs */
  {6, 6, 6, 6, 6},                      /* cost of starting a multiply */
  1,                                    /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  23,                                   /* cost of FADD and FSUB insns.  */
  27,                                   /* cost of FMUL instruction.  */
  88,                                   /* cost of FDIV instruction.  */
  22,                                   /* cost of FABS instruction.  */
  24,                                   /* cost of FCHS instruction.  */
  122,                                  /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  3,                                    /* variable shift costs */
  2,                                    /* constant shift costs */
  {12, 12, 12, 12, 12},                 /* cost of starting a multiply */
  1,                                    /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  8,                                    /* cost of FADD and FSUB insns.  */
  16,                                   /* cost of FMUL instruction.  */
  73,                                   /* cost of FDIV instruction.  */
  3,                                    /* cost of FABS instruction.  */
  3,                                    /* cost of FCHS instruction.  */
  83,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  4,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {11, 11, 11, 11, 11},                 /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  3,                                    /* cost of FADD and FSUB insns.  */
  3,                                    /* cost of FMUL instruction.  */
  39,                                   /* cost of FDIV instruction.  */
  1,                                    /* cost of FABS instruction.  */
  1,                                    /* cost of FCHS instruction.  */
  70,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {4, 4, 4, 4, 4},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  3,                                    /* cost of FADD and FSUB insns.  */
  5,                                    /* cost of FMUL instruction.  */
  56,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  56,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {3, 3, 3, 3, 3},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},                 /* cost of a divide/mod */
  2,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  2,                                    /* cost of FADD and FSUB insns.  */
  2,                                    /* cost of FMUL instruction.  */
  56,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  56,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {5, 5, 5, 5, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  4,                                    /* cost of FADD and FSUB insns.  */
  4,                                    /* cost of FMUL instruction.  */
  24,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  35,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {3, 4, 3, 4, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  4,                                    /* cost of FADD and FSUB insns.  */
  4,                                    /* cost of FMUL instruction.  */
  19,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  35,                                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  4,                                    /* variable shift costs */
  4,                                    /* constant shift costs */
  {15, 15, 15, 15, 15},                 /* cost of starting a multiply */
  0,                                    /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  5,                                    /* cost of FADD and FSUB insns.  */
  7,                                    /* cost of FMUL instruction.  */
  43,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  43,                                   /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

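/* Illustrative note: i386.h tests these masks against the CPU bit, i.e.
   (x86_use_leave & (1 << ix86_cpu)); the same idiom appears below with
   ix86_arch, and override_options uses the shorthand CPUMASK for
   (1 << ix86_cpu).  So, assuming the usual TARGET_* wrapper names in
   i386.h, something like

     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   is true exactly when the selected -mcpu= CPU has its bit set in the
   mask above (here: 386, K6, Athlon and K8).  */
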
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

3d117b30 565/* The "default" register map used in 32bit mode. */
83774849 566
0f290768 567int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
568{
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
576};
577
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
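/* Cross-checking the map below against the list above: gcc register
   numbering runs ax, dx, cx, bx, si, di, bp, sp, so entry 1 (%edx)
   is DWARF register 2, and entries 6 and 7 give %ebp = 5 and
   %esp = 4 as documented.  */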
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

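/* Illustrative mapping (assuming the conventional enum tls_model
   numbering, global-dynamic = 1 through local-exec = 4):
     tls_model_chars[1] == 'G'   (global dynamic)
     tls_model_chars[2] == 'L'   (local dynamic)
     tls_model_chars[3] == 'i'   (initial exec)
     tls_model_chars[4] == 'l'   (local exec)
   Index 0, a space, means "no TLS".  */
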
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

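/* Worked example (assuming the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8): the va_arg register
   save area is 6 * 8 + 8 * 16 == 176 bytes -- six 8-byte slots for the
   integer argument registers followed by eight 16-byte slots for the
   SSE argument registers.  */
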
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

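/* Reading the diagram above on ia32 (illustrative): ARG_POINTER sits
   just above the saved pc, i.e. at the entry %esp plus UNITS_PER_WORD,
   and the three *_offset fields record how far FRAME_POINTER,
   HARD_FRAME_POINTER and the final %esp lie below ARG_POINTER, so
   to_allocate is the amount actually subtracted from the stack pointer
   after the register saves.  */
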
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;            /* for -mcpu=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));
bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, except
   that gcc will use SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
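
/* Classification example (following the psABI; illustrative only): a
   struct { double d; int i; } occupies two eightbytes; the first
   classifies as X86_64_SSEDF_CLASS (the double, passed in an SSE
   register using a DFmode move) and the second as
   X86_64_INTEGERSI_CLASS (the int plus padding, passed in a general
   register using an SImode move) -- exactly the SSESF/SSEDF and
   INTEGERSI refinements described above.  */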
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs

struct gcc_target targetm = TARGET_INITIALIZER;
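
/* From here on, target-independent code reaches these hooks only
   through the targetm vector initialized above; e.g. the scheduler
   calls targetm.sched.adjust_cost (...), which the
   TARGET_SCHED_ADJUST_COST definition above routes to ix86_adjust_cost
   (vector field names per the usual TARGET_INITIALIZER layout).  */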
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

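  /* Reading the alias table above: -march=athlon-xp, for instance,
     selects PROCESSOR_ATHLON and, via the PTA_* flags and the loops
     below, turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE
     unless the user set those masks explicitly on the command line.  */
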
  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 1;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

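  /* For example, with -mcpu=k6 and no explicit -falign-* options, the
     processor table above yields align_loops == 32 with
     align_loops_max_skip == 7.  */
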
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

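  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT == 128, i.e. the 16-byte alignment that
     matches the 128-bit default above.  */
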
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

f996902d
RH
1326 if (ix86_tls_dialect_string)
1327 {
1328 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1329 ix86_tls_dialect = TLS_DIALECT_GNU;
1330 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1331 ix86_tls_dialect = TLS_DIALECT_SUN;
1332 else
1333 error ("bad value (%s) for -mtls-dialect= switch",
1334 ix86_tls_dialect_string);
1335 }
1336
e9a25f70
JL
1337 /* Keep nonleaf frame pointers. */
1338 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1339 flag_omit_frame_pointer = 1;
e075ae69
RH
1340
1341 /* If we're doing fast math, we don't care about comparison order
1342 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1343 if (flag_unsafe_math_optimizations)
e075ae69
RH
1344 target_flags &= ~MASK_IEEE_FP;
1345
30c99a84
RH
1346 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1347 since the insns won't need emulation. */
1348 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1349 target_flags &= ~MASK_NO_FANCY_MATH_387;
1350
14f73b5a
JH
1351 if (TARGET_64BIT)
1352 {
1353 if (TARGET_ALIGN_DOUBLE)
c725bd79 1354 error ("-malign-double makes no sense in 64-bit mode");
14f73b5a 1355 if (TARGET_RTD)
c725bd79 1356 error ("-mrtd calling convention not supported in 64-bit mode");
14f73b5a 1357 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1358 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1359 ix86_fpmath = FPMATH_SSE;
14f73b5a 1360 }
965f5423
JH
1361 else
1362 ix86_fpmath = FPMATH_387;
1363
1364 if (ix86_fpmath_string != 0)
1365 {
1366 if (! strcmp (ix86_fpmath_string, "387"))
1367 ix86_fpmath = FPMATH_387;
1368 else if (! strcmp (ix86_fpmath_string, "sse"))
1369 {
1370 if (!TARGET_SSE)
1371 {
 1372 warning ("SSE instruction set disabled, using 387 arithmetic");
1373 ix86_fpmath = FPMATH_387;
1374 }
1375 else
1376 ix86_fpmath = FPMATH_SSE;
1377 }
1378 else if (! strcmp (ix86_fpmath_string, "387,sse")
1379 || ! strcmp (ix86_fpmath_string, "sse,387"))
1380 {
1381 if (!TARGET_SSE)
1382 {
 1383 warning ("SSE instruction set disabled, using 387 arithmetic");
1384 ix86_fpmath = FPMATH_387;
1385 }
1386 else if (!TARGET_80387)
1387 {
 1388 warning ("387 instruction set disabled, using SSE arithmetic");
1389 ix86_fpmath = FPMATH_SSE;
1390 }
1391 else
1392 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1393 }
fce5a9f2 1394 else
965f5423
JH
1395 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1396 }
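 /* Usage sketch (illustrative, not from the original source): with the
    parsing above, a command line such as

        gcc -msse -mfpmath=sse,387 ...

    sets ix86_fpmath to FPMATH_SSE | FPMATH_387 when both the SSE and 80387
    instruction sets are enabled; if either one is disabled, the warnings
    above fire and the remaining unit is used alone.  */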
14f73b5a 1397
a7180f70
BS
1398 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1399 on by -msse. */
1400 if (TARGET_SSE)
e37af218
RH
1401 {
1402 target_flags |= MASK_MMX;
1403 x86_prefetch_sse = true;
1404 }
c6036a37 1405
47f339cf
BS
 1406 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1407 if (TARGET_3DNOW)
1408 {
1409 target_flags |= MASK_MMX;
d1f87653 1410 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1411 extensions it adds. */
1412 if (x86_3dnow_a & (1 << ix86_arch))
1413 target_flags |= MASK_3DNOW_A;
1414 }
c6036a37 1415 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1416 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1417 && !optimize_size)
1418 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1419
1420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1421 {
1422 char *p;
1423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1424 p = strchr (internal_label_prefix, 'X');
1425 internal_label_prefix_len = p - internal_label_prefix;
1426 *p = '\0';
1427 }
f5316dfe
MM
1428}
1429\f
32b5b1aa 1430void
c6aded7c 1431optimization_options (level, size)
32b5b1aa 1432 int level;
bb5177ac 1433 int size ATTRIBUTE_UNUSED;
32b5b1aa 1434{
e9a25f70
JL
 1435 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
 1436 make the shortage of registers even worse. */
32b5b1aa
SC
1437#ifdef INSN_SCHEDULING
1438 if (level > 1)
1439 flag_schedule_insns = 0;
1440#endif
55ba61f3
JH
1441
 1442 /* The default values of these switches depend on TARGET_64BIT,
 1443 which is not known at this moment. Mark these values with 2 and
 1444 let the user override them. If no command line option specifies
 1445 them, we will set the defaults in override_options. */
1446 if (optimize >= 1)
1447 flag_omit_frame_pointer = 2;
1448 flag_pcc_struct_return = 2;
1449 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1450}
b08de47e 1451\f
91d231cb
JM
1452/* Table of valid machine attributes. */
1453const struct attribute_spec ix86_attribute_table[] =
b08de47e 1454{
91d231cb 1455 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1456 /* Stdcall attribute says callee is responsible for popping arguments
1457 if they are not variable. */
91d231cb 1458 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1459 /* Fastcall attribute says callee is responsible for popping arguments
1460 if they are not variable. */
1461 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1462 /* Cdecl attribute says the callee is a normal C declaration */
1463 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1464 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1465 passed in registers. */
91d231cb
JM
1466 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1467#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1468 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1469 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1470 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1471#endif
fe77449a
DR
1472 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1473 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
91d231cb
JM
1474 { NULL, 0, 0, false, false, false, NULL }
1475};
1476
4977bab6
ZW
1477/* If PIC, we cannot make sibling calls to global functions
1478 because the PLT requires %ebx live.
1479 If we are returning floats on the register stack, we cannot make
1480 sibling calls to functions that return floats. (The stack adjust
1481 instruction will wind up after the sibcall jump, and not be executed.) */
1482
1483static bool
1484ix86_function_ok_for_sibcall (decl, exp)
1485 tree decl;
1486 tree exp;
1487{
1488 /* If we are generating position-independent code, we cannot sibcall
1489 optimize any indirect call, or a direct call to a global function,
1490 as the PLT requires %ebx be live. */
1491 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1492 return false;
1493
1494 /* If we are returning floats on the 80387 register stack, we cannot
1495 make a sibcall from a function that doesn't return a float to a
1496 function that does; the necessary stack adjustment will not be
1497 executed. */
1498 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1499 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1500 return false;
1501
1502 /* If this call is indirect, we'll need to be able to use a call-clobbered
1503 register for the address of the target function. Make sure that all
1504 such registers are not used for passing parameters. */
1505 if (!decl && !TARGET_64BIT)
1506 {
1507 int regparm = ix86_regparm;
1508 tree attr, type;
1509
1510 /* We're looking at the CALL_EXPR, we need the type of the function. */
1511 type = TREE_OPERAND (exp, 0); /* pointer expression */
1512 type = TREE_TYPE (type); /* pointer type */
1513 type = TREE_TYPE (type); /* function type */
1514
1515 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1516 if (attr)
1517 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1518
1519 if (regparm >= 3)
1520 {
1521 /* ??? Need to count the actual number of registers to be used,
1522 not the possible number of registers. Fix later. */
1523 return false;
1524 }
1525 }
1526
1527 /* Otherwise okay. That also includes certain types of indirect calls. */
1528 return true;
1529}
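 /* Illustrative example (not from the original source): under the test
    above, a 32-bit PIC compilation of

        extern int bar (int);
        int foo (int x) { return bar (x); }

    cannot turn the call into a sibcall, because bar is global and the call
    therefore goes through the PLT, which needs %ebx live.  */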
1530
e91f04de 1531/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1532 arguments as in struct attribute_spec.handler. */
1533static tree
1534ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1535 tree *node;
1536 tree name;
1537 tree args ATTRIBUTE_UNUSED;
1538 int flags ATTRIBUTE_UNUSED;
1539 bool *no_add_attrs;
1540{
1541 if (TREE_CODE (*node) != FUNCTION_TYPE
1542 && TREE_CODE (*node) != METHOD_TYPE
1543 && TREE_CODE (*node) != FIELD_DECL
1544 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1545 {
91d231cb
JM
1546 warning ("`%s' attribute only applies to functions",
1547 IDENTIFIER_POINTER (name));
1548 *no_add_attrs = true;
1549 }
e91f04de
CH
1550 else
1551 {
1552 if (is_attribute_p ("fastcall", name))
1553 {
1554 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1555 {
1556 error ("fastcall and stdcall attributes are not compatible");
1557 }
1558 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1559 {
1560 error ("fastcall and regparm attributes are not compatible");
1561 }
1562 }
1563 else if (is_attribute_p ("stdcall", name))
1564 {
1565 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1566 {
1567 error ("fastcall and stdcall attributes are not compatible");
1568 }
1569 }
1570 }
b08de47e 1571
91d231cb
JM
1572 if (TARGET_64BIT)
1573 {
1574 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1575 *no_add_attrs = true;
1576 }
b08de47e 1577
91d231cb
JM
1578 return NULL_TREE;
1579}
b08de47e 1580
91d231cb
JM
1581/* Handle a "regparm" attribute;
1582 arguments as in struct attribute_spec.handler. */
1583static tree
1584ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1585 tree *node;
1586 tree name;
1587 tree args;
1588 int flags ATTRIBUTE_UNUSED;
1589 bool *no_add_attrs;
1590{
1591 if (TREE_CODE (*node) != FUNCTION_TYPE
1592 && TREE_CODE (*node) != METHOD_TYPE
1593 && TREE_CODE (*node) != FIELD_DECL
1594 && TREE_CODE (*node) != TYPE_DECL)
1595 {
1596 warning ("`%s' attribute only applies to functions",
1597 IDENTIFIER_POINTER (name));
1598 *no_add_attrs = true;
1599 }
1600 else
1601 {
1602 tree cst;
b08de47e 1603
91d231cb
JM
1604 cst = TREE_VALUE (args);
1605 if (TREE_CODE (cst) != INTEGER_CST)
1606 {
1607 warning ("`%s' attribute requires an integer constant argument",
1608 IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1610 }
1611 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1612 {
1613 warning ("argument to `%s' attribute larger than %d",
1614 IDENTIFIER_POINTER (name), REGPARM_MAX);
1615 *no_add_attrs = true;
1616 }
e91f04de
CH
1617
1618 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1619 {
1620 error ("fastcall and regparm attributes are not compatible");
1621 }
b08de47e
MM
1622 }
1623
91d231cb 1624 return NULL_TREE;
b08de47e
MM
1625}
1626
1627/* Return 0 if the attributes for two types are incompatible, 1 if they
1628 are compatible, and 2 if they are nearly compatible (which causes a
1629 warning to be generated). */
1630
8d8e52be 1631static int
e075ae69 1632ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1633 tree type1;
1634 tree type2;
b08de47e 1635{
0f290768 1636 /* Check for mismatch of non-default calling convention. */
27c38fbe 1637 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1638
1639 if (TREE_CODE (type1) != FUNCTION_TYPE)
1640 return 1;
1641
e91f04de
CH
 1642 /* Check for mismatched fastcall types. */
1643 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1644 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1645 return 0;
1646
afcfe58c 1647 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1648 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1649 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1650 return 0;
b08de47e
MM
1651 return 1;
1652}
b08de47e 1653\f
483ab821
MM
 1654 /* Return the regparm value for a function with the indicated TYPE. */
1655
1656static int
1657ix86_fntype_regparm (type)
1658 tree type;
1659{
1660 tree attr;
1661
1662 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1663 if (attr)
1664 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1665 else
1666 return ix86_regparm;
1667}
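 /* Illustrative example (not from the original source): for a declaration
    such as

        int add3 (int a, int b, int c) __attribute__ ((regparm (3)));

    ix86_fntype_regparm returns 3, so up to three integer arguments are
    passed in registers instead of on the stack.  */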
1668
b08de47e
MM
1669/* Value is the number of bytes of arguments automatically
1670 popped when returning from a subroutine call.
1671 FUNDECL is the declaration node of the function (as a tree),
1672 FUNTYPE is the data type of the function (as a tree),
1673 or for a library call it is an identifier node for the subroutine name.
1674 SIZE is the number of bytes of arguments passed on the stack.
1675
1676 On the 80386, the RTD insn may be used to pop them if the number
1677 of args is fixed, but if the number is variable then the caller
1678 must pop them all. RTD can't be used for library calls now
1679 because the library is compiled with the Unix compiler.
1680 Use of RTD is a selectable option, since it is incompatible with
1681 standard Unix calling sequences. If the option is not selected,
1682 the caller must always pop the args.
1683
1684 The attribute stdcall is equivalent to RTD on a per module basis. */
1685
1686int
e075ae69 1687ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1688 tree fundecl;
1689 tree funtype;
1690 int size;
79325812 1691{
3345ee7d 1692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1693
0f290768 1694 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1696
e91f04de
CH
1697 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1698 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1699 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1700 rtd = 1;
79325812 1701
698cdd84
SC
1702 if (rtd
1703 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1704 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1705 == void_type_node)))
698cdd84
SC
1706 return size;
1707 }
79325812 1708
232b8f52 1709 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1710 if (aggregate_value_p (TREE_TYPE (funtype))
1711 && !TARGET_64BIT)
232b8f52 1712 {
483ab821 1713 int nregs = ix86_fntype_regparm (funtype);
232b8f52
JJ
1714
1715 if (!nregs)
1716 return GET_MODE_SIZE (Pmode);
1717 }
1718
1719 return 0;
b08de47e 1720}
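 /* Illustrative example (not from the original source): for

        void __attribute__ ((stdcall)) f (int x);

    ix86_return_pops_args returns 4, so the callee pops its one word of
    arguments (in effect a "ret $4"), while a plain cdecl function returns 0
    and leaves the popping to the caller.  */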
b08de47e
MM
1721\f
1722/* Argument support functions. */
1723
53c17031
JH
1724/* Return true when register may be used to pass function parameters. */
1725bool
1726ix86_function_arg_regno_p (regno)
1727 int regno;
1728{
1729 int i;
1730 if (!TARGET_64BIT)
0333394e
JJ
1731 return (regno < REGPARM_MAX
1732 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1733 if (SSE_REGNO_P (regno) && TARGET_SSE)
1734 return true;
1735 /* RAX is used as hidden argument to va_arg functions. */
1736 if (!regno)
1737 return true;
1738 for (i = 0; i < REGPARM_MAX; i++)
1739 if (regno == x86_64_int_parameter_registers[i])
1740 return true;
1741 return false;
1742}
1743
b08de47e
MM
1744/* Initialize a variable CUM of type CUMULATIVE_ARGS
1745 for a call to a function whose data type is FNTYPE.
1746 For a library call, FNTYPE is 0. */
1747
1748void
1749init_cumulative_args (cum, fntype, libname)
e9a25f70 1750 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1751 tree fntype; /* tree ptr for function decl */
1752 rtx libname; /* SYMBOL_REF of library name or 0 */
1753{
1754 static CUMULATIVE_ARGS zero_cum;
1755 tree param, next_param;
1756
1757 if (TARGET_DEBUG_ARG)
1758 {
1759 fprintf (stderr, "\ninit_cumulative_args (");
1760 if (fntype)
e9a25f70
JL
1761 fprintf (stderr, "fntype code = %s, ret code = %s",
1762 tree_code_name[(int) TREE_CODE (fntype)],
1763 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1764 else
1765 fprintf (stderr, "no fntype");
1766
1767 if (libname)
1768 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1769 }
1770
1771 *cum = zero_cum;
1772
1773 /* Set up the number of registers to use for passing arguments. */
e075ae69 1774 cum->nregs = ix86_regparm;
53c17031
JH
1775 cum->sse_nregs = SSE_REGPARM_MAX;
1776 if (fntype && !TARGET_64BIT)
b08de47e
MM
1777 {
1778 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1779
b08de47e
MM
1780 if (attr)
1781 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1782 }
53c17031 1783 cum->maybe_vaarg = false;
b08de47e 1784
e91f04de
CH
 1785 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1786 if (fntype && !TARGET_64BIT)
1787 {
1788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1789 {
1790 cum->nregs = 2;
1791 cum->fastcall = 1;
1792 }
1793 }
1794
1795
b08de47e
MM
 1796 /* Determine if this function has variable arguments. This is
 1797 indicated by the last argument being 'void_type_node' if there
 1798 are no variable arguments. If there are variable arguments, then
 1799 we won't pass anything in registers. */
1800
1801 if (cum->nregs)
1802 {
1803 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1804 param != 0; param = next_param)
b08de47e
MM
1805 {
1806 next_param = TREE_CHAIN (param);
e9a25f70 1807 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1808 {
1809 if (!TARGET_64BIT)
e91f04de
CH
1810 {
1811 cum->nregs = 0;
1812 cum->fastcall = 0;
1813 }
53c17031
JH
1814 cum->maybe_vaarg = true;
1815 }
b08de47e
MM
1816 }
1817 }
53c17031
JH
1818 if ((!fntype && !libname)
1819 || (fntype && !TYPE_ARG_TYPES (fntype)))
1820 cum->maybe_vaarg = 1;
b08de47e
MM
1821
1822 if (TARGET_DEBUG_ARG)
1823 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1824
1825 return;
1826}
1827
d1f87653 1828 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
f710504c 1829 of this code is to classify each 8 bytes of the incoming argument by the register
53c17031
JH
1830 class and assign registers accordingly. */
1831
1832/* Return the union class of CLASS1 and CLASS2.
1833 See the x86-64 PS ABI for details. */
1834
1835static enum x86_64_reg_class
1836merge_classes (class1, class2)
1837 enum x86_64_reg_class class1, class2;
1838{
1839 /* Rule #1: If both classes are equal, this is the resulting class. */
1840 if (class1 == class2)
1841 return class1;
1842
1843 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1844 the other class. */
1845 if (class1 == X86_64_NO_CLASS)
1846 return class2;
1847 if (class2 == X86_64_NO_CLASS)
1848 return class1;
1849
1850 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1851 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1852 return X86_64_MEMORY_CLASS;
1853
1854 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1855 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1856 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1857 return X86_64_INTEGERSI_CLASS;
1858 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1859 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1860 return X86_64_INTEGER_CLASS;
1861
1862 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1863 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1864 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1865 return X86_64_MEMORY_CLASS;
1866
1867 /* Rule #6: Otherwise class SSE is used. */
1868 return X86_64_SSE_CLASS;
1869}
1870
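 /* Worked example (illustrative, not from the original source): by rule #4
    above, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
    X86_64_INTEGERSI_CLASS, while by rule #5 merging X86_64_X87_CLASS with
    X86_64_SSE_CLASS yields X86_64_MEMORY_CLASS.  */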
1871/* Classify the argument of type TYPE and mode MODE.
1872 CLASSES will be filled by the register class used to pass each word
1873 of the operand. The number of words is returned. In case the parameter
1874 should be passed in memory, 0 is returned. As a special case for zero
1875 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1876
 1877 BIT_OFFSET is used internally for handling records; it specifies the
 1878 offset in bits, modulo 256, to avoid overflow cases.
1879
1880 See the x86-64 PS ABI for details.
1881*/
1882
1883static int
1884classify_argument (mode, type, classes, bit_offset)
1885 enum machine_mode mode;
1886 tree type;
1887 enum x86_64_reg_class classes[MAX_CLASSES];
1888 int bit_offset;
1889{
1890 int bytes =
1891 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1892 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1893
c60ee6f5
JH
1894 /* Variable sized entities are always passed/returned in memory. */
1895 if (bytes < 0)
1896 return 0;
1897
53c17031
JH
1898 if (type && AGGREGATE_TYPE_P (type))
1899 {
1900 int i;
1901 tree field;
1902 enum x86_64_reg_class subclasses[MAX_CLASSES];
1903
1904 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1905 if (bytes > 16)
1906 return 0;
1907
1908 for (i = 0; i < words; i++)
1909 classes[i] = X86_64_NO_CLASS;
1910
 1911 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
 1912 signal the memory class, so handle them as a special case. */
1913 if (!words)
1914 {
1915 classes[0] = X86_64_NO_CLASS;
1916 return 1;
1917 }
1918
1919 /* Classify each field of record and merge classes. */
1920 if (TREE_CODE (type) == RECORD_TYPE)
1921 {
91ea38f9
JH
 1922 /* For classes, first merge in the fields of the base classes. */
1923 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1924 {
1925 tree bases = TYPE_BINFO_BASETYPES (type);
1926 int n_bases = TREE_VEC_LENGTH (bases);
1927 int i;
1928
1929 for (i = 0; i < n_bases; ++i)
1930 {
1931 tree binfo = TREE_VEC_ELT (bases, i);
1932 int num;
1933 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1934 tree type = BINFO_TYPE (binfo);
1935
1936 num = classify_argument (TYPE_MODE (type),
1937 type, subclasses,
1938 (offset + bit_offset) % 256);
1939 if (!num)
1940 return 0;
1941 for (i = 0; i < num; i++)
1942 {
db01f480 1943 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1944 classes[i + pos] =
1945 merge_classes (subclasses[i], classes[i + pos]);
1946 }
1947 }
1948 }
1949 /* And now merge the fields of structure. */
53c17031
JH
1950 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1951 {
1952 if (TREE_CODE (field) == FIELD_DECL)
1953 {
1954 int num;
1955
1956 /* Bitfields are always classified as integer. Handle them
1957 early, since later code would consider them to be
1958 misaligned integers. */
1959 if (DECL_BIT_FIELD (field))
1960 {
1961 for (i = int_bit_position (field) / 8 / 8;
1962 i < (int_bit_position (field)
1963 + tree_low_cst (DECL_SIZE (field), 0)
1964 + 63) / 8 / 8; i++)
1965 classes[i] =
1966 merge_classes (X86_64_INTEGER_CLASS,
1967 classes[i]);
1968 }
1969 else
1970 {
1971 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1972 TREE_TYPE (field), subclasses,
1973 (int_bit_position (field)
1974 + bit_offset) % 256);
1975 if (!num)
1976 return 0;
1977 for (i = 0; i < num; i++)
1978 {
1979 int pos =
db01f480 1980 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1981 classes[i + pos] =
1982 merge_classes (subclasses[i], classes[i + pos]);
1983 }
1984 }
1985 }
1986 }
1987 }
1988 /* Arrays are handled as small records. */
1989 else if (TREE_CODE (type) == ARRAY_TYPE)
1990 {
1991 int num;
1992 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1993 TREE_TYPE (type), subclasses, bit_offset);
1994 if (!num)
1995 return 0;
1996
1997 /* The partial classes are now full classes. */
1998 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1999 subclasses[0] = X86_64_SSE_CLASS;
2000 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2001 subclasses[0] = X86_64_INTEGER_CLASS;
2002
2003 for (i = 0; i < words; i++)
2004 classes[i] = subclasses[i % num];
2005 }
 2006 /* Unions are similar to RECORD_TYPE, but the offset is always 0. */
e4dbaed5
AS
2007 else if (TREE_CODE (type) == UNION_TYPE
2008 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2009 {
91ea38f9
JH
 2010 /* For classes, first merge in the fields of the base classes. */
2011 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2012 {
2013 tree bases = TYPE_BINFO_BASETYPES (type);
2014 int n_bases = TREE_VEC_LENGTH (bases);
2015 int i;
2016
2017 for (i = 0; i < n_bases; ++i)
2018 {
2019 tree binfo = TREE_VEC_ELT (bases, i);
2020 int num;
2021 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2022 tree type = BINFO_TYPE (binfo);
2023
2024 num = classify_argument (TYPE_MODE (type),
2025 type, subclasses,
db01f480 2026 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2027 if (!num)
2028 return 0;
2029 for (i = 0; i < num; i++)
2030 {
c16576e6 2031 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2032 classes[i + pos] =
2033 merge_classes (subclasses[i], classes[i + pos]);
2034 }
2035 }
2036 }
53c17031
JH
2037 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2038 {
2039 if (TREE_CODE (field) == FIELD_DECL)
2040 {
2041 int num;
2042 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2043 TREE_TYPE (field), subclasses,
2044 bit_offset);
2045 if (!num)
2046 return 0;
2047 for (i = 0; i < num; i++)
2048 classes[i] = merge_classes (subclasses[i], classes[i]);
2049 }
2050 }
2051 }
2052 else
2053 abort ();
2054
2055 /* Final merger cleanup. */
2056 for (i = 0; i < words; i++)
2057 {
2058 /* If one class is MEMORY, everything should be passed in
2059 memory. */
2060 if (classes[i] == X86_64_MEMORY_CLASS)
2061 return 0;
2062
d6a7951f 2063 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
2064 X86_64_SSE_CLASS. */
2065 if (classes[i] == X86_64_SSEUP_CLASS
2066 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2067 classes[i] = X86_64_SSE_CLASS;
2068
d6a7951f 2069 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2070 if (classes[i] == X86_64_X87UP_CLASS
2071 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2072 classes[i] = X86_64_SSE_CLASS;
2073 }
2074 return words;
2075 }
2076
 2077 /* Compute the alignment needed. We align all types to natural boundaries,
 2078 with the exception of XFmode, which is aligned to 64 bits. */
2079 if (mode != VOIDmode && mode != BLKmode)
2080 {
2081 int mode_alignment = GET_MODE_BITSIZE (mode);
2082
2083 if (mode == XFmode)
2084 mode_alignment = 128;
2085 else if (mode == XCmode)
2086 mode_alignment = 256;
f5143c46 2087 /* Misaligned fields are always returned in memory. */
53c17031
JH
2088 if (bit_offset % mode_alignment)
2089 return 0;
2090 }
2091
2092 /* Classification of atomic types. */
2093 switch (mode)
2094 {
2095 case DImode:
2096 case SImode:
2097 case HImode:
2098 case QImode:
2099 case CSImode:
2100 case CHImode:
2101 case CQImode:
2102 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2103 classes[0] = X86_64_INTEGERSI_CLASS;
2104 else
2105 classes[0] = X86_64_INTEGER_CLASS;
2106 return 1;
2107 case CDImode:
2108 case TImode:
2109 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2110 return 2;
2111 case CTImode:
2112 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2113 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2114 return 4;
2115 case SFmode:
2116 if (!(bit_offset % 64))
2117 classes[0] = X86_64_SSESF_CLASS;
2118 else
2119 classes[0] = X86_64_SSE_CLASS;
2120 return 1;
2121 case DFmode:
2122 classes[0] = X86_64_SSEDF_CLASS;
2123 return 1;
2124 case TFmode:
2125 classes[0] = X86_64_X87_CLASS;
2126 classes[1] = X86_64_X87UP_CLASS;
2127 return 2;
2128 case TCmode:
2129 classes[0] = X86_64_X87_CLASS;
2130 classes[1] = X86_64_X87UP_CLASS;
2131 classes[2] = X86_64_X87_CLASS;
2132 classes[3] = X86_64_X87UP_CLASS;
2133 return 4;
2134 case DCmode:
2135 classes[0] = X86_64_SSEDF_CLASS;
2136 classes[1] = X86_64_SSEDF_CLASS;
2137 return 2;
2138 case SCmode:
2139 classes[0] = X86_64_SSE_CLASS;
2140 return 1;
e95d6b23
JH
2141 case V4SFmode:
2142 case V4SImode:
495333a6
JH
2143 case V16QImode:
2144 case V8HImode:
2145 case V2DFmode:
2146 case V2DImode:
e95d6b23
JH
2147 classes[0] = X86_64_SSE_CLASS;
2148 classes[1] = X86_64_SSEUP_CLASS;
2149 return 2;
2150 case V2SFmode:
2151 case V2SImode:
2152 case V4HImode:
2153 case V8QImode:
1194ca05 2154 return 0;
53c17031 2155 case BLKmode:
e95d6b23 2156 case VOIDmode:
53c17031
JH
2157 return 0;
2158 default:
2159 abort ();
2160 }
2161}
2162
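 /* Worked example (illustrative, not from the original source): for a
    structure such as

        struct s { long l; double d; };

    classify_argument produces two eightbytes, the first classified
    X86_64_INTEGER_CLASS and the second X86_64_SSEDF_CLASS, so the structure
    is passed in one general-purpose register and one SSE register.  */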
 2163 /* Examine the argument and return the number of registers required in each
f5143c46 2164 class. Return 0 iff the parameter should be passed in memory. */
53c17031
JH
2165static int
2166examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2167 enum machine_mode mode;
2168 tree type;
2169 int *int_nregs, *sse_nregs;
2170 int in_return;
2171{
2172 enum x86_64_reg_class class[MAX_CLASSES];
2173 int n = classify_argument (mode, type, class, 0);
2174
2175 *int_nregs = 0;
2176 *sse_nregs = 0;
2177 if (!n)
2178 return 0;
2179 for (n--; n >= 0; n--)
2180 switch (class[n])
2181 {
2182 case X86_64_INTEGER_CLASS:
2183 case X86_64_INTEGERSI_CLASS:
2184 (*int_nregs)++;
2185 break;
2186 case X86_64_SSE_CLASS:
2187 case X86_64_SSESF_CLASS:
2188 case X86_64_SSEDF_CLASS:
2189 (*sse_nregs)++;
2190 break;
2191 case X86_64_NO_CLASS:
2192 case X86_64_SSEUP_CLASS:
2193 break;
2194 case X86_64_X87_CLASS:
2195 case X86_64_X87UP_CLASS:
2196 if (!in_return)
2197 return 0;
2198 break;
2199 case X86_64_MEMORY_CLASS:
2200 abort ();
2201 }
2202 return 1;
2203}
 2203}
 2204 /* Construct the container for the argument used by the GCC interface. See
 2205 FUNCTION_ARG for the detailed description. */
 2206 static rtx
2207construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2208 enum machine_mode mode;
2209 tree type;
2210 int in_return;
2211 int nintregs, nsseregs;
07933f72
GS
2212 const int * intreg;
2213 int sse_regno;
53c17031
JH
2214{
2215 enum machine_mode tmpmode;
2216 int bytes =
2217 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2218 enum x86_64_reg_class class[MAX_CLASSES];
2219 int n;
2220 int i;
2221 int nexps = 0;
2222 int needed_sseregs, needed_intregs;
2223 rtx exp[MAX_CLASSES];
2224 rtx ret;
2225
2226 n = classify_argument (mode, type, class, 0);
2227 if (TARGET_DEBUG_ARG)
2228 {
2229 if (!n)
2230 fprintf (stderr, "Memory class\n");
2231 else
2232 {
2233 fprintf (stderr, "Classes:");
2234 for (i = 0; i < n; i++)
2235 {
2236 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2237 }
2238 fprintf (stderr, "\n");
2239 }
2240 }
2241 if (!n)
2242 return NULL;
2243 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2244 return NULL;
2245 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2246 return NULL;
2247
 2248 /* First construct the simple cases. Avoid SCmode, since we want to use a
 2249 single register to pass this type. */
2250 if (n == 1 && mode != SCmode)
2251 switch (class[0])
2252 {
2253 case X86_64_INTEGER_CLASS:
2254 case X86_64_INTEGERSI_CLASS:
2255 return gen_rtx_REG (mode, intreg[0]);
2256 case X86_64_SSE_CLASS:
2257 case X86_64_SSESF_CLASS:
2258 case X86_64_SSEDF_CLASS:
2259 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2260 case X86_64_X87_CLASS:
2261 return gen_rtx_REG (mode, FIRST_STACK_REG);
2262 case X86_64_NO_CLASS:
2263 /* Zero sized array, struct or class. */
2264 return NULL;
2265 default:
2266 abort ();
2267 }
2268 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2269 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2270 if (n == 2
2271 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2272 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2273 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2274 && class[1] == X86_64_INTEGER_CLASS
2275 && (mode == CDImode || mode == TImode)
2276 && intreg[0] + 1 == intreg[1])
2277 return gen_rtx_REG (mode, intreg[0]);
2278 if (n == 4
2279 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2280 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2281 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2282
2283 /* Otherwise figure out the entries of the PARALLEL. */
2284 for (i = 0; i < n; i++)
2285 {
2286 switch (class[i])
2287 {
2288 case X86_64_NO_CLASS:
2289 break;
2290 case X86_64_INTEGER_CLASS:
2291 case X86_64_INTEGERSI_CLASS:
d1f87653 2292 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2293 if (i * 8 + 8 > bytes)
2294 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2295 else if (class[i] == X86_64_INTEGERSI_CLASS)
2296 tmpmode = SImode;
2297 else
2298 tmpmode = DImode;
 2299 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2300 if (tmpmode == BLKmode)
2301 tmpmode = DImode;
2302 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2303 gen_rtx_REG (tmpmode, *intreg),
2304 GEN_INT (i*8));
2305 intreg++;
2306 break;
2307 case X86_64_SSESF_CLASS:
2308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2309 gen_rtx_REG (SFmode,
2310 SSE_REGNO (sse_regno)),
2311 GEN_INT (i*8));
2312 sse_regno++;
2313 break;
2314 case X86_64_SSEDF_CLASS:
2315 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2316 gen_rtx_REG (DFmode,
2317 SSE_REGNO (sse_regno)),
2318 GEN_INT (i*8));
2319 sse_regno++;
2320 break;
2321 case X86_64_SSE_CLASS:
12f5c45e
JH
2322 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2323 tmpmode = TImode;
53c17031
JH
2324 else
2325 tmpmode = DImode;
2326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2327 gen_rtx_REG (tmpmode,
2328 SSE_REGNO (sse_regno)),
2329 GEN_INT (i*8));
12f5c45e
JH
2330 if (tmpmode == TImode)
2331 i++;
53c17031
JH
2332 sse_regno++;
2333 break;
2334 default:
2335 abort ();
2336 }
2337 }
2338 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2339 for (i = 0; i < nexps; i++)
2340 XVECEXP (ret, 0, i) = exp [i];
2341 return ret;
2342}
2343
b08de47e
MM
2344/* Update the data in CUM to advance over an argument
2345 of mode MODE and data type TYPE.
2346 (TYPE is null for libcalls where that information may not be available.) */
2347
2348void
2349function_arg_advance (cum, mode, type, named)
2350 CUMULATIVE_ARGS *cum; /* current arg information */
2351 enum machine_mode mode; /* current arg mode */
2352 tree type; /* type of the argument or 0 if lib support */
2353 int named; /* whether or not the argument was named */
2354{
5ac9118e
KG
2355 int bytes =
2356 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2357 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2358
2359 if (TARGET_DEBUG_ARG)
2360 fprintf (stderr,
e9a25f70 2361 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2362 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2363 if (TARGET_64BIT)
b08de47e 2364 {
53c17031
JH
2365 int int_nregs, sse_nregs;
2366 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2367 cum->words += words;
2368 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2369 {
53c17031
JH
2370 cum->nregs -= int_nregs;
2371 cum->sse_nregs -= sse_nregs;
2372 cum->regno += int_nregs;
2373 cum->sse_regno += sse_nregs;
82a127a9 2374 }
53c17031
JH
2375 else
2376 cum->words += words;
b08de47e 2377 }
a4f31c00 2378 else
82a127a9 2379 {
53c17031
JH
2380 if (TARGET_SSE && mode == TImode)
2381 {
2382 cum->sse_words += words;
2383 cum->sse_nregs -= 1;
2384 cum->sse_regno += 1;
2385 if (cum->sse_nregs <= 0)
2386 {
2387 cum->sse_nregs = 0;
2388 cum->sse_regno = 0;
2389 }
2390 }
2391 else
82a127a9 2392 {
53c17031
JH
2393 cum->words += words;
2394 cum->nregs -= words;
2395 cum->regno += words;
2396
2397 if (cum->nregs <= 0)
2398 {
2399 cum->nregs = 0;
2400 cum->regno = 0;
2401 }
82a127a9
CM
2402 }
2403 }
b08de47e
MM
2404 return;
2405}
2406
2407/* Define where to put the arguments to a function.
2408 Value is zero to push the argument on the stack,
2409 or a hard register in which to store the argument.
2410
2411 MODE is the argument's machine mode.
2412 TYPE is the data type of the argument (as a tree).
2413 This is null for libcalls where that information may
2414 not be available.
2415 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2416 the preceding args and about the function being called.
2417 NAMED is nonzero if this argument is a named parameter
2418 (otherwise it is an extra parameter matching an ellipsis). */
2419
07933f72 2420rtx
b08de47e
MM
2421function_arg (cum, mode, type, named)
2422 CUMULATIVE_ARGS *cum; /* current arg information */
2423 enum machine_mode mode; /* current arg mode */
2424 tree type; /* type of the argument or 0 if lib support */
2425 int named; /* != 0 for normal args, == 0 for ... args */
2426{
2427 rtx ret = NULL_RTX;
5ac9118e
KG
2428 int bytes =
2429 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2430 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2431
5bdc5878 2432 /* Handle a hidden AL argument containing the number of registers for varargs
53c17031
JH
 2433 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2434 any AL settings. */
32ee7d1d 2435 if (mode == VOIDmode)
b08de47e 2436 {
53c17031
JH
2437 if (TARGET_64BIT)
2438 return GEN_INT (cum->maybe_vaarg
2439 ? (cum->sse_nregs < 0
2440 ? SSE_REGPARM_MAX
2441 : cum->sse_regno)
2442 : -1);
2443 else
2444 return constm1_rtx;
b08de47e 2445 }
53c17031
JH
2446 if (TARGET_64BIT)
2447 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2448 &x86_64_int_parameter_registers [cum->regno],
2449 cum->sse_regno);
2450 else
2451 switch (mode)
2452 {
2453 /* For now, pass fp/complex values on the stack. */
2454 default:
2455 break;
2456
2457 case BLKmode:
2458 case DImode:
2459 case SImode:
2460 case HImode:
2461 case QImode:
2462 if (words <= cum->nregs)
e91f04de
CH
2463 {
2464 int regno = cum->regno;
2465
2466 /* Fastcall allocates the first two DWORD (SImode) or
2467 smaller arguments to ECX and EDX. */
2468 if (cum->fastcall)
2469 {
2470 if (mode == BLKmode || mode == DImode)
2471 break;
2472
 2473 /* ECX, not EAX, is the first allocated register. */
2474 if (regno == 0)
2475 regno = 2;
2476 }
2477 ret = gen_rtx_REG (mode, regno);
2478 }
53c17031
JH
2479 break;
2480 case TImode:
2481 if (cum->sse_nregs)
2482 ret = gen_rtx_REG (mode, cum->sse_regno);
2483 break;
2484 }
b08de47e
MM
2485
2486 if (TARGET_DEBUG_ARG)
2487 {
2488 fprintf (stderr,
91ea38f9 2489 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2490 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2491
2492 if (ret)
91ea38f9 2493 print_simple_rtl (stderr, ret);
b08de47e
MM
2494 else
2495 fprintf (stderr, ", stack");
2496
2497 fprintf (stderr, " )\n");
2498 }
2499
2500 return ret;
2501}
53c17031 2502
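 /* Illustrative example (not from the original source): given the fastcall
    handling above, a declaration such as

        void __attribute__ ((fastcall)) f (int a, int b);

    passes a in %ecx and b in %edx, while a DImode or BLKmode argument
    breaks out of the switch and is passed on the stack instead.  */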
09b2e78d
ZD
2503/* A C expression that indicates when an argument must be passed by
2504 reference. If nonzero for an argument, a copy of that argument is
2505 made in memory and a pointer to the argument is passed instead of
2506 the argument itself. The pointer is passed in whatever way is
2507 appropriate for passing a pointer to that type. */
2508
2509int
2510function_arg_pass_by_reference (cum, mode, type, named)
2511 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2512 enum machine_mode mode ATTRIBUTE_UNUSED;
2513 tree type;
2514 int named ATTRIBUTE_UNUSED;
2515{
2516 if (!TARGET_64BIT)
2517 return 0;
2518
2519 if (type && int_size_in_bytes (type) == -1)
2520 {
2521 if (TARGET_DEBUG_ARG)
2522 fprintf (stderr, "function_arg_pass_by_reference\n");
2523 return 1;
2524 }
2525
2526 return 0;
2527}
2528
53c17031
JH
2529/* Gives the alignment boundary, in bits, of an argument with the specified mode
2530 and type. */
2531
2532int
2533ix86_function_arg_boundary (mode, type)
2534 enum machine_mode mode;
2535 tree type;
2536{
2537 int align;
2538 if (!TARGET_64BIT)
2539 return PARM_BOUNDARY;
2540 if (type)
2541 align = TYPE_ALIGN (type);
2542 else
2543 align = GET_MODE_ALIGNMENT (mode);
2544 if (align < PARM_BOUNDARY)
2545 align = PARM_BOUNDARY;
2546 if (align > 128)
2547 align = 128;
2548 return align;
2549}
2550
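 /* Illustrative example (not from the original source): on x86-64 the
    function above gives a 128-bit vector argument its natural 128-bit slot
    alignment, while a plain int is only aligned to PARM_BOUNDARY; any
    alignment beyond 128 bits is capped at 128.  */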
2551/* Return true if N is a possible register number of function value. */
2552bool
2553ix86_function_value_regno_p (regno)
2554 int regno;
2555{
2556 if (!TARGET_64BIT)
2557 {
2558 return ((regno) == 0
2559 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2560 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2561 }
2562 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2563 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2564 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2565}
2566
2567/* Define how to find the value returned by a function.
2568 VALTYPE is the data type of the value (as a tree).
2569 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2570 otherwise, FUNC is 0. */
2571rtx
2572ix86_function_value (valtype)
2573 tree valtype;
2574{
2575 if (TARGET_64BIT)
2576 {
2577 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2578 REGPARM_MAX, SSE_REGPARM_MAX,
2579 x86_64_int_return_registers, 0);
d1f87653
KH
 2580 /* For zero-sized structures, construct_container returns NULL, but we need
 2581 to keep the rest of the compiler happy by returning a meaningful value. */
53c17031
JH
2582 if (!ret)
2583 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2584 return ret;
2585 }
2586 else
b069de3b
SS
2587 return gen_rtx_REG (TYPE_MODE (valtype),
2588 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2589}
2590
f5143c46 2591 /* Return nonzero iff TYPE is returned in memory. */
53c17031
JH
2592int
2593ix86_return_in_memory (type)
2594 tree type;
2595{
2596 int needed_intregs, needed_sseregs;
2597 if (TARGET_64BIT)
2598 {
2599 return !examine_argument (TYPE_MODE (type), type, 1,
2600 &needed_intregs, &needed_sseregs);
2601 }
2602 else
2603 {
2604 if (TYPE_MODE (type) == BLKmode
2605 || (VECTOR_MODE_P (TYPE_MODE (type))
2606 && int_size_in_bytes (type) == 8)
2607 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2608 && TYPE_MODE (type) != TFmode
2609 && !VECTOR_MODE_P (TYPE_MODE (type))))
2610 return 1;
2611 return 0;
2612 }
2613}
2614
2615/* Define how to find the value returned by a library function
2616 assuming the value has mode MODE. */
2617rtx
2618ix86_libcall_value (mode)
2619 enum machine_mode mode;
2620{
2621 if (TARGET_64BIT)
2622 {
2623 switch (mode)
2624 {
2625 case SFmode:
2626 case SCmode:
2627 case DFmode:
2628 case DCmode:
2629 return gen_rtx_REG (mode, FIRST_SSE_REG);
2630 case TFmode:
2631 case TCmode:
2632 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2633 default:
2634 return gen_rtx_REG (mode, 0);
2635 }
2636 }
2637 else
b069de3b
SS
2638 return gen_rtx_REG (mode, ix86_value_regno (mode));
2639}
2640
2641/* Given a mode, return the register to use for a return value. */
2642
2643static int
2644ix86_value_regno (mode)
2645 enum machine_mode mode;
2646{
2647 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2648 return FIRST_FLOAT_REG;
2649 if (mode == TImode || VECTOR_MODE_P (mode))
2650 return FIRST_SSE_REG;
2651 return 0;
53c17031 2652}
ad919812
JH
2653\f
2654/* Create the va_list data type. */
53c17031 2655
ad919812
JH
2656tree
2657ix86_build_va_list ()
2658{
2659 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2660
ad919812
JH
2661 /* For i386 we use plain pointer to argument area. */
2662 if (!TARGET_64BIT)
2663 return build_pointer_type (char_type_node);
2664
f1e639b1 2665 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2666 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2667
fce5a9f2 2668 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2669 unsigned_type_node);
fce5a9f2 2670 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2671 unsigned_type_node);
2672 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2673 ptr_type_node);
2674 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2675 ptr_type_node);
2676
2677 DECL_FIELD_CONTEXT (f_gpr) = record;
2678 DECL_FIELD_CONTEXT (f_fpr) = record;
2679 DECL_FIELD_CONTEXT (f_ovf) = record;
2680 DECL_FIELD_CONTEXT (f_sav) = record;
2681
2682 TREE_CHAIN (record) = type_decl;
2683 TYPE_NAME (record) = type_decl;
2684 TYPE_FIELDS (record) = f_gpr;
2685 TREE_CHAIN (f_gpr) = f_fpr;
2686 TREE_CHAIN (f_fpr) = f_ovf;
2687 TREE_CHAIN (f_ovf) = f_sav;
2688
2689 layout_type (record);
2690
2691 /* The correct type is an array type of one element. */
2692 return build_array_type (record, build_index_type (size_zero_node));
2693}
2694
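 /* Illustrative sketch (not from the original source): the record built
    above is the x86-64 psABI va_list, equivalent to the C declaration

        struct __va_list_tag
        {
          unsigned int gp_offset;
          unsigned int fp_offset;
          void *overflow_arg_area;
          void *reg_save_area;
        };
        typedef struct __va_list_tag va_list[1];

    with the array-of-one-element type returned at the end of the function.  */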
 2695 /* Perform any actions needed for a function that is receiving a
fce5a9f2 2696 variable number of arguments.
ad919812
JH
2697
2698 CUM is as above.
2699
2700 MODE and TYPE are the mode and type of the current parameter.
2701
2702 PRETEND_SIZE is a variable that should be set to the amount of stack
2703 that must be pushed by the prolog to pretend that our caller pushed
2704 it.
2705
2706 Normally, this macro will push all remaining incoming registers on the
2707 stack and set PRETEND_SIZE to the length of the registers pushed. */
2708
2709void
2710ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2711 CUMULATIVE_ARGS *cum;
2712 enum machine_mode mode;
2713 tree type;
2714 int *pretend_size ATTRIBUTE_UNUSED;
2715 int no_rtl;
2716
2717{
2718 CUMULATIVE_ARGS next_cum;
2719 rtx save_area = NULL_RTX, mem;
2720 rtx label;
2721 rtx label_ref;
2722 rtx tmp_reg;
2723 rtx nsse_reg;
2724 int set;
2725 tree fntype;
2726 int stdarg_p;
2727 int i;
2728
2729 if (!TARGET_64BIT)
2730 return;
2731
2732 /* Indicate to allocate space on the stack for varargs save area. */
2733 ix86_save_varrargs_registers = 1;
2734
2735 fntype = TREE_TYPE (current_function_decl);
2736 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2737 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2738 != void_type_node));
2739
2740 /* For varargs, we do not want to skip the dummy va_dcl argument.
2741 For stdargs, we do want to skip the last named argument. */
2742 next_cum = *cum;
2743 if (stdarg_p)
2744 function_arg_advance (&next_cum, mode, type, 1);
2745
2746 if (!no_rtl)
2747 save_area = frame_pointer_rtx;
2748
2749 set = get_varargs_alias_set ();
2750
2751 for (i = next_cum.regno; i < ix86_regparm; i++)
2752 {
2753 mem = gen_rtx_MEM (Pmode,
2754 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2755 set_mem_alias_set (mem, set);
ad919812
JH
2756 emit_move_insn (mem, gen_rtx_REG (Pmode,
2757 x86_64_int_parameter_registers[i]));
2758 }
2759
2760 if (next_cum.sse_nregs)
2761 {
 2762 /* Now emit code to save SSE registers. The AX parameter contains the number
d1f87653 2763 of SSE parameter registers used to call this function. We use
ad919812
JH
2764 sse_prologue_save insn template that produces computed jump across
2765 SSE saves. We need some preparation work to get this working. */
2766
2767 label = gen_label_rtx ();
2768 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2769
 2770 /* Compute the address to jump to:
 2771 label - 4*eax + nnamed_sse_arguments*4 */
2772 tmp_reg = gen_reg_rtx (Pmode);
2773 nsse_reg = gen_reg_rtx (Pmode);
2774 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2775 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2776 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2777 GEN_INT (4))));
2778 if (next_cum.sse_regno)
2779 emit_move_insn
2780 (nsse_reg,
2781 gen_rtx_CONST (DImode,
2782 gen_rtx_PLUS (DImode,
2783 label_ref,
2784 GEN_INT (next_cum.sse_regno * 4))));
2785 else
2786 emit_move_insn (nsse_reg, label_ref);
2787 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2788
 2789 /* Compute the address of the memory block we save into. We always use a
 2790 pointer pointing 127 bytes after the first byte to store - this is
 2791 needed to keep the instruction size limited to 4 bytes. */
2792 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2793 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2794 plus_constant (save_area,
2795 8 * REGPARM_MAX + 127)));
ad919812 2796 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2797 set_mem_alias_set (mem, set);
8ac61af7 2798 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2799
2800 /* And finally do the dirty job! */
8ac61af7
RK
2801 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2802 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2803 }
2804
2805}
2806
2807/* Implement va_start. */
2808
2809void
e5faf155 2810ix86_va_start (valist, nextarg)
ad919812
JH
2811 tree valist;
2812 rtx nextarg;
2813{
2814 HOST_WIDE_INT words, n_gpr, n_fpr;
2815 tree f_gpr, f_fpr, f_ovf, f_sav;
2816 tree gpr, fpr, ovf, sav, t;
2817
2818 /* Only 64bit target needs something special. */
2819 if (!TARGET_64BIT)
2820 {
e5faf155 2821 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2822 return;
2823 }
2824
2825 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2826 f_fpr = TREE_CHAIN (f_gpr);
2827 f_ovf = TREE_CHAIN (f_fpr);
2828 f_sav = TREE_CHAIN (f_ovf);
2829
2830 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2831 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2832 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2833 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2834 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2835
2836 /* Count number of gp and fp argument registers used. */
2837 words = current_function_args_info.words;
2838 n_gpr = current_function_args_info.regno;
2839 n_fpr = current_function_args_info.sse_regno;
2840
2841 if (TARGET_DEBUG_ARG)
2842 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2843 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2844
2845 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2846 build_int_2 (n_gpr * 8, 0));
2847 TREE_SIDE_EFFECTS (t) = 1;
2848 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2849
2850 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2851 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2852 TREE_SIDE_EFFECTS (t) = 1;
2853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2854
2855 /* Find the overflow area. */
2856 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2857 if (words != 0)
2858 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2859 build_int_2 (words * UNITS_PER_WORD, 0));
2860 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2861 TREE_SIDE_EFFECTS (t) = 1;
2862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2863
2864 /* Find the register save area.
 2865 The function prologue saves it right above the stack frame. */
2866 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2867 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2868 TREE_SIDE_EFFECTS (t) = 1;
2869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2870}
2871
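 /* Worked example (illustrative, not from the original source): for a
    64-bit function

        int f (int a, int b, ...);

    whose two named arguments arrive in general-purpose registers, the code
    above initializes gp_offset to 2 * 8 = 16 and fp_offset to
    0 * 16 + 8 * REGPARM_MAX = 48, so the first va_arg fetches the third
    integer slot of the register save area.  */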
2872/* Implement va_arg. */
2873rtx
2874ix86_va_arg (valist, type)
2875 tree valist, type;
2876{
0139adca 2877 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2878 tree f_gpr, f_fpr, f_ovf, f_sav;
2879 tree gpr, fpr, ovf, sav, t;
b932f770 2880 int size, rsize;
ad919812
JH
2881 rtx lab_false, lab_over = NULL_RTX;
2882 rtx addr_rtx, r;
2883 rtx container;
09b2e78d 2884 int indirect_p = 0;
ad919812
JH
2885
2886 /* Only 64bit target needs something special. */
2887 if (!TARGET_64BIT)
2888 {
2889 return std_expand_builtin_va_arg (valist, type);
2890 }
2891
2892 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2893 f_fpr = TREE_CHAIN (f_gpr);
2894 f_ovf = TREE_CHAIN (f_fpr);
2895 f_sav = TREE_CHAIN (f_ovf);
2896
2897 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2898 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2899 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2900 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2901 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2902
2903 size = int_size_in_bytes (type);
09b2e78d
ZD
2904 if (size == -1)
2905 {
2906 /* Passed by reference. */
2907 indirect_p = 1;
2908 type = build_pointer_type (type);
2909 size = int_size_in_bytes (type);
2910 }
ad919812
JH
2911 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2912
2913 container = construct_container (TYPE_MODE (type), type, 0,
2914 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2915 /*
2916 * Pull the value out of the saved registers ...
2917 */
2918
2919 addr_rtx = gen_reg_rtx (Pmode);
2920
2921 if (container)
2922 {
2923 rtx int_addr_rtx, sse_addr_rtx;
2924 int needed_intregs, needed_sseregs;
2925 int need_temp;
2926
2927 lab_over = gen_label_rtx ();
2928 lab_false = gen_label_rtx ();
8bad7136 2929
ad919812
JH
2930 examine_argument (TYPE_MODE (type), type, 0,
2931 &needed_intregs, &needed_sseregs);
2932
2933
2934 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2935 || TYPE_ALIGN (type) > 128);
2936
d1f87653 2937 /* In case we are passing a structure, verify that it is a consecutive block
ad919812
JH
 2938 in the register save area. If not, we need to do moves. */
2939 if (!need_temp && !REG_P (container))
2940 {
d1f87653 2941 /* Verify that all registers are strictly consecutive. */
ad919812
JH
2942 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2943 {
2944 int i;
2945
2946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2947 {
2948 rtx slot = XVECEXP (container, 0, i);
b531087a 2949 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2950 || INTVAL (XEXP (slot, 1)) != i * 16)
2951 need_temp = 1;
2952 }
2953 }
2954 else
2955 {
2956 int i;
2957
2958 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2959 {
2960 rtx slot = XVECEXP (container, 0, i);
b531087a 2961 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2962 || INTVAL (XEXP (slot, 1)) != i * 8)
2963 need_temp = 1;
2964 }
2965 }
2966 }
2967 if (!need_temp)
2968 {
2969 int_addr_rtx = addr_rtx;
2970 sse_addr_rtx = addr_rtx;
2971 }
2972 else
2973 {
2974 int_addr_rtx = gen_reg_rtx (Pmode);
2975 sse_addr_rtx = gen_reg_rtx (Pmode);
2976 }
2977 /* First ensure that we fit completely in registers. */
2978 if (needed_intregs)
2979 {
2980 emit_cmp_and_jump_insns (expand_expr
2981 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2982 GEN_INT ((REGPARM_MAX - needed_intregs +
2983 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2984 1, lab_false);
ad919812
JH
2985 }
2986 if (needed_sseregs)
2987 {
2988 emit_cmp_and_jump_insns (expand_expr
2989 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2990 GEN_INT ((SSE_REGPARM_MAX -
2991 needed_sseregs + 1) * 16 +
2992 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2993 SImode, 1, lab_false);
ad919812
JH
2994 }
2995
2996 /* Compute index to start of area used for integer regs. */
2997 if (needed_intregs)
2998 {
2999 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3000 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3001 if (r != int_addr_rtx)
3002 emit_move_insn (int_addr_rtx, r);
3003 }
3004 if (needed_sseregs)
3005 {
3006 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3007 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3008 if (r != sse_addr_rtx)
3009 emit_move_insn (sse_addr_rtx, r);
3010 }
3011 if (need_temp)
3012 {
3013 int i;
3014 rtx mem;
3015
b932f770
JH
3016 /* Never use the memory itself, as it has the alias set. */
3017 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3018 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 3019 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3020 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3021
ad919812
JH
3022 for (i = 0; i < XVECLEN (container, 0); i++)
3023 {
3024 rtx slot = XVECEXP (container, 0, i);
3025 rtx reg = XEXP (slot, 0);
3026 enum machine_mode mode = GET_MODE (reg);
3027 rtx src_addr;
3028 rtx src_mem;
3029 int src_offset;
3030 rtx dest_mem;
3031
3032 if (SSE_REGNO_P (REGNO (reg)))
3033 {
3034 src_addr = sse_addr_rtx;
3035 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3036 }
3037 else
3038 {
3039 src_addr = int_addr_rtx;
3040 src_offset = REGNO (reg) * 8;
3041 }
3042 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3043 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3044 src_mem = adjust_address (src_mem, mode, src_offset);
3045 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3046 emit_move_insn (dest_mem, src_mem);
3047 }
3048 }
3049
3050 if (needed_intregs)
3051 {
3052 t =
3053 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3054 build_int_2 (needed_intregs * 8, 0));
3055 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3056 TREE_SIDE_EFFECTS (t) = 1;
3057 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3058 }
3059 if (needed_sseregs)
3060 {
3061 t =
3062 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3063 build_int_2 (needed_sseregs * 16, 0));
3064 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3065 TREE_SIDE_EFFECTS (t) = 1;
3066 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3067 }
3068
3069 emit_jump_insn (gen_jump (lab_over));
3070 emit_barrier ();
3071 emit_label (lab_false);
3072 }
3073
3074 /* ... otherwise out of the overflow area. */
3075
3076 /* Care for on-stack alignment if needed. */
3077 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3078 t = ovf;
3079 else
3080 {
3081 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3082 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3083 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3084 }
3085 t = save_expr (t);
3086
3087 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3088 if (r != addr_rtx)
3089 emit_move_insn (addr_rtx, r);
3090
3091 t =
3092 build (PLUS_EXPR, TREE_TYPE (t), t,
3093 build_int_2 (rsize * UNITS_PER_WORD, 0));
3094 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3095 TREE_SIDE_EFFECTS (t) = 1;
3096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3097
3098 if (container)
3099 emit_label (lab_over);
3100
3101 if (indirect_p)
3102 {
3103 r = gen_rtx_MEM (Pmode, addr_rtx);
3104 set_mem_alias_set (r, get_varargs_alias_set ());
3105 emit_move_insn (addr_rtx, r);
3106 }
3107
3108 return addr_rtx;
3109}
3110\f
3111/* Return nonzero if OP is either an i387 or an SSE fp register. */
3112int
3113any_fp_register_operand (op, mode)
3114 rtx op;
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3116{
3117 return ANY_FP_REG_P (op);
3118}
3119
3120/* Return nonzero if OP is an i387 fp register. */
3121int
3122fp_register_operand (op, mode)
3123 rtx op;
3124 enum machine_mode mode ATTRIBUTE_UNUSED;
3125{
3126 return FP_REG_P (op);
3127}
3128
3129/* Return nonzero if OP is a non-fp register_operand. */
3130int
3131register_and_not_any_fp_reg_operand (op, mode)
3132 rtx op;
3133 enum machine_mode mode;
3134{
3135 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3136}
3137
40b982a9 3138/* Return nonzero if OP is a register operand other than an
3139 i387 fp register. */
3140int
3141register_and_not_fp_reg_operand (op, mode)
3142 rtx op;
3143 enum machine_mode mode;
3144{
3145 return register_operand (op, mode) && !FP_REG_P (op);
3146}
3147
3148/* Return nonzero if OP is a general operand representable on x86_64. */
3149
3150int
3151x86_64_general_operand (op, mode)
3152 rtx op;
3153 enum machine_mode mode;
3154{
3155 if (!TARGET_64BIT)
3156 return general_operand (op, mode);
3157 if (nonimmediate_operand (op, mode))
3158 return 1;
c05dbe81 3159 return x86_64_sign_extended_value (op);
3160}
3161
3162/* Return nonzero if OP is a general operand representable on x86_64
 3163 as either a sign-extended or a zero-extended constant. */
3164
3165int
3166x86_64_szext_general_operand (op, mode)
3167 rtx op;
3168 enum machine_mode mode;
3169{
3170 if (!TARGET_64BIT)
3171 return general_operand (op, mode);
3172 if (nonimmediate_operand (op, mode))
3173 return 1;
c05dbe81 3174 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3175}
3176
3177/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3178
3179int
3180x86_64_nonmemory_operand (op, mode)
3181 rtx op;
3182 enum machine_mode mode;
3183{
3184 if (!TARGET_64BIT)
3185 return nonmemory_operand (op, mode);
3186 if (register_operand (op, mode))
3187 return 1;
c05dbe81 3188 return x86_64_sign_extended_value (op);
3189}
3190
3191/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3192
3193int
3194x86_64_movabs_operand (op, mode)
3195 rtx op;
3196 enum machine_mode mode;
3197{
3198 if (!TARGET_64BIT || !flag_pic)
3199 return nonmemory_operand (op, mode);
c05dbe81 3200 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3201 return 1;
3202 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3203 return 1;
3204 return 0;
3205}
3206
3207/* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign- or zero-extended constant. */
3208
3209int
3210x86_64_szext_nonmemory_operand (op, mode)
3211 rtx op;
3212 enum machine_mode mode;
3213{
3214 if (!TARGET_64BIT)
3215 return nonmemory_operand (op, mode);
3216 if (register_operand (op, mode))
3217 return 1;
c05dbe81 3218 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3219}
3220
3221/* Return nonzero if OP is an immediate operand representable on x86_64. */
3222
3223int
3224x86_64_immediate_operand (op, mode)
3225 rtx op;
3226 enum machine_mode mode;
3227{
3228 if (!TARGET_64BIT)
3229 return immediate_operand (op, mode);
c05dbe81 3230 return x86_64_sign_extended_value (op);
3231}
3232
3233/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
3234
3235int
3236x86_64_zext_immediate_operand (op, mode)
3237 rtx op;
3238 enum machine_mode mode ATTRIBUTE_UNUSED;
3239{
3240 return x86_64_zero_extended_value (op);
3241}
3242
3243/* Return nonzero if OP is (const_int 1), else return zero. */
3244
3245int
3246const_int_1_operand (op, mode)
3247 rtx op;
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3249{
3250 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3251}
3252
3253/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3254 for shift & compare patterns, as shifting by 0 does not change flags),
3255 else return zero. */
3256
3257int
3258const_int_1_31_operand (op, mode)
3259 rtx op;
3260 enum machine_mode mode ATTRIBUTE_UNUSED;
3261{
3262 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3263}
3264
3265/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3266 reference and a constant. */
3267
3268int
3269symbolic_operand (op, mode)
3270 register rtx op;
3271 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3272{
e075ae69 3273 switch (GET_CODE (op))
2a2ab3f9 3274 {
3275 case SYMBOL_REF:
3276 case LABEL_REF:
3277 return 1;
3278
3279 case CONST:
3280 op = XEXP (op, 0);
3281 if (GET_CODE (op) == SYMBOL_REF
3282 || GET_CODE (op) == LABEL_REF
3283 || (GET_CODE (op) == UNSPEC
3284 && (XINT (op, 1) == UNSPEC_GOT
3285 || XINT (op, 1) == UNSPEC_GOTOFF
3286 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3287 return 1;
3288 if (GET_CODE (op) != PLUS
3289 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3290 return 0;
3291
3292 op = XEXP (op, 0);
3293 if (GET_CODE (op) == SYMBOL_REF
3294 || GET_CODE (op) == LABEL_REF)
3295 return 1;
3296 /* Only @GOTOFF gets offsets. */
3297 if (GET_CODE (op) != UNSPEC
8ee41eaf 3298 || XINT (op, 1) != UNSPEC_GOTOFF)
3299 return 0;
3300
3301 op = XVECEXP (op, 0, 0);
3302 if (GET_CODE (op) == SYMBOL_REF
3303 || GET_CODE (op) == LABEL_REF)
3304 return 1;
3305 return 0;
3306
3307 default:
3308 return 0;
3309 }
3310}
2a2ab3f9 3311
e075ae69 3312/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3313
3314int
3315pic_symbolic_operand (op, mode)
3316 register rtx op;
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3318{
3319 if (GET_CODE (op) != CONST)
3320 return 0;
3321 op = XEXP (op, 0);
3322 if (TARGET_64BIT)
3323 {
3324 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3325 return 1;
3326 }
fce5a9f2 3327 else
2a2ab3f9 3328 {
3329 if (GET_CODE (op) == UNSPEC)
3330 return 1;
3331 if (GET_CODE (op) != PLUS
3332 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3333 return 0;
3334 op = XEXP (op, 0);
3335 if (GET_CODE (op) == UNSPEC)
3336 return 1;
2a2ab3f9 3337 }
e075ae69 3338 return 0;
2a2ab3f9 3339}
2a2ab3f9 3340
3341/* Return true if OP is a symbolic operand that resolves locally. */
3342
3343static int
3344local_symbolic_operand (op, mode)
3345 rtx op;
3346 enum machine_mode mode ATTRIBUTE_UNUSED;
3347{
3348 if (GET_CODE (op) == CONST
3349 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3350 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3351 op = XEXP (XEXP (op, 0), 0);
3352
3353 if (GET_CODE (op) == LABEL_REF)
3354 return 1;
3355
3356 if (GET_CODE (op) != SYMBOL_REF)
3357 return 0;
3358
3359 /* These we've been told are local by varasm and encode_section_info
3360 respectively. */
3361 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3362 return 1;
3363
3364 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3365 the compiler that assumes it can just stick the results of
3366 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3367 /* ??? This is a hack. Should update the body of the compiler to
 3368 always create a DECL and invoke targetm.encode_section_info. */
3369 if (strncmp (XSTR (op, 0), internal_label_prefix,
3370 internal_label_prefix_len) == 0)
3371 return 1;
3372
3373 return 0;
3374}
3375
3376/* Test for various thread-local symbols. See ix86_encode_section_info. */
3377
3378int
3379tls_symbolic_operand (op, mode)
3380 register rtx op;
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3382{
3383 const char *symbol_str;
3384
3385 if (GET_CODE (op) != SYMBOL_REF)
3386 return 0;
3387 symbol_str = XSTR (op, 0);
3388
3389 if (symbol_str[0] != '%')
3390 return 0;
755ac5d4 3391 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3392}
3393
3394static int
3395tls_symbolic_operand_1 (op, kind)
3396 rtx op;
3397 enum tls_model kind;
3398{
3399 const char *symbol_str;
3400
3401 if (GET_CODE (op) != SYMBOL_REF)
3402 return 0;
3403 symbol_str = XSTR (op, 0);
3404
3405 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3406}
3407
3408int
3409global_dynamic_symbolic_operand (op, mode)
3410 register rtx op;
3411 enum machine_mode mode ATTRIBUTE_UNUSED;
3412{
3413 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3414}
3415
3416int
3417local_dynamic_symbolic_operand (op, mode)
3418 register rtx op;
3419 enum machine_mode mode ATTRIBUTE_UNUSED;
3420{
3421 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3422}
3423
3424int
3425initial_exec_symbolic_operand (op, mode)
3426 register rtx op;
3427 enum machine_mode mode ATTRIBUTE_UNUSED;
3428{
3429 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3430}
3431
3432int
3433local_exec_symbolic_operand (op, mode)
3434 register rtx op;
3435 enum machine_mode mode ATTRIBUTE_UNUSED;
3436{
3437 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3438}
3439
3440/* Test for a valid operand for a call instruction. Don't allow the
3441 arg pointer register or virtual regs since they may decay into
3442 reg + const, which the patterns can't handle. */
2a2ab3f9 3443
3444int
3445call_insn_operand (op, mode)
3446 rtx op;
3447 enum machine_mode mode ATTRIBUTE_UNUSED;
3448{
3449 /* Disallow indirect through a virtual register. This leads to
3450 compiler aborts when trying to eliminate them. */
3451 if (GET_CODE (op) == REG
3452 && (op == arg_pointer_rtx
564d80f4 3453 || op == frame_pointer_rtx
3454 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3455 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3456 return 0;
2a2ab3f9 3457
3458 /* Disallow `call 1234'. Due to varying assembler lameness this
3459 gets either rejected or translated to `call .+1234'. */
3460 if (GET_CODE (op) == CONST_INT)
3461 return 0;
3462
3463 /* Explicitly allow SYMBOL_REF even if pic. */
3464 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3465 return 1;
2a2ab3f9 3466
3467 /* Otherwise we can allow any general_operand in the address. */
3468 return general_operand (op, Pmode);
e075ae69 3469}
79325812 3470
3471/* Test for a valid operand for a call instruction. Don't allow the
3472 arg pointer register or virtual regs since they may decay into
3473 reg + const, which the patterns can't handle. */
3474
3475int
3476sibcall_insn_operand (op, mode)
3477 rtx op;
3478 enum machine_mode mode ATTRIBUTE_UNUSED;
3479{
3480 /* Disallow indirect through a virtual register. This leads to
3481 compiler aborts when trying to eliminate them. */
3482 if (GET_CODE (op) == REG
3483 && (op == arg_pointer_rtx
3484 || op == frame_pointer_rtx
3485 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3486 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3487 return 0;
3488
3489 /* Explicitly allow SYMBOL_REF even if pic. */
3490 if (GET_CODE (op) == SYMBOL_REF)
3491 return 1;
3492
3493 /* Otherwise we can only allow register operands. */
3494 return register_operand (op, Pmode);
3495}
3496
3497int
3498constant_call_address_operand (op, mode)
3499 rtx op;
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3501{
3502 if (GET_CODE (op) == CONST
3503 && GET_CODE (XEXP (op, 0)) == PLUS
3504 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3505 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3506 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3507}
2a2ab3f9 3508
e075ae69 3509/* Match exactly zero and one. */
e9a25f70 3510
0f290768 3511int
3512const0_operand (op, mode)
3513 register rtx op;
3514 enum machine_mode mode;
3515{
3516 return op == CONST0_RTX (mode);
3517}
e9a25f70 3518
0f290768 3519int
3520const1_operand (op, mode)
3521 register rtx op;
3522 enum machine_mode mode ATTRIBUTE_UNUSED;
3523{
3524 return op == const1_rtx;
3525}
2a2ab3f9 3526
e075ae69 3527/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3528
3529int
3530const248_operand (op, mode)
3531 register rtx op;
3532 enum machine_mode mode ATTRIBUTE_UNUSED;
3533{
3534 return (GET_CODE (op) == CONST_INT
3535 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3536}
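/* Illustrative example (editorial annotation, not in the original source):
   const248_operand accepts exactly the non-trivial index scale factors
   encodable in an x86 address, e.g. the 4 in
       leal (%ebx,%ecx,4), %eax
   whose address RTL contains (mult (reg:SI cx) (const_int 4)).  */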
e9a25f70 3537
d1f87653 3538/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3539
3540int
3541incdec_operand (op, mode)
3542 register rtx op;
0631e0bf 3543 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3544{
 3545 /* On the Pentium 4, the inc and dec operations cause an extra
 3546 dependency on the flags register, since the carry flag is not set. */
3547 if (TARGET_PENTIUM4 && !optimize_size)
3548 return 0;
2b1c08f5 3549 return op == const1_rtx || op == constm1_rtx;
e075ae69 3550}
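/* Illustrative note (editorial annotation, not in the original source):
   "incl %eax" updates all arithmetic flags except CF, so a later flags
   consumer carries a false dependency on the previous CF producer;
   "addl $1, %eax" rewrites every flag and avoids that stall, which is
   why this predicate rejects +/-1 on the Pentium 4 unless optimizing
   for size.  */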
2a2ab3f9 3551
3552/* Return nonzero if OP is acceptable as operand of DImode shift
3553 expander. */
3554
3555int
3556shiftdi_operand (op, mode)
3557 rtx op;
3558 enum machine_mode mode ATTRIBUTE_UNUSED;
3559{
3560 if (TARGET_64BIT)
3561 return nonimmediate_operand (op, mode);
3562 else
3563 return register_operand (op, mode);
3564}
3565
0f290768 3566/* Return false if this is the stack pointer, or any other fake
3567 register eliminable to the stack pointer. Otherwise, this is
3568 a register operand.
2a2ab3f9 3569
 3570 This is used to prevent esp from being used as an index reg,
 3571 which would only happen in pathological cases. */
5f1ec3e6 3572
3573int
3574reg_no_sp_operand (op, mode)
3575 register rtx op;
3576 enum machine_mode mode;
3577{
3578 rtx t = op;
3579 if (GET_CODE (t) == SUBREG)
3580 t = SUBREG_REG (t);
564d80f4 3581 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3582 return 0;
2a2ab3f9 3583
e075ae69 3584 return register_operand (op, mode);
2a2ab3f9 3585}
b840bfb0 3586
3587int
3588mmx_reg_operand (op, mode)
3589 register rtx op;
bd793c65 3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3591{
3592 return MMX_REG_P (op);
3593}
3594
3595/* Return false if this is any eliminable register. Otherwise
3596 general_operand. */
3597
3598int
3599general_no_elim_operand (op, mode)
3600 register rtx op;
3601 enum machine_mode mode;
3602{
3603 rtx t = op;
3604 if (GET_CODE (t) == SUBREG)
3605 t = SUBREG_REG (t);
3606 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3607 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3608 || t == virtual_stack_dynamic_rtx)
3609 return 0;
3610 if (REG_P (t)
3611 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3612 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3613 return 0;
3614
3615 return general_operand (op, mode);
3616}
3617
3618/* Return false if this is any eliminable register. Otherwise
3619 register_operand or const_int. */
3620
3621int
3622nonmemory_no_elim_operand (op, mode)
3623 register rtx op;
3624 enum machine_mode mode;
3625{
3626 rtx t = op;
3627 if (GET_CODE (t) == SUBREG)
3628 t = SUBREG_REG (t);
3629 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3630 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3631 || t == virtual_stack_dynamic_rtx)
3632 return 0;
3633
3634 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3635}
3636
3637/* Return false if this is any eliminable register or stack register,
3638 otherwise work like register_operand. */
3639
3640int
3641index_register_operand (op, mode)
3642 register rtx op;
3643 enum machine_mode mode;
3644{
3645 rtx t = op;
3646 if (GET_CODE (t) == SUBREG)
3647 t = SUBREG_REG (t);
3648 if (!REG_P (t))
3649 return 0;
3650 if (t == arg_pointer_rtx
3651 || t == frame_pointer_rtx
3652 || t == virtual_incoming_args_rtx
3653 || t == virtual_stack_vars_rtx
3654 || t == virtual_stack_dynamic_rtx
3655 || REGNO (t) == STACK_POINTER_REGNUM)
3656 return 0;
3657
3658 return general_operand (op, mode);
3659}
3660
e075ae69 3661/* Return true if op is a Q_REGS class register. */
b840bfb0 3662
3663int
3664q_regs_operand (op, mode)
3665 register rtx op;
3666 enum machine_mode mode;
b840bfb0 3667{
3668 if (mode != VOIDmode && GET_MODE (op) != mode)
3669 return 0;
3670 if (GET_CODE (op) == SUBREG)
3671 op = SUBREG_REG (op);
7799175f 3672 return ANY_QI_REG_P (op);
0f290768 3673}
b840bfb0 3674
3675/* Return true if op is the flags register. */
3676
3677int
3678flags_reg_operand (op, mode)
3679 register rtx op;
3680 enum machine_mode mode;
3681{
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3683 return 0;
3684 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3685}
3686
e075ae69 3687/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3688
3689int
3690non_q_regs_operand (op, mode)
3691 register rtx op;
3692 enum machine_mode mode;
3693{
3694 if (mode != VOIDmode && GET_MODE (op) != mode)
3695 return 0;
3696 if (GET_CODE (op) == SUBREG)
3697 op = SUBREG_REG (op);
3698 return NON_QI_REG_P (op);
0f290768 3699}
b840bfb0 3700
3701int
3702zero_extended_scalar_load_operand (op, mode)
3703 rtx op;
3704 enum machine_mode mode ATTRIBUTE_UNUSED;
3705{
3706 unsigned n_elts;
3707 if (GET_CODE (op) != MEM)
3708 return 0;
3709 op = maybe_get_pool_constant (op);
3710 if (!op)
3711 return 0;
3712 if (GET_CODE (op) != CONST_VECTOR)
3713 return 0;
3714 n_elts =
3715 (GET_MODE_SIZE (GET_MODE (op)) /
3716 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3717 for (n_elts--; n_elts > 0; n_elts--)
3718 {
3719 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3720 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3721 return 0;
3722 }
3723 return 1;
3724}
3725
3726/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3727 insns. */
3728int
3729sse_comparison_operator (op, mode)
3730 rtx op;
3731 enum machine_mode mode ATTRIBUTE_UNUSED;
3732{
3733 enum rtx_code code = GET_CODE (op);
3734 switch (code)
3735 {
3736 /* Operations supported directly. */
3737 case EQ:
3738 case LT:
3739 case LE:
3740 case UNORDERED:
3741 case NE:
3742 case UNGE:
3743 case UNGT:
3744 case ORDERED:
3745 return 1;
3746 /* These are equivalent to ones above in non-IEEE comparisons. */
3747 case UNEQ:
3748 case UNLT:
3749 case UNLE:
3750 case LTGT:
3751 case GE:
3752 case GT:
3753 return !TARGET_IEEE_FP;
3754 default:
3755 return 0;
3756 }
915119a5 3757}
9076b9c1 3758/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3759int
3760ix86_comparison_operator (op, mode)
3761 register rtx op;
3762 enum machine_mode mode;
e075ae69 3763{
9076b9c1 3764 enum machine_mode inmode;
9a915772 3765 enum rtx_code code = GET_CODE (op);
3766 if (mode != VOIDmode && GET_MODE (op) != mode)
3767 return 0;
3768 if (GET_RTX_CLASS (code) != '<')
3769 return 0;
3770 inmode = GET_MODE (XEXP (op, 0));
3771
3772 if (inmode == CCFPmode || inmode == CCFPUmode)
3773 {
3774 enum rtx_code second_code, bypass_code;
3775 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3776 return (bypass_code == NIL && second_code == NIL);
3777 }
3778 switch (code)
3779 {
3780 case EQ: case NE:
3a3677ff 3781 return 1;
9076b9c1 3782 case LT: case GE:
7e08e190 3783 if (inmode == CCmode || inmode == CCGCmode
3784 || inmode == CCGOCmode || inmode == CCNOmode)
3785 return 1;
3786 return 0;
7e08e190 3787 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3788 if (inmode == CCmode)
3789 return 1;
3790 return 0;
3791 case GT: case LE:
7e08e190 3792 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3793 return 1;
3794 return 0;
3795 default:
3796 return 0;
3797 }
3798}
3799
3800/* Return 1 if OP is a valid comparison operator testing carry flag
3801 to be set. */
3802int
3803ix86_carry_flag_operator (op, mode)
3804 register rtx op;
3805 enum machine_mode mode;
3806{
3807 enum machine_mode inmode;
3808 enum rtx_code code = GET_CODE (op);
3809
3810 if (mode != VOIDmode && GET_MODE (op) != mode)
3811 return 0;
3812 if (GET_RTX_CLASS (code) != '<')
3813 return 0;
3814 inmode = GET_MODE (XEXP (op, 0));
3815 if (GET_CODE (XEXP (op, 0)) != REG
3816 || REGNO (XEXP (op, 0)) != 17
3817 || XEXP (op, 1) != const0_rtx)
3818 return 0;
3819
3820 if (inmode == CCFPmode || inmode == CCFPUmode)
3821 {
3822 enum rtx_code second_code, bypass_code;
3823
3824 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3825 if (bypass_code != NIL || second_code != NIL)
3826 return 0;
3827 code = ix86_fp_compare_code_to_integer (code);
3828 }
3829 else if (inmode != CCmode)
3830 return 0;
3831 return code == LTU;
3832}
3833
9076b9c1 3834/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3835
3836int
3837fcmov_comparison_operator (op, mode)
3838 register rtx op;
3839 enum machine_mode mode;
3840{
b62d22a2 3841 enum machine_mode inmode;
9a915772 3842 enum rtx_code code = GET_CODE (op);
e6e81735 3843
3844 if (mode != VOIDmode && GET_MODE (op) != mode)
3845 return 0;
3846 if (GET_RTX_CLASS (code) != '<')
3847 return 0;
3848 inmode = GET_MODE (XEXP (op, 0));
3849 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3850 {
9a915772 3851 enum rtx_code second_code, bypass_code;
e6e81735 3852
3853 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3854 if (bypass_code != NIL || second_code != NIL)
3855 return 0;
3856 code = ix86_fp_compare_code_to_integer (code);
3857 }
 3858 /* The i387 supports only a limited set of condition codes. */
3859 switch (code)
3860 {
3861 case LTU: case GTU: case LEU: case GEU:
3862 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3863 return 1;
3864 return 0;
3865 case ORDERED: case UNORDERED:
3866 case EQ: case NE:
3867 return 1;
3868 default:
3869 return 0;
3870 }
e075ae69 3871}
b840bfb0 3872
3873/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3874
3875int
3876promotable_binary_operator (op, mode)
3877 register rtx op;
3878 enum machine_mode mode ATTRIBUTE_UNUSED;
3879{
3880 switch (GET_CODE (op))
3881 {
3882 case MULT:
 3883 /* Modern CPUs have the same latency for HImode and SImode multiplies,
 3884 but the 386 and 486 do HImode multiplies faster. */
3885 return ix86_cpu > PROCESSOR_I486;
3886 case PLUS:
3887 case AND:
3888 case IOR:
3889 case XOR:
3890 case ASHIFT:
3891 return 1;
3892 default:
3893 return 0;
3894 }
3895}
3896
3897/* Nearly general operand, but accept any const_double, since we wish
3898 to be able to drop them into memory rather than have them get pulled
3899 into registers. */
b840bfb0 3900
2a2ab3f9 3901int
3902cmp_fp_expander_operand (op, mode)
3903 register rtx op;
3904 enum machine_mode mode;
2a2ab3f9 3905{
e075ae69 3906 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3907 return 0;
e075ae69 3908 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3909 return 1;
e075ae69 3910 return general_operand (op, mode);
3911}
3912
e075ae69 3913/* Match an SI or HImode register for a zero_extract. */
3914
3915int
e075ae69 3916ext_register_operand (op, mode)
2a2ab3f9 3917 register rtx op;
bb5177ac 3918 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3919{
3522082b 3920 int regno;
3921 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3922 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3923 return 0;
3924
3925 if (!register_operand (op, VOIDmode))
3926 return 0;
3927
d1f87653 3928 /* Be careful to accept only registers having upper parts. */
3929 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3930 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3931}
3932
3933/* Return 1 if this is a valid binary floating-point operation.
0f290768 3934 OP is the expression matched, and MODE is its mode. */
3935
3936int
3937binary_fp_operator (op, mode)
3938 register rtx op;
3939 enum machine_mode mode;
3940{
3941 if (mode != VOIDmode && mode != GET_MODE (op))
3942 return 0;
3943
3944 switch (GET_CODE (op))
3945 {
3946 case PLUS:
3947 case MINUS:
3948 case MULT:
3949 case DIV:
3950 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3951
3952 default:
3953 return 0;
3954 }
3955}
fee2770d 3956
e075ae69 3957int
b531087a 3958mult_operator (op, mode)
3959 register rtx op;
3960 enum machine_mode mode ATTRIBUTE_UNUSED;
3961{
3962 return GET_CODE (op) == MULT;
3963}
3964
3965int
b531087a 3966div_operator (op, mode)
3967 register rtx op;
3968 enum machine_mode mode ATTRIBUTE_UNUSED;
3969{
3970 return GET_CODE (op) == DIV;
3971}
3972
3973int
3974arith_or_logical_operator (op, mode)
3975 rtx op;
3976 enum machine_mode mode;
0a726ef1 3977{
3978 return ((mode == VOIDmode || GET_MODE (op) == mode)
3979 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3980 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3981}
3982
3983/* Returns 1 if OP is a memory operand with a displacement. */
3984
3985int
3986memory_displacement_operand (op, mode)
3987 register rtx op;
3988 enum machine_mode mode;
4f2c8ebb 3989{
e075ae69 3990 struct ix86_address parts;
e9a25f70 3991
3992 if (! memory_operand (op, mode))
3993 return 0;
3994
3995 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3996 abort ();
3997
3998 return parts.disp != NULL_RTX;
3999}
4000
16189740 4001/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4002 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4003
4004 ??? It seems likely that this will only work because cmpsi is an
4005 expander, and no actual insns use this. */
4006
4007int
4008cmpsi_operand (op, mode)
4009 rtx op;
4010 enum machine_mode mode;
fee2770d 4011{
b9b2c339 4012 if (nonimmediate_operand (op, mode))
4013 return 1;
4014
4015 if (GET_CODE (op) == AND
4016 && GET_MODE (op) == SImode
4017 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4018 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4019 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4020 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4021 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4022 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 4023 return 1;
e9a25f70 4024
4025 return 0;
4026}
d784886d 4027
4028/* Returns 1 if OP is a memory operand that cannot be represented by
 4029 the modRM array. */
4030
4031int
e075ae69 4032long_memory_operand (op, mode)
4033 register rtx op;
4034 enum machine_mode mode;
4035{
e075ae69 4036 if (! memory_operand (op, mode))
d784886d
RK
4037 return 0;
4038
e075ae69 4039 return memory_address_length (op) != 0;
d784886d 4040}
4041
4042/* Return nonzero if the rtx is known to be aligned. */
4043
4044int
4045aligned_operand (op, mode)
4046 rtx op;
4047 enum machine_mode mode;
4048{
4049 struct ix86_address parts;
4050
4051 if (!general_operand (op, mode))
4052 return 0;
4053
0f290768 4054 /* Registers and immediate operands are always "aligned". */
4055 if (GET_CODE (op) != MEM)
4056 return 1;
4057
0f290768 4058 /* Don't even try to do any aligned optimizations with volatiles. */
4059 if (MEM_VOLATILE_P (op))
4060 return 0;
4061
4062 op = XEXP (op, 0);
4063
4064 /* Pushes and pops are only valid on the stack pointer. */
4065 if (GET_CODE (op) == PRE_DEC
4066 || GET_CODE (op) == POST_INC)
4067 return 1;
4068
4069 /* Decode the address. */
4070 if (! ix86_decompose_address (op, &parts))
4071 abort ();
4072
4073 if (parts.base && GET_CODE (parts.base) == SUBREG)
4074 parts.base = SUBREG_REG (parts.base);
4075 if (parts.index && GET_CODE (parts.index) == SUBREG)
4076 parts.index = SUBREG_REG (parts.index);
4077
4078 /* Look for some component that isn't known to be aligned. */
4079 if (parts.index)
4080 {
4081 if (parts.scale < 4
bdb429a5 4082 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4083 return 0;
4084 }
4085 if (parts.base)
4086 {
bdb429a5 4087 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4088 return 0;
4089 }
4090 if (parts.disp)
4091 {
4092 if (GET_CODE (parts.disp) != CONST_INT
4093 || (INTVAL (parts.disp) & 3) != 0)
4094 return 0;
4095 }
4096
4097 /* Didn't find one -- this must be an aligned address. */
4098 return 1;
4099}
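/* Illustrative note (editorial annotation, not in the original source):
   an index scaled by 4 or 8 can never disturb 32-bit alignment, so only
   scales 1 and 2 require the index register itself to be known aligned;
   the displacement, if any, must additionally be a multiple of 4.  */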
4100\f
4101/* Return true if the constant is something that can be loaded with
4102 a special instruction. Only handle 0.0 and 1.0; others are less
4103 worthwhile. */
4104
4105int
4106standard_80387_constant_p (x)
4107 rtx x;
57dbca5e 4108{
2b04e52b 4109 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4110 return -1;
 4111 /* Note that on the 80387 there are other constants, such as pi, that we
 4112 should support too. On some machines these are much slower to load as a
 4113 standard constant than to load from doubles in memory. */
4114 if (x == CONST0_RTX (GET_MODE (x)))
4115 return 1;
4116 if (x == CONST1_RTX (GET_MODE (x)))
4117 return 2;
e075ae69 4118 return 0;
4119}
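/* Illustrative note (editorial annotation, not in the original source):
   the nonzero return values correspond to the single-instruction i387
   loads -- 1 selects fldz (0.0) and 2 selects fld1 (1.0); any other
   constant has to be loaded from memory.  */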
4120
4121/* Return 1 if X is an FP constant we can load into an SSE register
 4122 without using memory. */
4123int
4124standard_sse_constant_p (x)
4125 rtx x;
4126{
4127 if (x == const0_rtx)
4128 return 1;
4129 return (x == CONST0_RTX (GET_MODE (x)));
4130}
4131
4132/* Returns 1 if OP contains a symbol reference. */
4133
4134int
4135symbolic_reference_mentioned_p (op)
4136 rtx op;
4137{
6f7d635c 4138 register const char *fmt;
4139 register int i;
4140
4141 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4142 return 1;
4143
4144 fmt = GET_RTX_FORMAT (GET_CODE (op));
4145 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4146 {
4147 if (fmt[i] == 'E')
4148 {
4149 register int j;
4150
4151 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4152 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4153 return 1;
4154 }
e9a25f70 4155
4156 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4157 return 1;
4158 }
4159
4160 return 0;
4161}
4162
4163/* Return 1 if it is appropriate to emit `ret' instructions in the
4164 body of a function. Do this only if the epilogue is simple, needing a
4165 couple of insns. Prior to reloading, we can't tell how many registers
4166 must be saved, so return 0 then. Return 0 if there is no frame
4167 marker to de-allocate.
4168
4169 If NON_SAVING_SETJMP is defined and true, then it is not possible
4170 for the epilogue to be simple, so return 0. This is a special case
4171 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4172 until final, but jump_optimize may need to know sooner if a
4173 `return' is OK. */
4174
4175int
e075ae69 4176ix86_can_use_return_insn_p ()
32b5b1aa 4177{
4dd2ac2c 4178 struct ix86_frame frame;
9a7372d6 4179
4180#ifdef NON_SAVING_SETJMP
4181 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4182 return 0;
4183#endif
4184
4185 if (! reload_completed || frame_pointer_needed)
4186 return 0;
32b5b1aa 4187
 4188 /* Don't allow more than 32768 bytes to be popped, since that's all
 4189 we can do with one instruction. */
4190 if (current_function_pops_args
4191 && current_function_args_size >= 32768)
e075ae69 4192 return 0;
32b5b1aa 4193
4194 ix86_compute_frame_layout (&frame);
4195 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4196}
4197\f
4198/* Return 1 if VALUE can be stored in the sign-extended immediate field. */
4199int
c05dbe81 4200x86_64_sign_extended_value (value)
4201 rtx value;
4202{
4203 switch (GET_CODE (value))
4204 {
 4205 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
 4206 to be at least 32, and thus all acceptable constants are
 4207 represented as CONST_INTs. */
4208 case CONST_INT:
4209 if (HOST_BITS_PER_WIDE_INT == 32)
4210 return 1;
4211 else
4212 {
4213 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4214 return trunc_int_for_mode (val, SImode) == val;
4215 }
4216 break;
4217
4218 /* For certain code models, the symbolic references are known to fit.
 4219 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4220 library. Don't count TLS SYMBOL_REFs here, since they should fit
4221 only if inside of UNSPEC handled below. */
6189a572 4222 case SYMBOL_REF:
c05dbe81 4223 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4224
4225 /* For certain code models, the code is near as well. */
4226 case LABEL_REF:
4227 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4228 || ix86_cmodel == CM_KERNEL);
4229
4230 /* We also may accept the offsetted memory references in certain special
4231 cases. */
4232 case CONST:
4233 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4234 switch (XINT (XEXP (value, 0), 1))
4235 {
4236 case UNSPEC_GOTPCREL:
4237 case UNSPEC_DTPOFF:
4238 case UNSPEC_GOTNTPOFF:
4239 case UNSPEC_NTPOFF:
4240 return 1;
4241 default:
4242 break;
4243 }
4244 if (GET_CODE (XEXP (value, 0)) == PLUS)
4245 {
4246 rtx op1 = XEXP (XEXP (value, 0), 0);
4247 rtx op2 = XEXP (XEXP (value, 0), 1);
4248 HOST_WIDE_INT offset;
4249
4250 if (ix86_cmodel == CM_LARGE)
4251 return 0;
4252 if (GET_CODE (op2) != CONST_INT)
4253 return 0;
4254 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4255 switch (GET_CODE (op1))
4256 {
4257 case SYMBOL_REF:
 4258 /* For CM_SMALL assume that the latest object is 16MB below
 4259 the end of the 31-bit boundary. We may also accept pretty
 4260 large negative constants knowing that all objects are
 4261 in the positive half of the address space. */
4262 if (ix86_cmodel == CM_SMALL
75d38379 4263 && offset < 16*1024*1024
4264 && trunc_int_for_mode (offset, SImode) == offset)
4265 return 1;
 4266 /* For CM_KERNEL we know that all objects reside in the
 4267 negative half of the 32-bit address space. We may not
 4268 accept negative offsets, since they may be just off,
 4269 and we may accept pretty large positive ones. */
4270 if (ix86_cmodel == CM_KERNEL
4271 && offset > 0
4272 && trunc_int_for_mode (offset, SImode) == offset)
4273 return 1;
4274 break;
4275 case LABEL_REF:
4276 /* These conditions are similar to SYMBOL_REF ones, just the
4277 constraints for code models differ. */
c05dbe81 4278 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4279 && offset < 16*1024*1024
4280 && trunc_int_for_mode (offset, SImode) == offset)
4281 return 1;
4282 if (ix86_cmodel == CM_KERNEL
4283 && offset > 0
4284 && trunc_int_for_mode (offset, SImode) == offset)
4285 return 1;
4286 break;
4287 case UNSPEC:
4288 switch (XINT (op1, 1))
4289 {
4290 case UNSPEC_DTPOFF:
4291 case UNSPEC_NTPOFF:
4292 if (offset > 0
4293 && trunc_int_for_mode (offset, SImode) == offset)
4294 return 1;
4295 }
4296 break;
4297 default:
4298 return 0;
4299 }
4300 }
4301 return 0;
4302 default:
4303 return 0;
4304 }
4305}
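/* Worked example (editorial annotation; the symbol and offset are
   hypothetical): under CM_SMALL,
       (const (plus (symbol_ref "x") (const_int 4096)))
   is accepted because 4096 is below the 16MB cutoff and survives
   truncation to SImode, so the address can be encoded as a
   sign-extended 32-bit immediate.  */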
4306
4307/* Return 1 if VALUE can be stored in the zero-extended immediate field. */
4308int
4309x86_64_zero_extended_value (value)
4310 rtx value;
4311{
4312 switch (GET_CODE (value))
4313 {
4314 case CONST_DOUBLE:
4315 if (HOST_BITS_PER_WIDE_INT == 32)
4316 return (GET_MODE (value) == VOIDmode
4317 && !CONST_DOUBLE_HIGH (value));
4318 else
4319 return 0;
4320 case CONST_INT:
4321 if (HOST_BITS_PER_WIDE_INT == 32)
4322 return INTVAL (value) >= 0;
4323 else
b531087a 4324 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4325 break;
4326
4327 /* For certain code models, the symbolic references are known to fit. */
4328 case SYMBOL_REF:
4329 return ix86_cmodel == CM_SMALL;
4330
4331 /* For certain code models, the code is near as well. */
4332 case LABEL_REF:
4333 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4334
4335 /* We also may accept the offsetted memory references in certain special
4336 cases. */
4337 case CONST:
4338 if (GET_CODE (XEXP (value, 0)) == PLUS)
4339 {
4340 rtx op1 = XEXP (XEXP (value, 0), 0);
4341 rtx op2 = XEXP (XEXP (value, 0), 1);
4342
4343 if (ix86_cmodel == CM_LARGE)
4344 return 0;
4345 switch (GET_CODE (op1))
4346 {
4347 case SYMBOL_REF:
4348 return 0;
 4349 /* For the small code model we may accept pretty large positive
 4350 offsets, since one bit is available for free. Negative
4351 offsets are limited by the size of NULL pointer area
4352 specified by the ABI. */
4353 if (ix86_cmodel == CM_SMALL
4354 && GET_CODE (op2) == CONST_INT
4355 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4356 && (trunc_int_for_mode (INTVAL (op2), SImode)
4357 == INTVAL (op2)))
4358 return 1;
4359 /* ??? For the kernel, we may accept adjustment of
4360 -0x10000000, since we know that it will just convert
d6a7951f 4361 negative address space to positive, but perhaps this
4362 is not worthwhile. */
4363 break;
4364 case LABEL_REF:
4365 /* These conditions are similar to SYMBOL_REF ones, just the
4366 constraints for code models differ. */
4367 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4368 && GET_CODE (op2) == CONST_INT
4369 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4370 && (trunc_int_for_mode (INTVAL (op2), SImode)
4371 == INTVAL (op2)))
4372 return 1;
4373 break;
4374 default:
4375 return 0;
4376 }
4377 }
4378 return 0;
4379 default:
4380 return 0;
4381 }
4382}
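/* Illustrative example (editorial annotation, not in the original
   source): on a 64-bit host, (const_int 0xdeadbeef) is accepted since
   it fits in an unsigned 32-bit immediate, while (const_int -1) is
   rejected because zero extending 0xffffffff would not reproduce the
   value.  */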
4383
4384/* Value should be nonzero if functions must have frame pointers.
4385 Zero means the frame pointer need not be set up (and parms may
4386 be accessed via the stack pointer) in functions that seem suitable. */
4387
4388int
4389ix86_frame_pointer_required ()
4390{
4391 /* If we accessed previous frames, then the generated code expects
4392 to be able to access the saved ebp value in our frame. */
4393 if (cfun->machine->accesses_prev_frame)
4394 return 1;
a4f31c00 4395
4396 /* Several x86 os'es need a frame pointer for other reasons,
4397 usually pertaining to setjmp. */
4398 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4399 return 1;
4400
4401 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4402 the frame pointer by default. Turn it back on now if we've not
4403 got a leaf function. */
a7943381 4404 if (TARGET_OMIT_LEAF_FRAME_POINTER
4405 && (!current_function_is_leaf))
4406 return 1;
4407
4408 if (current_function_profile)
4409 return 1;
4410
4411 return 0;
4412}
4413
4414/* Record that the current function accesses previous call frames. */
4415
4416void
4417ix86_setup_frame_addresses ()
4418{
4419 cfun->machine->accesses_prev_frame = 1;
4420}
e075ae69 4421\f
4422#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4423# define USE_HIDDEN_LINKONCE 1
4424#else
4425# define USE_HIDDEN_LINKONCE 0
4426#endif
4427
bd09bdeb 4428static int pic_labels_used;
e9a25f70 4429
4430/* Fills in the label name that should be used for a pc thunk for
4431 the given register. */
4432
4433static void
4434get_pc_thunk_name (name, regno)
4435 char name[32];
4436 unsigned int regno;
4437{
4438 if (USE_HIDDEN_LINKONCE)
4439 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4440 else
4441 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4442}
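/* Illustrative example (editorial annotation, not in the original
   source): for regno 3 (%ebx) the USE_HIDDEN_LINKONCE case yields
   "__i686.get_pc_thunk.bx", while the fallback produces an internal
   label built from the "LPR" prefix and the register number.  */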
4443
4444
4445/* This function generates code for -fpic that loads %ebx with
4446 the return address of the caller and then returns. */
4447
4448void
4cf12e7e 4449ix86_asm_file_end (file)
e075ae69 4450 FILE *file;
4451{
4452 rtx xops[2];
bd09bdeb 4453 int regno;
32b5b1aa 4454
bd09bdeb 4455 for (regno = 0; regno < 8; ++regno)
7c262518 4456 {
4457 char name[32];
4458
4459 if (! ((pic_labels_used >> regno) & 1))
4460 continue;
4461
145aacc2 4462 get_pc_thunk_name (name, regno);
bd09bdeb 4463
4464 if (USE_HIDDEN_LINKONCE)
4465 {
4466 tree decl;
4467
4468 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4469 error_mark_node);
4470 TREE_PUBLIC (decl) = 1;
4471 TREE_STATIC (decl) = 1;
4472 DECL_ONE_ONLY (decl) = 1;
4473
4474 (*targetm.asm_out.unique_section) (decl, 0);
4475 named_section (decl, NULL, 0);
4476
5eb99654 4477 (*targetm.asm_out.globalize_label) (file, name);
4478 fputs ("\t.hidden\t", file);
4479 assemble_name (file, name);
4480 fputc ('\n', file);
4481 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4482 }
4483 else
4484 {
4485 text_section ();
4486 ASM_OUTPUT_LABEL (file, name);
4487 }
4488
4489 xops[0] = gen_rtx_REG (SImode, regno);
4490 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4491 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4492 output_asm_insn ("ret", xops);
7c262518 4493 }
32b5b1aa 4494}
32b5b1aa 4495
c8c03509 4496/* Emit code for the SET_GOT patterns. */
32b5b1aa 4497
4498const char *
4499output_set_got (dest)
4500 rtx dest;
4501{
4502 rtx xops[3];
0d7d98ee 4503
c8c03509 4504 xops[0] = dest;
5fc0e5df 4505 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4506
c8c03509 4507 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4508 {
4509 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4510
4511 if (!flag_pic)
4512 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4513 else
4514 output_asm_insn ("call\t%a2", xops);
4515
4516#if TARGET_MACHO
4517 /* Output the "canonical" label name ("Lxx$pb") here too. This
4518 is what will be referred to by the Mach-O PIC subsystem. */
4519 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4520#endif
4977bab6 4521 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4522 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4523
4524 if (flag_pic)
4525 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4526 }
e075ae69 4527 else
e5cb57e8 4528 {
4529 char name[32];
4530 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4531 pic_labels_used |= 1 << REGNO (dest);
f996902d 4532
145aacc2 4533 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4534 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4535 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4536 }
e5cb57e8 4537
4538 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4539 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4540 else if (!TARGET_MACHO)
8e9fadc3 4541 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4542
c8c03509 4543 return "";
e9a25f70 4544}
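/* Illustrative note (editorial annotation, not in the original source):
   without deep branch prediction the PIC expansion is roughly
       call  .L2
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
   whereas with it the call targets the pc thunk and is followed by
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx
   which keeps the processor's call/return predictor stack balanced.  */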
8dfe5673 4545
4546/* Generate a "push" pattern for input ARG. */
e9a25f70 4547
4548static rtx
4549gen_push (arg)
4550 rtx arg;
e9a25f70 4551{
c5c76735 4552 return gen_rtx_SET (VOIDmode,
4553 gen_rtx_MEM (Pmode,
4554 gen_rtx_PRE_DEC (Pmode,
4555 stack_pointer_rtx)),
4556 arg);
4557}
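/* Illustrative note (editorial annotation, not in the original source):
   in 32-bit mode the returned RTL is
       (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
   i.e. an ordinary push with the stack-pointer decrement folded into
   the memory address.  */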
4558
4559/* Return >= 0 if there is an unused call-clobbered register available
4560 for the entire function. */
4561
4562static unsigned int
4563ix86_select_alt_pic_regnum ()
4564{
4565 if (current_function_is_leaf && !current_function_profile)
4566 {
4567 int i;
4568 for (i = 2; i >= 0; --i)
4569 if (!regs_ever_live[i])
4570 return i;
4571 }
4572
4573 return INVALID_REGNUM;
4574}
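/* Illustrative note (editorial annotation, not in the original source):
   the loop scans hard regs 2..0 (%ecx, %edx, %eax) for one that is
   never live; in a leaf, non-profiled function such a call-clobbered
   register can hold the PIC base, sparing the usual save/restore of
   %ebx.  */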
fce5a9f2 4575
4dd2ac2c
JH
4576/* Return 1 if we need to save REGNO. */
4577static int
1020a5ab 4578ix86_save_reg (regno, maybe_eh_return)
9b690711 4579 unsigned int regno;
37a58036 4580 int maybe_eh_return;
1020a5ab 4581{
4582 if (pic_offset_table_rtx
4583 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4584 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4585 || current_function_profile
1020a5ab 4586 || current_function_calls_eh_return))
4587 {
4588 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4589 return 0;
4590 return 1;
4591 }
4592
4593 if (current_function_calls_eh_return && maybe_eh_return)
4594 {
4595 unsigned i;
4596 for (i = 0; ; i++)
4597 {
b531087a 4598 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4599 if (test == INVALID_REGNUM)
4600 break;
9b690711 4601 if (test == regno)
4602 return 1;
4603 }
4604 }
4dd2ac2c 4605
4606 return (regs_ever_live[regno]
4607 && !call_used_regs[regno]
4608 && !fixed_regs[regno]
4609 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4610}
4611
4612/* Return number of registers to be saved on the stack. */
4613
4614static int
4615ix86_nsaved_regs ()
4616{
4617 int nregs = 0;
4618 int regno;
4619
4dd2ac2c 4620 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4621 if (ix86_save_reg (regno, true))
4dd2ac2c 4622 nregs++;
4623 return nregs;
4624}
4625
4626/* Return the offset between two registers, one to be eliminated, and the other
4627 its replacement, at the start of a routine. */
4628
4629HOST_WIDE_INT
4630ix86_initial_elimination_offset (from, to)
4631 int from;
4632 int to;
4633{
4634 struct ix86_frame frame;
4635 ix86_compute_frame_layout (&frame);
4636
4637 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4638 return frame.hard_frame_pointer_offset;
4639 else if (from == FRAME_POINTER_REGNUM
4640 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4641 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4642 else
4643 {
4644 if (to != STACK_POINTER_REGNUM)
4645 abort ();
4646 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4647 return frame.stack_pointer_offset;
4648 else if (from != FRAME_POINTER_REGNUM)
4649 abort ();
0903fcab 4650 else
4dd2ac2c 4651 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4652 }
4653}
4654
4655/* Fill the ix86_frame structure describing the frame of the current function. */
0f290768 4656
4657static void
4658ix86_compute_frame_layout (frame)
4659 struct ix86_frame *frame;
65954bd8 4660{
65954bd8 4661 HOST_WIDE_INT total_size;
564d80f4 4662 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4663 int offset;
4664 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4665 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4666
4dd2ac2c 4667 frame->nregs = ix86_nsaved_regs ();
564d80f4 4668 total_size = size;
65954bd8 4669
9ba81eaa 4670 /* Skip return address and saved base pointer. */
4671 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4672
4673 frame->hard_frame_pointer_offset = offset;
564d80f4 4674
4675 /* Do some sanity checking of stack_alignment_needed and
 4676 preferred_alignment, since the i386 port is the only one using those features
f710504c 4677 that may break easily. */
564d80f4 4678
4679 if (size && !stack_alignment_needed)
4680 abort ();
4681 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4682 abort ();
4683 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4684 abort ();
4685 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4686 abort ();
564d80f4 4687
4688 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4689 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4690
4691 /* Register save area */
4692 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4693
4694 /* Va-arg area */
4695 if (ix86_save_varrargs_registers)
4696 {
4697 offset += X86_64_VARARGS_SIZE;
4698 frame->va_arg_size = X86_64_VARARGS_SIZE;
4699 }
4700 else
4701 frame->va_arg_size = 0;
4702
4703 /* Align start of frame for local function. */
4704 frame->padding1 = ((offset + stack_alignment_needed - 1)
4705 & -stack_alignment_needed) - offset;
f73ad30e 4706
4dd2ac2c 4707 offset += frame->padding1;
65954bd8 4708
4709 /* Frame pointer points here. */
4710 frame->frame_pointer_offset = offset;
54ff41b7 4711
4dd2ac2c 4712 offset += size;
65954bd8 4713
4714 /* Add outgoing arguments area. Can be skipped if we eliminated
4715 all the function calls as dead code. */
4716 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4717 {
4718 offset += current_function_outgoing_args_size;
4719 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4720 }
4721 else
4722 frame->outgoing_arguments_size = 0;
564d80f4 4723
4724 /* Align stack boundary. Only needed if we're calling another function
4725 or using alloca. */
4726 if (!current_function_is_leaf || current_function_calls_alloca)
4727 frame->padding2 = ((offset + preferred_alignment - 1)
4728 & -preferred_alignment) - offset;
4729 else
4730 frame->padding2 = 0;
4731
4732 offset += frame->padding2;
4733
4734 /* We've reached end of stack frame. */
4735 frame->stack_pointer_offset = offset;
4736
4737 /* Size prologue needs to allocate. */
4738 frame->to_allocate =
4739 (size + frame->padding1 + frame->padding2
8362f420 4740 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4741
4742 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4743 && current_function_is_leaf)
4744 {
4745 frame->red_zone_size = frame->to_allocate;
4746 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4747 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4748 }
4749 else
4750 frame->red_zone_size = 0;
4751 frame->to_allocate -= frame->red_zone_size;
4752 frame->stack_pointer_offset -= frame->red_zone_size;
4753#if 0
4754 fprintf (stderr, "nregs: %i\n", frame->nregs);
4755 fprintf (stderr, "size: %i\n", size);
4756 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4757 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4758 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4759 fprintf (stderr, "padding2: %i\n", frame->padding2);
4760 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4761 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4762 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4763 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4764 frame->hard_frame_pointer_offset);
4765 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4766#endif
4767}
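/* Illustrative frame sketch (editorial annotation, not in the original
   source), from high to low addresses:

       return address
       saved frame pointer    <- hard_frame_pointer_offset
       register save area     (nregs * UNITS_PER_WORD)
       va-arg save area       (X86_64_VARARGS_SIZE, if used)
       padding1               <- frame_pointer_offset
       local variables        (size)
       outgoing arguments
       padding2               <- stack_pointer_offset

   to_allocate covers everything below the register save area, less any
   red zone a leaf function can use without moving the stack pointer.  */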
4768
4769/* Emit code to save registers in the prologue. */
4770
4771static void
4772ix86_emit_save_regs ()
4773{
4774 register int regno;
0903fcab 4775 rtx insn;
0903fcab 4776
4dd2ac2c 4777 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4778 if (ix86_save_reg (regno, true))
0903fcab 4779 {
0d7d98ee 4780 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4781 RTX_FRAME_RELATED_P (insn) = 1;
4782 }
4783}
4784
4785/* Emit code to save registers using MOV insns. First register
 4786 is stored at POINTER + OFFSET. */
4787static void
4788ix86_emit_save_regs_using_mov (pointer, offset)
4789 rtx pointer;
4790 HOST_WIDE_INT offset;
4791{
4792 int regno;
4793 rtx insn;
4794
4795 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4796 if (ix86_save_reg (regno, true))
4797 {
4798 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4799 Pmode, offset),
4800 gen_rtx_REG (Pmode, regno));
4801 RTX_FRAME_RELATED_P (insn) = 1;
4802 offset += UNITS_PER_WORD;
4803 }
4804}
4805
0f290768 4806/* Expand the prologue into a bunch of separate insns. */
4807
4808void
4809ix86_expand_prologue ()
2a2ab3f9 4810{
564d80f4 4811 rtx insn;
bd09bdeb 4812 bool pic_reg_used;
4dd2ac2c 4813 struct ix86_frame frame;
6ab16dd9 4814 int use_mov = 0;
c6036a37 4815 HOST_WIDE_INT allocate;
4dd2ac2c 4816
4977bab6 4817 ix86_compute_frame_layout (&frame);
2ab0437e 4818 if (!optimize_size)
6ab16dd9 4819 {
4820 int count = frame.nregs;
4821
4822 /* The fast prologue uses move instead of push to save registers. This
4823 is significantly longer, but also executes faster as modern hardware
4824 can execute the moves in parallel, but can't do that for push/pop.
4825
 4826 Be careful about choosing which prologue to emit: when the function
 4827 takes many instructions to execute, we may use the slow version,
 4828 as well as when the function is known to be outside a hot spot
 4829 (known only with feedback). Weight the size of the function by the
 4830 number of registers to save, as it is cheap to use one or two push
 4831 instructions but very slow to use many of them. */
4832 if (count)
4833 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4834 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4835 || (flag_branch_probabilities
4836 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4837 use_fast_prologue_epilogue = 0;
4838 else
4839 use_fast_prologue_epilogue = !expensive_function_p (count);
4840 if (TARGET_PROLOGUE_USING_MOVE)
4841 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4842 }
79325812 4843
4844 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4845 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4846
4847 if (frame_pointer_needed)
4848 {
564d80f4 4849 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4850 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4851
564d80f4 4852 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4853 RTX_FRAME_RELATED_P (insn) = 1;
4854 }
4855
4856 allocate = frame.to_allocate;
 4857 /* In case we are dealing only with a single register and an empty frame,
 4858 push is equivalent to the mov+add sequence. */
4859 if (allocate == 0 && frame.nregs <= 1)
4860 use_mov = 0;
4861
4862 if (!use_mov)
4863 ix86_emit_save_regs ();
4864 else
4865 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4866
c6036a37 4867 if (allocate == 0)
8dfe5673 4868 ;
e323735c 4869 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4870 {
4871 insn = emit_insn (gen_pro_epilogue_adjust_stack
4872 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4873 GEN_INT (-allocate)));
e075ae69 4874 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4875 }
79325812 4876 else
8dfe5673 4877 {
e075ae69 4878 /* ??? Is this only valid for Win32? */
e9a25f70 4879
e075ae69 4880 rtx arg0, sym;
e9a25f70 4881
8362f420 4882 if (TARGET_64BIT)
b531087a 4883 abort ();
8362f420 4884
e075ae69 4885 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4886 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4887
4888 sym = gen_rtx_MEM (FUNCTION_MODE,
4889 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4890 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4891
4892 CALL_INSN_FUNCTION_USAGE (insn)
4893 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4894 CALL_INSN_FUNCTION_USAGE (insn));
4895
 4896 /* Don't allow the scheduling pass to move insns across the __alloca
 4897 call. */
4898 emit_insn (gen_blockage (const0_rtx));
e075ae69 4899 }
c6036a37
JH
4900 if (use_mov)
4901 {
4902 if (!frame_pointer_needed || !frame.to_allocate)
4903 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4904 else
4905 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4906 -frame.nregs * UNITS_PER_WORD);
4907 }
e9a25f70 4908
84530511
SC
4909#ifdef SUBTARGET_PROLOGUE
4910 SUBTARGET_PROLOGUE;
0f290768 4911#endif
84530511 4912
bd09bdeb
RH
4913 pic_reg_used = false;
4914 if (pic_offset_table_rtx
4915 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4916 || current_function_profile))
4917 {
4918 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4919
4920 if (alt_pic_reg_used != INVALID_REGNUM)
4921 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4922
4923 pic_reg_used = true;
4924 }
4925
e9a25f70 4926 if (pic_reg_used)
c8c03509
RH
4927 {
4928 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4929
66edd3b4
RH
4930 /* Even with accurate pre-reload life analysis, we can wind up
4931 deleting all references to the pic register after reload.
4932 Consider the case where cross-jumping unifies two sides of a branch
d1f87653 4933 controlled by a comparison against the only read from a global.
66edd3b4
RH
4934 In that case, allow the set_got to be deleted, though we're
4935 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4936 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4937 }
77a989d1 4938
66edd3b4
RH
4939 /* Prevent function calls from being scheduled before the call to mcount.
4940 In the pic_reg_used case, make sure that the got load isn't deleted. */
4941 if (current_function_profile)
4942 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4943}
4944
da2d1d3a
JH
4945/* Emit code to restore saved registers using MOV insns. The first
4946 register is restored from POINTER + OFFSET. */
4947static void
1020a5ab
RH
4948ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4949 rtx pointer;
4950 int offset;
37a58036 4951 int maybe_eh_return;
da2d1d3a
JH
4952{
4953 int regno;
da2d1d3a 4954
4dd2ac2c 4955 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4956 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4957 {
4dd2ac2c 4958 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4959 adjust_address (gen_rtx_MEM (Pmode, pointer),
4960 Pmode, offset));
4dd2ac2c 4961 offset += UNITS_PER_WORD;
da2d1d3a
JH
4962 }
4963}
4964
0f290768 4965/* Restore function stack, frame, and registers. */
e9a25f70 4966
2a2ab3f9 4967void
1020a5ab
RH
4968ix86_expand_epilogue (style)
4969 int style;
2a2ab3f9 4970{
1c71e60e 4971 int regno;
fdb8a883 4972 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4973 struct ix86_frame frame;
65954bd8 4974 HOST_WIDE_INT offset;
4dd2ac2c
JH
4975
4976 ix86_compute_frame_layout (&frame);
2a2ab3f9 4977
a4f31c00 4978 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4979 must be taken for the normal return case of a function using
4980 eh_return: the eax and edx registers are marked as saved, but not
4981 restored along this path. */
4982 offset = frame.nregs;
4983 if (current_function_calls_eh_return && style != 2)
4984 offset -= 2;
4985 offset *= -UNITS_PER_WORD;
2a2ab3f9 4986
fdb8a883
JW
4987 /* If we're only restoring one register and sp is not valid, then
4988 use a move instruction to restore the register, since it's
0f290768 4989 less work than reloading sp and popping the register.
da2d1d3a
JH
4990
4991 The default code results in a stack adjustment using an add/lea
4992 instruction, while this code results in a LEAVE instruction (or
4993 discrete equivalent), so it is profitable in some other cases as
4994 well, especially when there are no registers to restore. We also
d1f87653 4995 use this code when TARGET_USE_LEAVE and there is exactly one
da2d1d3a 4996 register to pop. This heuristic may need some tuning in future. */
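 /* Illustrative example, not from the original source: with a frame
    pointer, TARGET_USE_LEAVE, a fast epilogue and exactly one saved
    register, the move-based branch below is taken and finishes with
    LEAVE; whereas with no frame pointer, a valid sp and
    TARGET_EPILOGUE_USING_MOVE unset, control falls through to the
    pop-based branch further down. */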
4dd2ac2c 4997 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4998 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4999 && use_fast_prologue_epilogue
c6036a37 5000 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 5001 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 5002 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 5003 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 5004 || current_function_calls_eh_return)
2a2ab3f9 5005 {
da2d1d3a
JH
5006 /* Restore registers. We can use ebp or esp to address the memory
5007 locations. If both are available, default to ebp, since offsets
5008 are known to be small. The only exception is esp pointing directly
5009 to the end of the block of saved registers, where we may simplify
5010 the addressing mode. */
5011
4dd2ac2c 5012 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
5013 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5014 frame.to_allocate, style == 2);
da2d1d3a 5015 else
1020a5ab
RH
5016 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5017 offset, style == 2);
5018
5019 /* eh_return epilogues need %ecx added to the stack pointer. */
5020 if (style == 2)
5021 {
5022 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 5023
1020a5ab
RH
5024 if (frame_pointer_needed)
5025 {
5026 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5027 tmp = plus_constant (tmp, UNITS_PER_WORD);
5028 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5029
5030 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5031 emit_move_insn (hard_frame_pointer_rtx, tmp);
5032
5033 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 5034 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
5035 }
5036 else
5037 {
5038 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5039 tmp = plus_constant (tmp, (frame.to_allocate
5040 + frame.nregs * UNITS_PER_WORD));
5041 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5042 }
5043 }
5044 else if (!frame_pointer_needed)
f2042df3
RH
5045 emit_insn (gen_pro_epilogue_adjust_stack
5046 (stack_pointer_rtx, stack_pointer_rtx,
5047 GEN_INT (frame.to_allocate
5048 + frame.nregs * UNITS_PER_WORD)));
0f290768 5049 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 5050 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 5051 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 5052 else
2a2ab3f9 5053 {
1c71e60e
JH
5054 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5055 hard_frame_pointer_rtx,
f2042df3 5056 const0_rtx));
8362f420
JH
5057 if (TARGET_64BIT)
5058 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5059 else
5060 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
5061 }
5062 }
1c71e60e 5063 else
68f654ec 5064 {
1c71e60e
JH
5065 /* First step is to deallocate the stack frame so that we can
5066 pop the registers. */
5067 if (!sp_valid)
5068 {
5069 if (!frame_pointer_needed)
5070 abort ();
5071 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5072 hard_frame_pointer_rtx,
f2042df3 5073 GEN_INT (offset)));
1c71e60e 5074 }
4dd2ac2c 5075 else if (frame.to_allocate)
f2042df3
RH
5076 emit_insn (gen_pro_epilogue_adjust_stack
5077 (stack_pointer_rtx, stack_pointer_rtx,
5078 GEN_INT (frame.to_allocate)));
1c71e60e 5079
4dd2ac2c 5080 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 5081 if (ix86_save_reg (regno, false))
8362f420
JH
5082 {
5083 if (TARGET_64BIT)
5084 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5085 else
5086 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5087 }
4dd2ac2c 5088 if (frame_pointer_needed)
8362f420 5089 {
f5143c46 5090 /* A leave instruction results in shorter dependency chains on
2ab0437e
JH
5091 CPUs that are able to grok it fast. */
5092 if (TARGET_USE_LEAVE)
5093 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5094 else if (TARGET_64BIT)
8362f420
JH
5095 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5096 else
5097 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5098 }
68f654ec 5099 }
68f654ec 5100
cbbf65e0 5101 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 5102 if (style == 0)
cbbf65e0
RH
5103 return;
5104
2a2ab3f9
JVA
5105 if (current_function_pops_args && current_function_args_size)
5106 {
e075ae69 5107 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 5108
b8c752c8
UD
5109 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
5110 return address, do an explicit add, and jump indirectly to the
0f290768 5111 caller. */
2a2ab3f9 5112
b8c752c8 5113 if (current_function_pops_args >= 65536)
2a2ab3f9 5114 {
e075ae69 5115 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 5116
8362f420
JH
5117 /* There is no "pascal" calling convention in the 64-bit ABI. */
5118 if (TARGET_64BIT)
b531087a 5119 abort ();
8362f420 5120
e075ae69
RH
5121 emit_insn (gen_popsi1 (ecx));
5122 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 5123 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 5124 }
79325812 5125 else
e075ae69
RH
5126 emit_jump_insn (gen_return_pop_internal (popc));
5127 }
5128 else
5129 emit_jump_insn (gen_return_internal ());
5130}
bd09bdeb
RH
5131
5132/* Reset state that may have been modified while compiling the function. */
5133
5134static void
5135ix86_output_function_epilogue (file, size)
5136 FILE *file ATTRIBUTE_UNUSED;
5137 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5138{
5139 if (pic_offset_table_rtx)
5140 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5141}
e075ae69
RH
5142\f
5143/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
5144 for an instruction. Return 0 if the structure of the address is
5145 grossly off. Return -1 if the address contains ASHIFT, so it is not
5146 strictly valid, but is still used for computing the length of the
5147 lea instruction. */
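/* Illustrative example, not from the original source: the address
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   decomposes below into index = A, scale = 4, base = B, disp = 8,
   i.e. the AT&T form 8(%B,%A,4). */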
e075ae69
RH
5148
5149static int
5150ix86_decompose_address (addr, out)
5151 register rtx addr;
5152 struct ix86_address *out;
5153{
5154 rtx base = NULL_RTX;
5155 rtx index = NULL_RTX;
5156 rtx disp = NULL_RTX;
5157 HOST_WIDE_INT scale = 1;
5158 rtx scale_rtx = NULL_RTX;
b446e5a2 5159 int retval = 1;
e075ae69 5160
1540f9eb 5161 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
5162 base = addr;
5163 else if (GET_CODE (addr) == PLUS)
5164 {
5165 rtx op0 = XEXP (addr, 0);
5166 rtx op1 = XEXP (addr, 1);
5167 enum rtx_code code0 = GET_CODE (op0);
5168 enum rtx_code code1 = GET_CODE (op1);
5169
5170 if (code0 == REG || code0 == SUBREG)
5171 {
5172 if (code1 == REG || code1 == SUBREG)
5173 index = op0, base = op1; /* index + base */
5174 else
5175 base = op0, disp = op1; /* base + displacement */
5176 }
5177 else if (code0 == MULT)
e9a25f70 5178 {
e075ae69
RH
5179 index = XEXP (op0, 0);
5180 scale_rtx = XEXP (op0, 1);
5181 if (code1 == REG || code1 == SUBREG)
5182 base = op1; /* index*scale + base */
e9a25f70 5183 else
e075ae69
RH
5184 disp = op1; /* index*scale + disp */
5185 }
5186 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5187 {
5188 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5189 scale_rtx = XEXP (XEXP (op0, 0), 1);
5190 base = XEXP (op0, 1);
5191 disp = op1;
2a2ab3f9 5192 }
e075ae69
RH
5193 else if (code0 == PLUS)
5194 {
5195 index = XEXP (op0, 0); /* index + base + disp */
5196 base = XEXP (op0, 1);
5197 disp = op1;
5198 }
5199 else
b446e5a2 5200 return 0;
e075ae69
RH
5201 }
5202 else if (GET_CODE (addr) == MULT)
5203 {
5204 index = XEXP (addr, 0); /* index*scale */
5205 scale_rtx = XEXP (addr, 1);
5206 }
5207 else if (GET_CODE (addr) == ASHIFT)
5208 {
5209 rtx tmp;
5210
5211 /* We're called for lea too, which implements ashift on occasion. */
5212 index = XEXP (addr, 0);
5213 tmp = XEXP (addr, 1);
5214 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 5215 return 0;
e075ae69
RH
5216 scale = INTVAL (tmp);
5217 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 5218 return 0;
e075ae69 5219 scale = 1 << scale;
b446e5a2 5220 retval = -1;
2a2ab3f9 5221 }
2a2ab3f9 5222 else
e075ae69
RH
5223 disp = addr; /* displacement */
5224
5225 /* Extract the integral value of scale. */
5226 if (scale_rtx)
e9a25f70 5227 {
e075ae69 5228 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 5229 return 0;
e075ae69 5230 scale = INTVAL (scale_rtx);
e9a25f70 5231 }
3b3c6a3f 5232
e075ae69
RH
5233 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5234 if (base && index && scale == 1
564d80f4
JH
5235 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5236 || index == stack_pointer_rtx))
e075ae69
RH
5237 {
5238 rtx tmp = base;
5239 base = index;
5240 index = tmp;
5241 }
5242
5243 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
5244 if ((base == hard_frame_pointer_rtx
5245 || base == frame_pointer_rtx
5246 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
5247 disp = const0_rtx;
5248
5249 /* Special case: on K6, [%esi] causes the instruction to be vector
5250 decoded. Avoid this by transforming it to [%esi+0]. */
5251 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5252 && base && !index && !disp
329e1d01 5253 && REG_P (base)
e075ae69
RH
5254 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5255 disp = const0_rtx;
5256
5257 /* Special case: encode reg+reg instead of reg*2. */
5258 if (!base && index && scale && scale == 2)
5259 base = index, scale = 1;
0f290768 5260
e075ae69
RH
5261 /* Special case: scaling cannot be encoded without base or displacement. */
5262 if (!base && !disp && index && scale != 1)
5263 disp = const0_rtx;
5264
5265 out->base = base;
5266 out->index = index;
5267 out->disp = disp;
5268 out->scale = scale;
3b3c6a3f 5269
b446e5a2 5270 return retval;
e075ae69 5271}
01329426
JH
5272\f
5273/* Return the cost of the memory address x.
5274 For i386, it is better to use a complex address than let gcc copy
5275 the address into a reg and make a new pseudo. But not if the address
5276 requires two regs - that would mean more pseudos with longer
5277 lifetimes. */
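/* Illustrative example, not from the original source: for an address
   of the form 4(reg) where reg is still a pseudo, the cost below
   starts at 1, drops by 1 for the nonzero displacement, then rises
   by 1 because a pseudo may need a hard register, ending at 1. */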
5278int
5279ix86_address_cost (x)
5280 rtx x;
5281{
5282 struct ix86_address parts;
5283 int cost = 1;
3b3c6a3f 5284
01329426
JH
5285 if (!ix86_decompose_address (x, &parts))
5286 abort ();
5287
1540f9eb
JH
5288 if (parts.base && GET_CODE (parts.base) == SUBREG)
5289 parts.base = SUBREG_REG (parts.base);
5290 if (parts.index && GET_CODE (parts.index) == SUBREG)
5291 parts.index = SUBREG_REG (parts.index);
5292
01329426
JH
5293 /* More complex memory references are better. */
5294 if (parts.disp && parts.disp != const0_rtx)
5295 cost--;
5296
5297 /* Attempt to minimize number of registers in the address. */
5298 if ((parts.base
5299 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5300 || (parts.index
5301 && (!REG_P (parts.index)
5302 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5303 cost++;
5304
5305 if (parts.base
5306 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5307 && parts.index
5308 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5309 && parts.base != parts.index)
5310 cost++;
5311
5312 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5313 since its predecode logic can't detect the length of instructions
5314 and it degenerates to vector decoding. Increase the cost of such
5315 addresses here. The penalty is at least 2 cycles. It may be worthwhile
0f290768 5316 to split such addresses or even to refuse such addresses entirely.
01329426
JH
5317
5318 The following addressing modes are affected:
5319 [base+scale*index]
5320 [scale*index+disp]
5321 [base+index]
0f290768 5322
01329426
JH
5323 The first and last cases may be avoidable by explicitly coding a zero
5324 displacement in the memory address, but I don't have an AMD-K6 machine
5325 handy to check this theory. */
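 /* Illustrative example of the theory above, not from the original
    source: (%eax,%ecx,4) is encoded with ModR/M 00_xxx_100b and takes
    the penalty below, while an explicit zero displacement, as in
    0(%eax,%ecx,4), would be encoded with mod = 01 instead. */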
5326
5327 if (TARGET_K6
5328 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5329 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5330 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5331 cost += 10;
0f290768 5332
01329426
JH
5333 return cost;
5334}
5335\f
b949ea8b
JW
5336/* If X is a machine specific address (i.e. a symbol or label being
5337 referenced as a displacement from the GOT implemented using an
5338 UNSPEC), then return the base term. Otherwise return X. */
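/* Illustrative example, not from the original source: for 32-bit PIC
   code, (plus (reg pic) (const (unspec [(symbol_ref "foo")] GOTOFF)))
   yields (symbol_ref "foo"); anything not matching the expected shape
   is returned unchanged. */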
5339
5340rtx
5341ix86_find_base_term (x)
5342 rtx x;
5343{
5344 rtx term;
5345
6eb791fc
JH
5346 if (TARGET_64BIT)
5347 {
5348 if (GET_CODE (x) != CONST)
5349 return x;
5350 term = XEXP (x, 0);
5351 if (GET_CODE (term) == PLUS
5352 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5353 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5354 term = XEXP (term, 0);
5355 if (GET_CODE (term) != UNSPEC
8ee41eaf 5356 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5357 return x;
5358
5359 term = XVECEXP (term, 0, 0);
5360
5361 if (GET_CODE (term) != SYMBOL_REF
5362 && GET_CODE (term) != LABEL_REF)
5363 return x;
5364
5365 return term;
5366 }
5367
b949ea8b
JW
5368 if (GET_CODE (x) != PLUS
5369 || XEXP (x, 0) != pic_offset_table_rtx
5370 || GET_CODE (XEXP (x, 1)) != CONST)
5371 return x;
5372
5373 term = XEXP (XEXP (x, 1), 0);
5374
5375 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5376 term = XEXP (term, 0);
5377
5378 if (GET_CODE (term) != UNSPEC
8ee41eaf 5379 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
5380 return x;
5381
5382 term = XVECEXP (term, 0, 0);
5383
5384 if (GET_CODE (term) != SYMBOL_REF
5385 && GET_CODE (term) != LABEL_REF)
5386 return x;
5387
5388 return term;
5389}
5390\f
f996902d
RH
5391/* Determine if a given RTX is a valid constant. We already know this
5392 satisfies CONSTANT_P. */
5393
5394bool
5395legitimate_constant_p (x)
5396 rtx x;
5397{
5398 rtx inner;
5399
5400 switch (GET_CODE (x))
5401 {
5402 case SYMBOL_REF:
5403 /* TLS symbols are not constant. */
5404 if (tls_symbolic_operand (x, Pmode))
5405 return false;
5406 break;
5407
5408 case CONST:
5409 inner = XEXP (x, 0);
5410
5411 /* Offsets of TLS symbols are never valid.
5412 Discourage CSE from creating them. */
5413 if (GET_CODE (inner) == PLUS
5414 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5415 return false;
5416
5417 /* Only some unspecs are valid as "constants". */
5418 if (GET_CODE (inner) == UNSPEC)
5419 switch (XINT (inner, 1))
5420 {
5421 case UNSPEC_TPOFF:
5422 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5423 default:
5424 return false;
5425 }
5426 break;
5427
5428 default:
5429 break;
5430 }
5431
5432 /* Otherwise we handle everything else in the move patterns. */
5433 return true;
5434}
5435
3a04ff64
RH
5436/* Determine if it's legal to put X into the constant pool. This
5437 is not possible for the address of thread-local symbols, which
5438 is checked above. */
5439
5440static bool
5441ix86_cannot_force_const_mem (x)
5442 rtx x;
5443{
5444 return !legitimate_constant_p (x);
5445}
5446
f996902d
RH
5447/* Determine if a given RTX is a valid constant address. */
5448
5449bool
5450constant_address_p (x)
5451 rtx x;
5452{
5453 switch (GET_CODE (x))
5454 {
5455 case LABEL_REF:
5456 case CONST_INT:
5457 return true;
5458
5459 case CONST_DOUBLE:
5460 return TARGET_64BIT;
5461
5462 case CONST:
b069de3b
SS
5463 /* For Mach-O, really believe the CONST. */
5464 if (TARGET_MACHO)
5465 return true;
5466 /* Otherwise fall through. */
f996902d
RH
5467 case SYMBOL_REF:
5468 return !flag_pic && legitimate_constant_p (x);
5469
5470 default:
5471 return false;
5472 }
5473}
5474
5475/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5476 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5477 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5478
5479bool
5480legitimate_pic_operand_p (x)
5481 rtx x;
5482{
5483 rtx inner;
5484
5485 switch (GET_CODE (x))
5486 {
5487 case CONST:
5488 inner = XEXP (x, 0);
5489
5490 /* Only some unspecs are valid as "constants". */
5491 if (GET_CODE (inner) == UNSPEC)
5492 switch (XINT (inner, 1))
5493 {
5494 case UNSPEC_TPOFF:
5495 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5496 default:
5497 return false;
5498 }
5499 /* FALLTHRU */
5500
5501 case SYMBOL_REF:
5502 case LABEL_REF:
5503 return legitimate_pic_address_disp_p (x);
5504
5505 default:
5506 return true;
5507 }
5508}
5509
e075ae69
RH
5510/* Determine if a given CONST RTX is a valid memory displacement
5511 in PIC mode. */
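/* Illustrative example, not from the original source: in 32-bit PIC
   code, (const (unspec [(symbol_ref "foo")] GOTOFF)) is accepted for
   a local "foo", while a bare (symbol_ref "foo") fails the CONST
   check below. */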
0f290768 5512
59be65f6 5513int
91bb873f
RH
5514legitimate_pic_address_disp_p (disp)
5515 register rtx disp;
5516{
f996902d
RH
5517 bool saw_plus;
5518
6eb791fc
JH
5519 /* In 64bit mode we can allow direct addresses of symbols and labels
5520 when they are not dynamic symbols. */
c05dbe81
JH
5521 if (TARGET_64BIT)
5522 {
5523 /* TLS references should always be enclosed in UNSPEC. */
5524 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5525 return 0;
5526 if (GET_CODE (disp) == SYMBOL_REF
5527 && ix86_cmodel == CM_SMALL_PIC
5528 && (CONSTANT_POOL_ADDRESS_P (disp)
5529 || SYMBOL_REF_FLAG (disp)))
5530 return 1;
5531 if (GET_CODE (disp) == LABEL_REF)
5532 return 1;
5533 if (GET_CODE (disp) == CONST
5534 && GET_CODE (XEXP (disp, 0)) == PLUS
5535 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5536 && ix86_cmodel == CM_SMALL_PIC
5537 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5538 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5539 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5540 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5541 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5542 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5543 return 1;
5544 }
91bb873f
RH
5545 if (GET_CODE (disp) != CONST)
5546 return 0;
5547 disp = XEXP (disp, 0);
5548
6eb791fc
JH
5549 if (TARGET_64BIT)
5550 {
5551 /* It is unsafe to allow PLUS expressions here. This limits the allowed
5552 distance of GOT references. We should not need these anyway. */
5553 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5554 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5555 return 0;
5556
5557 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5558 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5559 return 0;
5560 return 1;
5561 }
5562
f996902d 5563 saw_plus = false;
91bb873f
RH
5564 if (GET_CODE (disp) == PLUS)
5565 {
5566 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5567 return 0;
5568 disp = XEXP (disp, 0);
f996902d 5569 saw_plus = true;
91bb873f
RH
5570 }
5571
b069de3b
SS
5572 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5573 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5574 {
5575 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5576 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5577 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5578 {
5579 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5580 if (strstr (sym_name, "$pb") != 0)
5581 return 1;
5582 }
5583 }
5584
8ee41eaf 5585 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5586 return 0;
5587
623fe810
RH
5588 switch (XINT (disp, 1))
5589 {
8ee41eaf 5590 case UNSPEC_GOT:
f996902d
RH
5591 if (saw_plus)
5592 return false;
623fe810 5593 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5594 case UNSPEC_GOTOFF:
623fe810 5595 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d 5596 case UNSPEC_GOTTPOFF:
dea73790
JJ
5597 case UNSPEC_GOTNTPOFF:
5598 case UNSPEC_INDNTPOFF:
f996902d
RH
5599 if (saw_plus)
5600 return false;
5601 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5602 case UNSPEC_NTPOFF:
f996902d
RH
5603 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5604 case UNSPEC_DTPOFF:
f996902d 5605 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5606 }
fce5a9f2 5607
623fe810 5608 return 0;
91bb873f
RH
5609}
5610
e075ae69
RH
5611/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5612 memory address for an instruction. The MODE argument is the machine mode
5613 for the MEM expression that wants to use this address.
5614
5615 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5616 convert common non-canonical forms to canonical form so that they will
5617 be recognized. */
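/* Illustrative example, not from the original source: the address
   8(%ebx,%ecx,4) passes all the checks below, while an address with
   scale 3 is rejected with "scale is not a valid multiplier". */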
5618
3b3c6a3f
MM
5619int
5620legitimate_address_p (mode, addr, strict)
5621 enum machine_mode mode;
5622 register rtx addr;
5623 int strict;
5624{
e075ae69
RH
5625 struct ix86_address parts;
5626 rtx base, index, disp;
5627 HOST_WIDE_INT scale;
5628 const char *reason = NULL;
5629 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5630
5631 if (TARGET_DEBUG_ADDR)
5632 {
5633 fprintf (stderr,
e9a25f70 5634 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5635 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5636 debug_rtx (addr);
5637 }
5638
9e20be0c
JJ
5639 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5640 {
5641 if (TARGET_DEBUG_ADDR)
5642 fprintf (stderr, "Success.\n");
5643 return TRUE;
5644 }
5645
b446e5a2 5646 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5647 {
e075ae69 5648 reason = "decomposition failed";
50e60bc3 5649 goto report_error;
3b3c6a3f
MM
5650 }
5651
e075ae69
RH
5652 base = parts.base;
5653 index = parts.index;
5654 disp = parts.disp;
5655 scale = parts.scale;
91f0226f 5656
e075ae69 5657 /* Validate base register.
e9a25f70
JL
5658
5659 Don't allow SUBREGs here; they can lead to spill failures when the base
3d771dfd
MM
5660 is one word out of a two word structure, which is represented internally
5661 as a DImode int. */
e9a25f70 5662
3b3c6a3f
MM
5663 if (base)
5664 {
1540f9eb 5665 rtx reg;
e075ae69
RH
5666 reason_rtx = base;
5667
1540f9eb
JH
5668 if (GET_CODE (base) == SUBREG)
5669 reg = SUBREG_REG (base);
5670 else
5671 reg = base;
5672
5673 if (GET_CODE (reg) != REG)
3b3c6a3f 5674 {
e075ae69 5675 reason = "base is not a register";
50e60bc3 5676 goto report_error;
3b3c6a3f
MM
5677 }
5678
c954bd01
RH
5679 if (GET_MODE (base) != Pmode)
5680 {
e075ae69 5681 reason = "base is not in Pmode";
50e60bc3 5682 goto report_error;
c954bd01
RH
5683 }
5684
1540f9eb
JH
5685 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5686 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5687 {
e075ae69 5688 reason = "base is not valid";
50e60bc3 5689 goto report_error;
3b3c6a3f
MM
5690 }
5691 }
5692
e075ae69 5693 /* Validate index register.
e9a25f70
JL
5694
5695 Don't allow SUBREGs here; they can lead to spill failures when the index
3d771dfd
MM
5696 is one word out of a two word structure, which is represented internally
5697 as a DImode int. */
e075ae69
RH
5698
5699 if (index)
3b3c6a3f 5700 {
1540f9eb 5701 rtx reg;
e075ae69
RH
5702 reason_rtx = index;
5703
1540f9eb
JH
5704 if (GET_CODE (index) == SUBREG)
5705 reg = SUBREG_REG (index);
5706 else
5707 reg = index;
5708
5709 if (GET_CODE (reg) != REG)
3b3c6a3f 5710 {
e075ae69 5711 reason = "index is not a register";
50e60bc3 5712 goto report_error;
3b3c6a3f
MM
5713 }
5714
e075ae69 5715 if (GET_MODE (index) != Pmode)
c954bd01 5716 {
e075ae69 5717 reason = "index is not in Pmode";
50e60bc3 5718 goto report_error;
c954bd01
RH
5719 }
5720
1540f9eb
JH
5721 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5722 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5723 {
e075ae69 5724 reason = "index is not valid";
50e60bc3 5725 goto report_error;
3b3c6a3f
MM
5726 }
5727 }
3b3c6a3f 5728
e075ae69
RH
5729 /* Validate scale factor. */
5730 if (scale != 1)
3b3c6a3f 5731 {
e075ae69
RH
5732 reason_rtx = GEN_INT (scale);
5733 if (!index)
3b3c6a3f 5734 {
e075ae69 5735 reason = "scale without index";
50e60bc3 5736 goto report_error;
3b3c6a3f
MM
5737 }
5738
e075ae69 5739 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5740 {
e075ae69 5741 reason = "scale is not a valid multiplier";
50e60bc3 5742 goto report_error;
3b3c6a3f
MM
5743 }
5744 }
5745
91bb873f 5746 /* Validate displacement. */
3b3c6a3f
MM
5747 if (disp)
5748 {
e075ae69
RH
5749 reason_rtx = disp;
5750
f996902d
RH
5751 if (GET_CODE (disp) == CONST
5752 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5753 switch (XINT (XEXP (disp, 0), 1))
5754 {
5755 case UNSPEC_GOT:
5756 case UNSPEC_GOTOFF:
5757 case UNSPEC_GOTPCREL:
5758 if (!flag_pic)
5759 abort ();
5760 goto is_legitimate_pic;
5761
5762 case UNSPEC_GOTTPOFF:
dea73790
JJ
5763 case UNSPEC_GOTNTPOFF:
5764 case UNSPEC_INDNTPOFF:
f996902d
RH
5765 case UNSPEC_NTPOFF:
5766 case UNSPEC_DTPOFF:
5767 break;
5768
5769 default:
5770 reason = "invalid address unspec";
5771 goto report_error;
5772 }
5773
b069de3b
SS
5774 else if (flag_pic && (SYMBOLIC_CONST (disp)
5775#if TARGET_MACHO
5776 && !machopic_operand_p (disp)
5777#endif
5778 ))
3b3c6a3f 5779 {
f996902d 5780 is_legitimate_pic:
0d7d98ee
JH
5781 if (TARGET_64BIT && (index || base))
5782 {
75d38379
JJ
5783 /* foo@dtpoff(%rX) is ok. */
5784 if (GET_CODE (disp) != CONST
5785 || GET_CODE (XEXP (disp, 0)) != PLUS
5786 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5787 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5788 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5789 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5790 {
5791 reason = "non-constant pic memory reference";
5792 goto report_error;
5793 }
0d7d98ee 5794 }
75d38379 5795 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5796 {
e075ae69 5797 reason = "displacement is an invalid pic construct";
50e60bc3 5798 goto report_error;
91bb873f
RH
5799 }
5800
4e9efe54 5801 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5802 includes the pic_offset_table_rtx register.
5803
4e9efe54
JH
5804 While this is a good idea, unfortunately these constructs may
5805 be created by the "adds using lea" optimization for incorrect
5806 code like:
5807
5808 int a;
5809 int foo(int i)
5810 {
5811 return *(&a+i);
5812 }
5813
50e60bc3 5814 This code is nonsensical, but results in addressing the
4e9efe54 5815 GOT table with a pic_offset_table_rtx base. We can't
f710504c 5816 just refuse it easily, since it gets matched by the
4e9efe54
JH
5817 "addsi3" pattern, which later gets split to lea when the
5818 output register differs from the input. While this
5819 could be handled by a separate addsi pattern for this case
5820 that never results in lea, disabling this test seems to be
5821 the easier and correct fix for the crash. */
3b3c6a3f 5822 }
f996902d
RH
5823 else if (!CONSTANT_ADDRESS_P (disp))
5824 {
5825 reason = "displacement is not constant";
5826 goto report_error;
5827 }
c05dbe81
JH
5828 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5829 {
5830 reason = "displacement is out of range";
5831 goto report_error;
5832 }
5833 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5834 {
5835 reason = "displacement is a const_double";
5836 goto report_error;
5837 }
3b3c6a3f
MM
5838 }
5839
e075ae69 5840 /* Everything looks valid. */
3b3c6a3f 5841 if (TARGET_DEBUG_ADDR)
e075ae69 5842 fprintf (stderr, "Success.\n");
3b3c6a3f 5843 return TRUE;
e075ae69 5844
5bf0ebab 5845 report_error:
e075ae69
RH
5846 if (TARGET_DEBUG_ADDR)
5847 {
5848 fprintf (stderr, "Error: %s\n", reason);
5849 debug_rtx (reason_rtx);
5850 }
5851 return FALSE;
3b3c6a3f 5852}
3b3c6a3f 5853\f
55efb413
JW
5854/* Return a unique alias set for the GOT. */
5855
0f290768 5856static HOST_WIDE_INT
55efb413
JW
5857ix86_GOT_alias_set ()
5858{
5bf0ebab
RH
5859 static HOST_WIDE_INT set = -1;
5860 if (set == -1)
5861 set = new_alias_set ();
5862 return set;
0f290768 5863}
55efb413 5864
3b3c6a3f
MM
5865/* Return a legitimate reference for ORIG (an address) using the
5866 register REG. If REG is 0, a new pseudo is generated.
5867
91bb873f 5868 There are two types of references that must be handled:
3b3c6a3f
MM
5869
5870 1. Global data references must load the address from the GOT, via
5871 the PIC reg. An insn is emitted to do this load, and the reg is
5872 returned.
5873
91bb873f
RH
5874 2. Static data references, constant pool addresses, and code labels
5875 compute the address as an offset from the GOT, whose base is in
5876 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5877 differentiate them from global data objects. The returned
5878 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5879
5880 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5881 reg also appears in the address. */
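/* Illustrative sketch, not from the original source: in 32-bit PIC
   code a global symbol foo becomes a load from foo@GOT(%ebx), i.e.
   (mem (plus pic_offset_table_rtx (const (unspec [foo] GOT)))), while
   a local symbol becomes the direct sum pic_offset_table_rtx +
   foo@GOTOFF with no memory load. */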
3b3c6a3f
MM
5882
5883rtx
5884legitimize_pic_address (orig, reg)
5885 rtx orig;
5886 rtx reg;
5887{
5888 rtx addr = orig;
5889 rtx new = orig;
91bb873f 5890 rtx base;
3b3c6a3f 5891
b069de3b
SS
5892#if TARGET_MACHO
5893 if (reg == 0)
5894 reg = gen_reg_rtx (Pmode);
5895 /* Use the generic Mach-O PIC machinery. */
5896 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5897#endif
5898
c05dbe81
JH
5899 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5900 new = addr;
5901 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 5902 {
c05dbe81
JH
5903 /* This symbol may be referenced via a displacement from the PIC
5904 base address (@GOTOFF). */
3b3c6a3f 5905
c05dbe81
JH
5906 if (reload_in_progress)
5907 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5908 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5909 new = gen_rtx_CONST (Pmode, new);
5910 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5911
c05dbe81
JH
5912 if (reg != 0)
5913 {
5914 emit_move_insn (reg, new);
5915 new = reg;
5916 }
3b3c6a3f 5917 }
91bb873f 5918 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5919 {
14f73b5a
JH
5920 if (TARGET_64BIT)
5921 {
8ee41eaf 5922 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5923 new = gen_rtx_CONST (Pmode, new);
5924 new = gen_rtx_MEM (Pmode, new);
5925 RTX_UNCHANGING_P (new) = 1;
5926 set_mem_alias_set (new, ix86_GOT_alias_set ());
5927
5928 if (reg == 0)
5929 reg = gen_reg_rtx (Pmode);
5930 /* Use gen_movsi directly, otherwise the address is loaded
5931 into a register for CSE. We don't want to CSE these addresses;
5932 instead we CSE addresses from the GOT table, so skip this. */
5933 emit_insn (gen_movsi (reg, new));
5934 new = reg;
5935 }
5936 else
5937 {
5938 /* This symbol must be referenced via a load from the
5939 Global Offset Table (@GOT). */
3b3c6a3f 5940
66edd3b4
RH
5941 if (reload_in_progress)
5942 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5943 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5944 new = gen_rtx_CONST (Pmode, new);
5945 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5946 new = gen_rtx_MEM (Pmode, new);
5947 RTX_UNCHANGING_P (new) = 1;
5948 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5949
14f73b5a
JH
5950 if (reg == 0)
5951 reg = gen_reg_rtx (Pmode);
5952 emit_move_insn (reg, new);
5953 new = reg;
5954 }
0f290768 5955 }
91bb873f
RH
5956 else
5957 {
5958 if (GET_CODE (addr) == CONST)
3b3c6a3f 5959 {
91bb873f 5960 addr = XEXP (addr, 0);
e3c8ea67
RH
5961
5962 /* We must match stuff we generate before. Assume the only
5963 unspecs that can get here are ours. Not that we could do
5964 anything with them anyway... */
5965 if (GET_CODE (addr) == UNSPEC
5966 || (GET_CODE (addr) == PLUS
5967 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5968 return orig;
5969 if (GET_CODE (addr) != PLUS)
564d80f4 5970 abort ();
3b3c6a3f 5971 }
91bb873f
RH
5972 if (GET_CODE (addr) == PLUS)
5973 {
5974 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5975
91bb873f
RH
5976 /* Check first to see if this is a constant offset from a @GOTOFF
5977 symbol reference. */
623fe810 5978 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5979 && GET_CODE (op1) == CONST_INT)
5980 {
6eb791fc
JH
5981 if (!TARGET_64BIT)
5982 {
66edd3b4
RH
5983 if (reload_in_progress)
5984 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5985 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5986 UNSPEC_GOTOFF);
6eb791fc
JH
5987 new = gen_rtx_PLUS (Pmode, new, op1);
5988 new = gen_rtx_CONST (Pmode, new);
5989 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5990
6eb791fc
JH
5991 if (reg != 0)
5992 {
5993 emit_move_insn (reg, new);
5994 new = reg;
5995 }
5996 }
5997 else
91bb873f 5998 {
75d38379
JJ
5999 if (INTVAL (op1) < -16*1024*1024
6000 || INTVAL (op1) >= 16*1024*1024)
6001 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
91bb873f
RH
6002 }
6003 }
6004 else
6005 {
6006 base = legitimize_pic_address (XEXP (addr, 0), reg);
6007 new = legitimize_pic_address (XEXP (addr, 1),
6008 base == reg ? NULL_RTX : reg);
6009
6010 if (GET_CODE (new) == CONST_INT)
6011 new = plus_constant (base, INTVAL (new));
6012 else
6013 {
6014 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6015 {
6016 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6017 new = XEXP (new, 1);
6018 }
6019 new = gen_rtx_PLUS (Pmode, base, new);
6020 }
6021 }
6022 }
3b3c6a3f
MM
6023 }
6024 return new;
6025}
fb49053f 6026
fb49053f 6027static void
f996902d 6028ix86_encode_section_info (decl, first)
fb49053f
RH
6029 tree decl;
6030 int first ATTRIBUTE_UNUSED;
6031{
f996902d
RH
6032 bool local_p = (*targetm.binds_local_p) (decl);
6033 rtx rtl, symbol;
6034
6035 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6036 if (GET_CODE (rtl) != MEM)
6037 return;
6038 symbol = XEXP (rtl, 0);
6039 if (GET_CODE (symbol) != SYMBOL_REF)
6040 return;
6041
6042 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6043 symbol so that we may access it directly in the GOT. */
6044
fb49053f 6045 if (flag_pic)
f996902d
RH
6046 SYMBOL_REF_FLAG (symbol) = local_p;
6047
6048 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6049 "local dynamic", "initial exec" or "local exec" TLS models
6050 respectively. */
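/* Illustrative example, not from the original source: given the
   %[GLil] scheme described above, a global-dynamic symbol "foo" is
   renamed "%Gfoo" here; ix86_strip_name_encoding below removes the
   prefix again when the name is printed. */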
6051
6052 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 6053 {
f996902d
RH
6054 const char *symbol_str;
6055 char *newstr;
6056 size_t len;
dce81a1a 6057 enum tls_model kind = decl_tls_model (decl);
f996902d 6058
75d38379
JJ
6059 if (TARGET_64BIT && ! flag_pic)
6060 {
6061 /* x86-64 doesn't allow non-pic code for shared libraries,
6062 so don't generate GD/LD TLS models for non-pic code. */
6063 switch (kind)
6064 {
6065 case TLS_MODEL_GLOBAL_DYNAMIC:
6066 kind = TLS_MODEL_INITIAL_EXEC; break;
6067 case TLS_MODEL_LOCAL_DYNAMIC:
6068 kind = TLS_MODEL_LOCAL_EXEC; break;
6069 default:
6070 break;
6071 }
6072 }
6073
f996902d 6074 symbol_str = XSTR (symbol, 0);
fb49053f 6075
f996902d
RH
6076 if (symbol_str[0] == '%')
6077 {
6078 if (symbol_str[1] == tls_model_chars[kind])
6079 return;
6080 symbol_str += 2;
6081 }
6082 len = strlen (symbol_str) + 1;
6083 newstr = alloca (len + 2);
6084
6085 newstr[0] = '%';
6086 newstr[1] = tls_model_chars[kind];
6087 memcpy (newstr + 2, symbol_str, len);
6088
6089 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
6090 }
6091}
f996902d
RH
6092
6093/* Undo the above when printing symbol names. */
6094
6095static const char *
6096ix86_strip_name_encoding (str)
6097 const char *str;
6098{
6099 if (str[0] == '%')
6100 str += 2;
6101 if (str [0] == '*')
6102 str += 1;
6103 return str;
6104}
3b3c6a3f 6105\f
f996902d
RH
6106/* Load the thread pointer into a register. */
6107
6108static rtx
6109get_thread_pointer ()
6110{
6111 rtx tp;
6112
6113 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9e20be0c
JJ
6114 tp = gen_rtx_MEM (Pmode, tp);
6115 RTX_UNCHANGING_P (tp) = 1;
6116 set_mem_alias_set (tp, ix86_GOT_alias_set ());
f996902d
RH
6117 tp = force_reg (Pmode, tp);
6118
6119 return tp;
6120}
fce5a9f2 6121
3b3c6a3f
MM
6122/* Try machine-dependent ways of modifying an illegitimate address
6123 to be legitimate. If we find one, return the new, valid address.
6124 This macro is used in only one place: `memory_address' in explow.c.
6125
6126 OLDX is the address as it was before break_out_memory_refs was called.
6127 In some cases it is useful to look at this to decide what needs to be done.
6128
6129 MODE and WIN are passed so that this macro can use
6130 GO_IF_LEGITIMATE_ADDRESS.
6131
6132 It is always safe for this macro to do nothing. It exists to recognize
6133 opportunities to optimize the output.
6134
6135 For the 80386, we handle X+REG by loading X into a register R and
6136 using R+REG. R will go in a general reg and indexing will be used.
6137 However, if REG is a broken-out memory address or multiplication,
6138 nothing needs to be done because REG can certainly go in a general reg.
6139
6140 When -fpic is used, special handling is needed for symbolic references.
6141 See comments by legitimize_pic_address in i386.c for details. */
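/* Illustrative example, not from the original source: an address such
   as (plus (ashift (reg) (const_int 2)) (reg)) is canonicalized below
   into (plus (mult (reg) (const_int 4)) (reg)) so it matches the
   index*scale + base form the address decoder expects. */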
6142
6143rtx
6144legitimize_address (x, oldx, mode)
6145 register rtx x;
bb5177ac 6146 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
6147 enum machine_mode mode;
6148{
6149 int changed = 0;
6150 unsigned log;
6151
6152 if (TARGET_DEBUG_ADDR)
6153 {
e9a25f70
JL
6154 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6155 GET_MODE_NAME (mode));
3b3c6a3f
MM
6156 debug_rtx (x);
6157 }
6158
f996902d
RH
6159 log = tls_symbolic_operand (x, mode);
6160 if (log)
6161 {
6162 rtx dest, base, off, pic;
75d38379 6163 int type;
f996902d 6164
755ac5d4 6165 switch (log)
f996902d
RH
6166 {
6167 case TLS_MODEL_GLOBAL_DYNAMIC:
6168 dest = gen_reg_rtx (Pmode);
75d38379
JJ
6169 if (TARGET_64BIT)
6170 {
6171 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6172
6173 start_sequence ();
6174 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6175 insns = get_insns ();
6176 end_sequence ();
6177
6178 emit_libcall_block (insns, dest, rax, x);
6179 }
6180 else
6181 emit_insn (gen_tls_global_dynamic_32 (dest, x));
f996902d
RH
6182 break;
6183
6184 case TLS_MODEL_LOCAL_DYNAMIC:
6185 base = gen_reg_rtx (Pmode);
75d38379
JJ
6186 if (TARGET_64BIT)
6187 {
6188 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6189
6190 start_sequence ();
6191 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6192 insns = get_insns ();
6193 end_sequence ();
6194
6195 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6196 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6197 emit_libcall_block (insns, base, rax, note);
6198 }
6199 else
6200 emit_insn (gen_tls_local_dynamic_base_32 (base));
f996902d
RH
6201
6202 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6203 off = gen_rtx_CONST (Pmode, off);
6204
6205 return gen_rtx_PLUS (Pmode, base, off);
6206
6207 case TLS_MODEL_INITIAL_EXEC:
75d38379
JJ
6208 if (TARGET_64BIT)
6209 {
6210 pic = NULL;
6211 type = UNSPEC_GOTNTPOFF;
6212 }
6213 else if (flag_pic)
f996902d 6214 {
66edd3b4
RH
6215 if (reload_in_progress)
6216 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d 6217 pic = pic_offset_table_rtx;
75d38379 6218 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
f996902d 6219 }
dea73790 6220 else if (!TARGET_GNU_TLS)
f996902d
RH
6221 {
6222 pic = gen_reg_rtx (Pmode);
6223 emit_insn (gen_set_got (pic));
75d38379 6224 type = UNSPEC_GOTTPOFF;
f996902d 6225 }
dea73790 6226 else
75d38379
JJ
6227 {
6228 pic = NULL;
6229 type = UNSPEC_INDNTPOFF;
6230 }
f996902d
RH
6231
6232 base = get_thread_pointer ();
6233
75d38379 6234 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
f996902d 6235 off = gen_rtx_CONST (Pmode, off);
75d38379 6236 if (pic)
dea73790 6237 off = gen_rtx_PLUS (Pmode, pic, off);
f996902d
RH
6238 off = gen_rtx_MEM (Pmode, off);
6239 RTX_UNCHANGING_P (off) = 1;
6240 set_mem_alias_set (off, ix86_GOT_alias_set ());
f996902d 6241 dest = gen_reg_rtx (Pmode);
dea73790 6242
75d38379 6243 if (TARGET_64BIT || TARGET_GNU_TLS)
dea73790
JJ
6244 {
6245 emit_move_insn (dest, off);
6246 return gen_rtx_PLUS (Pmode, base, dest);
6247 }
6248 else
6249 emit_insn (gen_subsi3 (dest, base, off));
f996902d
RH
6250 break;
6251
6252 case TLS_MODEL_LOCAL_EXEC:
6253 base = get_thread_pointer ();
6254
6255 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
75d38379
JJ
6256 (TARGET_64BIT || TARGET_GNU_TLS)
6257 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
f996902d
RH
6258 off = gen_rtx_CONST (Pmode, off);
6259
75d38379 6260 if (TARGET_64BIT || TARGET_GNU_TLS)
f996902d
RH
6261 return gen_rtx_PLUS (Pmode, base, off);
6262 else
6263 {
6264 dest = gen_reg_rtx (Pmode);
6265 emit_insn (gen_subsi3 (dest, base, off));
6266 }
6267 break;
6268
6269 default:
6270 abort ();
6271 }
6272
6273 return dest;
6274 }
6275
3b3c6a3f
MM
6276 if (flag_pic && SYMBOLIC_CONST (x))
6277 return legitimize_pic_address (x, 0);
6278
6279 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6280 if (GET_CODE (x) == ASHIFT
6281 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 6282 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
6283 {
6284 changed = 1;
a269a03c
JC
6285 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6286 GEN_INT (1 << log));
3b3c6a3f
MM
6287 }
6288
6289 if (GET_CODE (x) == PLUS)
6290 {
0f290768 6291 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 6292
3b3c6a3f
MM
6293 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6294 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 6295 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
6296 {
6297 changed = 1;
c5c76735
JL
6298 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6299 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6300 GEN_INT (1 << log));
3b3c6a3f
MM
6301 }
6302
6303 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6304 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 6305 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
6306 {
6307 changed = 1;
c5c76735
JL
6308 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6309 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6310 GEN_INT (1 << log));
3b3c6a3f
MM
6311 }
6312
0f290768 6313 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
6314 if (GET_CODE (XEXP (x, 1)) == MULT)
6315 {
6316 rtx tmp = XEXP (x, 0);
6317 XEXP (x, 0) = XEXP (x, 1);
6318 XEXP (x, 1) = tmp;
6319 changed = 1;
6320 }
6321
6322 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6323 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6324 created by virtual register instantiation, register elimination, and
6325 similar optimizations. */
6326 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6327 {
6328 changed = 1;
c5c76735
JL
6329 x = gen_rtx_PLUS (Pmode,
6330 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6331 XEXP (XEXP (x, 1), 0)),
6332 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
6333 }
6334
e9a25f70
JL
6335 /* Canonicalize
6336 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
6337 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6338 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6339 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6340 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6341 && CONSTANT_P (XEXP (x, 1)))
6342 {
00c79232
ML
6343 rtx constant;
6344 rtx other = NULL_RTX;
3b3c6a3f
MM
6345
6346 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6347 {
6348 constant = XEXP (x, 1);
6349 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6350 }
6351 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6352 {
6353 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6354 other = XEXP (x, 1);
6355 }
6356 else
6357 constant = 0;
6358
6359 if (constant)
6360 {
6361 changed = 1;
c5c76735
JL
6362 x = gen_rtx_PLUS (Pmode,
6363 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6364 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6365 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
6366 }
6367 }
6368
6369 if (changed && legitimate_address_p (mode, x, FALSE))
6370 return x;
6371
6372 if (GET_CODE (XEXP (x, 0)) == MULT)
6373 {
6374 changed = 1;
6375 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6376 }
6377
6378 if (GET_CODE (XEXP (x, 1)) == MULT)
6379 {
6380 changed = 1;
6381 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6382 }
6383
6384 if (changed
6385 && GET_CODE (XEXP (x, 1)) == REG
6386 && GET_CODE (XEXP (x, 0)) == REG)
6387 return x;
6388
6389 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6390 {
6391 changed = 1;
6392 x = legitimize_pic_address (x, 0);
6393 }
6394
6395 if (changed && legitimate_address_p (mode, x, FALSE))
6396 return x;
6397
6398 if (GET_CODE (XEXP (x, 0)) == REG)
6399 {
6400 register rtx temp = gen_reg_rtx (Pmode);
6401 register rtx val = force_operand (XEXP (x, 1), temp);
6402 if (val != temp)
6403 emit_move_insn (temp, val);
6404
6405 XEXP (x, 1) = temp;
6406 return x;
6407 }
6408
6409 else if (GET_CODE (XEXP (x, 1)) == REG)
6410 {
6411 register rtx temp = gen_reg_rtx (Pmode);
6412 register rtx val = force_operand (XEXP (x, 0), temp);
6413 if (val != temp)
6414 emit_move_insn (temp, val);
6415
6416 XEXP (x, 0) = temp;
6417 return x;
6418 }
6419 }
6420
6421 return x;
6422}
2a2ab3f9
JVA
6423\f
6424/* Print an integer constant expression in assembler syntax. Addition
6425 and subtraction are the only arithmetic that may appear in these
6426 expressions. FILE is the stdio stream to write to, X is the rtx, and
6427 CODE is the operand print code from the output string. */
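/* Illustrative example, not from the original source: since some
   assemblers need integer constants first, (plus (symbol_ref "x")
   (const_int 8)) prints as "8+x", and an UNSPEC such as
   (unspec [(symbol_ref "x")] UNSPEC_GOTOFF) prints as "x@GOTOFF". */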
6428
6429static void
6430output_pic_addr_const (file, x, code)
6431 FILE *file;
6432 rtx x;
6433 int code;
6434{
6435 char buf[256];
6436
6437 switch (GET_CODE (x))
6438 {
6439 case PC:
6440 if (flag_pic)
6441 putc ('.', file);
6442 else
6443 abort ();
6444 break;
6445
6446 case SYMBOL_REF:
91bb873f 6447 assemble_name (file, XSTR (x, 0));
b069de3b 6448 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
91bb873f 6449 fputs ("@PLT", file);
2a2ab3f9
JVA
6450 break;
6451
91bb873f
RH
6452 case LABEL_REF:
6453 x = XEXP (x, 0);
6454 /* FALLTHRU */
2a2ab3f9
JVA
6455 case CODE_LABEL:
6456 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6457 assemble_name (asm_out_file, buf);
6458 break;
6459
6460 case CONST_INT:
f64cecad 6461 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
6462 break;
6463
6464 case CONST:
6465 /* This used to output parentheses around the expression,
6466 but that does not work on the 386 (either ATT or BSD assembler). */
6467 output_pic_addr_const (file, XEXP (x, 0), code);
6468 break;
6469
6470 case CONST_DOUBLE:
6471 if (GET_MODE (x) == VOIDmode)
6472 {
6473 /* We can use %d if the number is <32 bits and positive. */
6474 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
6475 fprintf (file, "0x%lx%08lx",
6476 (unsigned long) CONST_DOUBLE_HIGH (x),
6477 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6478 else
f64cecad 6479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
6480 }
6481 else
6482 /* We can't handle floating point constants;
6483 PRINT_OPERAND must handle them. */
6484 output_operand_lossage ("floating constant misused");
6485 break;
6486
6487 case PLUS:
e9a25f70 6488 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6489 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6490 {
2a2ab3f9 6491 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6492 putc ('+', file);
e9a25f70 6493 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6494 }
91bb873f 6495 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6496 {
2a2ab3f9 6497 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6498 putc ('+', file);
e9a25f70 6499 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6500 }
91bb873f
RH
6501 else
6502 abort ();
2a2ab3f9
JVA
6503 break;
6504
6505 case MINUS:
b069de3b
SS
6506 if (!TARGET_MACHO)
6507 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6508 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6509 putc ('-', file);
2a2ab3f9 6510 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6511 if (!TARGET_MACHO)
6512 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6513 break;
6514
91bb873f
RH
6515 case UNSPEC:
6516 if (XVECLEN (x, 0) != 1)
5bf0ebab 6517 abort ();
91bb873f
RH
6518 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6519 switch (XINT (x, 1))
77ebd435 6520 {
8ee41eaf 6521 case UNSPEC_GOT:
77ebd435
AJ
6522 fputs ("@GOT", file);
6523 break;
8ee41eaf 6524 case UNSPEC_GOTOFF:
77ebd435
AJ
6525 fputs ("@GOTOFF", file);
6526 break;
8ee41eaf 6527 case UNSPEC_GOTPCREL:
edfe8595 6528 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6529 break;
f996902d 6530 case UNSPEC_GOTTPOFF:
dea73790 6531 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6532 fputs ("@GOTTPOFF", file);
6533 break;
6534 case UNSPEC_TPOFF:
6535 fputs ("@TPOFF", file);
6536 break;
6537 case UNSPEC_NTPOFF:
75d38379
JJ
6538 if (TARGET_64BIT)
6539 fputs ("@TPOFF", file);
6540 else
6541 fputs ("@NTPOFF", file);
f996902d
RH
6542 break;
6543 case UNSPEC_DTPOFF:
6544 fputs ("@DTPOFF", file);
6545 break;
dea73790 6546 case UNSPEC_GOTNTPOFF:
75d38379
JJ
6547 if (TARGET_64BIT)
6548 fputs ("@GOTTPOFF(%rip)", file);
6549 else
6550 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6551 break;
6552 case UNSPEC_INDNTPOFF:
6553 fputs ("@INDNTPOFF", file);
6554 break;
77ebd435
AJ
6555 default:
6556 output_operand_lossage ("invalid UNSPEC as operand");
6557 break;
6558 }
91bb873f
RH
6559 break;
6560
2a2ab3f9
JVA
6561 default:
6562 output_operand_lossage ("invalid expression as operand");
6563 }
6564}
1865dbb5 6565
0f290768 6566/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6567 We need to handle our special PIC relocations. */
6568
0f290768 6569void
1865dbb5
JM
6570i386_dwarf_output_addr_const (file, x)
6571 FILE *file;
6572 rtx x;
6573{
14f73b5a 6574#ifdef ASM_QUAD
18b5b8d6 6575 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6576#else
6577 if (TARGET_64BIT)
6578 abort ();
18b5b8d6 6579 fprintf (file, "%s", ASM_LONG);
14f73b5a 6580#endif
1865dbb5
JM
6581 if (flag_pic)
6582 output_pic_addr_const (file, x, '\0');
6583 else
6584 output_addr_const (file, x);
6585 fputc ('\n', file);
6586}
6587
b9203463
RH
6588/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6589 We need to emit DTP-relative relocations. */
6590
6591void
6592i386_output_dwarf_dtprel (file, size, x)
6593 FILE *file;
6594 int size;
6595 rtx x;
6596{
75d38379
JJ
6597 fputs (ASM_LONG, file);
6598 output_addr_const (file, x);
6599 fputs ("@DTPOFF", file);
b9203463
RH
6600 switch (size)
6601 {
6602 case 4:
b9203463
RH
6603 break;
6604 case 8:
75d38379 6605 fputs (", 0", file);
b9203463 6606 break;
b9203463
RH
6607 default:
6608 abort ();
6609 }
b9203463
RH
6610}
6611
1865dbb5
JM
6612/* In the name of slightly smaller debug output, and to cater to
6613 general assembler lossage, recognize PIC+GOTOFF and turn it back
6614 into a direct symbol reference. */
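/* Illustrative example, not from the original source: the 32-bit PIC
   address (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] GOTOFF)))
   simplifies back to (symbol_ref "foo") for debug output. */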
6615
6616rtx
6617i386_simplify_dwarf_addr (orig_x)
6618 rtx orig_x;
6619{
ec65b2e3 6620 rtx x = orig_x, y;
1865dbb5 6621
4c8c0dec
JJ
6622 if (GET_CODE (x) == MEM)
6623 x = XEXP (x, 0);
6624
6eb791fc
JH
6625 if (TARGET_64BIT)
6626 {
6627 if (GET_CODE (x) != CONST
6628 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6629 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6630 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6631 return orig_x;
6632 return XVECEXP (XEXP (x, 0), 0, 0);
6633 }
6634
1865dbb5 6635 if (GET_CODE (x) != PLUS
1865dbb5
JM
6636 || GET_CODE (XEXP (x, 1)) != CONST)
6637 return orig_x;
6638
ec65b2e3
JJ
6639 if (GET_CODE (XEXP (x, 0)) == REG
6640 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6641 /* %ebx + GOT/GOTOFF */
6642 y = NULL;
6643 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6644 {
6645 /* %ebx + %reg * scale + GOT/GOTOFF */
6646 y = XEXP (x, 0);
6647 if (GET_CODE (XEXP (y, 0)) == REG
6648 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6649 y = XEXP (y, 1);
6650 else if (GET_CODE (XEXP (y, 1)) == REG
6651 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6652 y = XEXP (y, 0);
6653 else
6654 return orig_x;
6655 if (GET_CODE (y) != REG
6656 && GET_CODE (y) != MULT
6657 && GET_CODE (y) != ASHIFT)
6658 return orig_x;
6659 }
6660 else
6661 return orig_x;
6662
1865dbb5
JM
6663 x = XEXP (XEXP (x, 1), 0);
6664 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6665 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6666 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6667 {
6668 if (y)
6669 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6670 return XVECEXP (x, 0, 0);
6671 }
1865dbb5
JM
6672
6673 if (GET_CODE (x) == PLUS
6674 && GET_CODE (XEXP (x, 0)) == UNSPEC
6675 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6676 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6677 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6678 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6679 {
6680 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6681 if (y)
6682 return gen_rtx_PLUS (Pmode, y, x);
6683 return x;
6684 }
1865dbb5
JM
6685
6686 return orig_x;
6687}
\f
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
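
/* For illustration: (GT, CCGCmode, reverse=0, fp=0) prints "g", while
   reverse=1 prints "le" because reverse_condition (GT) == LE.  A
   template such as "set%C0" or "j%C0" then yields "setg" or "jle".
   (Template names are illustrative.)  */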

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
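
/* For illustration: with x = %eax, code 'b' prints "al", 'w' prints
   "ax", 'k' prints "eax" and 'h' prints "ah" (each preceded by '%' in
   the AT&T dialect); with x = %r8 in 64-bit mode, code 'k' prints
   "r8d".  */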

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

static int
get_some_local_dynamic_name_1 (px, data)
     rtx *px;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
	nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */
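
/* For illustration, a hypothetical template using these codes:

	"mov%z0\t{%1, %0|%0, %1}"

   Here "%z0" expands to the size suffix for operands[0] (e.g. 'l' for
   a 4-byte integer operand), and the {att|intel} braces pick the
   operand order for the current assembler dialect.  */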

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if the argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      if (TARGET_64BIT)
	fputs ("fs:0", file);
      else
	fputs ("gs:0", file);
      return;
    }

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (addr) == SYMBOL_REF
	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
	      || GET_CODE (addr) == LABEL_REF
	      || (GET_CODE (addr) == CONST
		  && GET_CODE (XEXP (addr, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
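
/* For illustration: an address with base %ebx, index %esi, scale 4 and
   displacement 12 prints as "12(%ebx,%esi,4)" in AT&T syntax and as
   "[ebx+12+esi*4]" in Intel syntax.  (Register choice illustrative.)  */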

bool
output_addr_const_extra (file, x)
     FILE *file;
     rtx x;
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
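
/* For illustration: for UNSPEC_NTPOFF around a symbol "x" this prints
   "x@NTPOFF" on ia32 but "x@TPOFF" on x86-64, matching the TLS
   relocation names the respective assemblers expect.  */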
\f
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
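
/* For illustration (hypothetical caller): a post-reload splitter for a
   DImode operation might do

	rtx lo[2], hi[2];
	split_di (operands, 2, lo, hi);

   after which lo[N] and hi[N] refer to the low and high SImode words
   of operands[N].  */
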
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* We know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
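
/* For illustration: an SFmode SSE add returns
   "addss\t{%2, %0|%0, %2}", while a 387 add with operands[0] at the
   top of the stack and a non-dying register operands[2] returns
   "fadd\t{%y2, %0|%0, %y2}".  */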

/* Output code to initialize control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word,
   while ROUND_DOWN is set to a control word that rounds downwards.  */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
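
/* Note on the magic constants above: 0xc00 sets both rounding-control
   bits (bits 10 and 11) of the i387 control word, i.e. RC = 11b, which
   is round-toward-zero -- what the trunc?f?i patterns need.  The
   movsi_insv_1 variant instead stores 0xc into the control word's high
   byte, which sets the same RC bits while avoiding a partial-register
   stall (the precision-control bits in that byte are don't-cares for
   fist/fistp).  */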

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack and the other operand (also a
	 stack register) die, then this must be a `fcompp' float compare.  */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}

void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
	     machopic_function_base_name () + 1);
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
\f
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
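
/* For illustration: clearing %eax this way emits "xorl %eax, %eax"
   (2 bytes, flags clobbered -- hence the explicit CLOBBER above)
   instead of "movl $0, %eax" (5 bytes) whenever the tuning flags or
   -Os make the xor form preferable.  */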

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (x)
     rtx x;
{
  x = XEXP (x, 0);

  if (flag_pic && ! TARGET_64BIT)
    {
      if (GET_CODE (x) != PLUS)
	return NULL_RTX;
      if (XEXP (x, 0) != pic_offset_table_rtx)
	return NULL_RTX;
      x = XEXP (x, 1);
      if (GET_CODE (x) != CONST)
	return NULL_RTX;
      x = XEXP (x, 0);
      if (GET_CODE (x) != UNSPEC)
	return NULL_RTX;
      if (XINT (x, 1) != UNSPEC_GOTOFF)
	return NULL_RTX;
      x = XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn, op0, op1, tmp;

  op0 = operands[0];
  op1 = operands[1];

  if (tls_symbolic_operand (op1, Pmode))
    {
      op1 = legitimize_address (op1, op1, VOIDmode);
      if (GET_CODE (op0) == MEM)
	{
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
	  op1 = tmp;
	}
    }
  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else
	{
	  if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	}
      if (op0 != op1)
	{
	  insn = gen_rtx_SET (VOIDmode, op0, op1);
	  emit_insn (insn);
	}
      return;
#endif /* TARGET_MACHO */
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
    }
  else
    {
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into registers
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE
		   && register_operand (op0, mode))
	    op1 = validize_mem (force_const_mem (mode, op1));
	}
    }

  insn = gen_rtx_SET (VOIDmode, op0, op1);

  emit_insn (insn);
}

void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information, we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, rather than just general_operand, which will allow 3
   separate memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, rather than just general_operand, which will allow 2
   separate memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases the Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
	 against zero, but for which we miss a jump instruction,
	 so we need to use relational tests against an overflow
	 flag that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does a (use flags), and combine may ask us
	 for the proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
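
/* For illustration: (GT, x, const0_rtx) yields CCNOmode, since a sign
   test against zero never needs the carry flag, while (GTU, x, y)
   yields CCmode because unsigned comparisons do need it.  */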
8532
3a3677ff
RH
8533/* Return true if we should use an FCOMI instruction for this fp comparison. */
8534
a940d8bd 8535int
3a3677ff 8536ix86_use_fcomi_compare (code)
9e7adcb3 8537 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8538{
9e7adcb3
JH
8539 enum rtx_code swapped_code = swap_condition (code);
8540 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8541 || (ix86_fp_comparison_cost (swapped_code)
8542 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8543}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN if
   no such code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, set its value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
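
/* For example, under TARGET_IEEE_FP a NE test cannot be done with a
   single jump: LTGT (ZF=0) is false for unordered operands even though
   NE must be true for them, so the splitter above pairs
   first_code = LTGT with second_code = UNORDERED and lets both
   branches target the same label.  */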

/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as a
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
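
/* For example, for EQ with TARGET_IEEE_FP the arithmetic sequence costs 5,
   fcomi costs 2 + 1 = 3 (one extra jump for the UNORDERED bypass) and sahf
   costs 3 + 1 = 4, so the fcomi variant is chosen when it is available.  */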

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaNs, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */
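      /* After fnstsw, the FPU condition bits appear in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so the masks below read: 0x45 = C0|C2|C3,
	 0x05 = C0|C2, 0x44 = C2|C3, 0x40 = C3, 0x04 = C2, 0x01 = C0.  */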

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand a DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE: code1 = LT; code2 = GT; break;
	  case GE: code1 = GT; code2 = LT; break;
	  case LEU: code1 = LTU; code2 = GTU; break;
	  case GEU: code1 = GTU; code2 = LTU; break;

	  case EQ: code1 = NIL; code2 = NE; break;
	  case NE: code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *   if (hi(a) < hi(b)) goto true;
	 *   if (hi(a) > hi(b)) goto false;
	 *   if (lo(a) < lo(b)) goto true;
	 * false:
	 */
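	/* For example, "a <= b" uses code1 = LT, code2 = GT and
	   code3 = LEU: branch to LABEL if hi(a) < hi(b), skip to LABEL2
	   if hi(a) > hi(b), and otherwise decide by the unsigned
	   low-word test lo(a) <= lo(b).  */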

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume that BYPASS and SECOND always test for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that there is no need for the
	 probability to be updated.  Later we may run some experiments
	 and see if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
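
/* For example, "dest = (a == b)" on doubles with TARGET_IEEE_FP needs both
   the main and the bypass test: roughly "fucomi; sete %al; setnp %dl;
   andb %dl, %al", since EQ must come out false for unordered operands.  */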

/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set *POP to the comparison operation.  */
bool
ix86_expand_carry_flag_compare (code, op0, op1, pop)
     rtx op0, op1, *pop;
     enum rtx_code code;
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through a special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into carry
	 flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require the zero flag; swap the operands so
	 they no longer do.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with a
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into a register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
			  mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
			  mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}

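/* For instance, the GTU/LEU rewrite above turns "a > 5" into the
   equivalent unsigned test "a >= 6" (GEU), which the sbb based
   sequences in ix86_expand_int_movcc and ix86_expand_int_addcc
   below can consume directly.  */
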
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify the rest of the code, restrict to the GEU
		 case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			      (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		}
	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }

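	  /* At this point TMP holds 0 or -1 (from the sbb, or from
	     emit_store_flag in the sign bit case); the arithmetic below
	     (add/or/not/and) then maps {0, -1} onto {ct, cf} without
	     any branch.  */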
	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return 1; /* DONE */
	}

      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing an unordered compare to a normal
		 compare; that is not valid in general (we may convert a
		 non-trapping condition to a trapping one), however on
		 i386 we currently emit all comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST >= 2)
	{
	  if (cf == 0)
	    {
	      cf = ct;
	      ct = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing an unordered compare to a normal
		   compare; that is not valid in general (we may convert a
		   non-trapping condition to a trapping one), however on
		   i386 we currently emit all comparisons unordered.  */
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != NIL)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != NIL)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while the code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));

  return 1; /* DONE */
}

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands
	 and the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for a min operation.  */
	  if (code == LT || code == UNLE)
	    {
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for a max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
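      /* E.g. "x = (a < b) ? a : b" in SFmode with SSE math matches the
	 rtx_equal_p tests above with code == LT and is emitted as a
	 single minss, with no compare/branch or fcmov sequence at all.  */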
      /* Arrange for the condition to be an sse_comparison_operator.  In
	 case we are in non-ieee mode, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to avoid
	 extra moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly, try to arrange for the result to be the first operand
	 of the conditional move.  We also don't support the NE comparison
	 on SSE, so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					(GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Expand a conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  rtx pat, clob;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		  (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either the adc or the sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  abort ();
	}
    }
  return 1; /* DONE */
}
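
/* For example, "if (a < b) x++;" with unsigned operands becomes
   "cmp a, b; adc x, 0": the adc adds the carry bit produced by the
   compare, so x is incremented exactly when a < b.  */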

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool references to immediates.  This is used by
     fp moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warnings on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		    ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		     + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		     DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = gen_int_mode (l[2], SImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
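
/* For example, on a 32bit target a DFmode operand splits into two SImode
   parts, while an XFmode constant splits into two SImode mantissa words
   plus one word holding the exponent, giving size == 3.  */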

/* Emit insns to perform a move or push of DI, DF, and XF values.
   The input operand is split into up to three parts, in the correct
   order, and the corresponding output parts are emitted.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move a double.
     For a 64bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool references to immediates.  This is used
	 by fp moves, that force all constants to memory to allow
	 combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }
2450a057 10402
2450a057
JH
10403 /* The only non-offsettable memory we handle is push. */
10404 if (push_operand (operands[0], VOIDmode))
10405 push = 1;
10406 else if (GET_CODE (operands[0]) == MEM
10407 && ! offsettable_memref_p (operands[0]))
10408 abort ();
10409
26e5b205
JH
10410 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10411 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
10412
10413 /* When emitting push, take care for source operands on the stack. */
10414 if (push && GET_CODE (operands[1]) == MEM
10415 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10416 {
26e5b205 10417 if (nparts == 3)
886cbb88
JH
10418 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10419 XEXP (part[1][2], 0));
10420 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10421 XEXP (part[1][1], 0));
2450a057
JH
10422 }
10423
0f290768 10424 /* We need to do the copy in the right order in case an address register
2450a057
JH
10425 of the source overlaps the destination. */
10426 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10427 {
10428 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10429 collisions++;
10430 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10431 collisions++;
26e5b205 10432 if (nparts == 3
2450a057
JH
10433 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10434 collisions++;
10435
10436 /* Collision in the middle part can be handled by reordering. */
26e5b205 10437 if (collisions == 1 && nparts == 3
2450a057 10438 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 10439 {
2450a057
JH
10440 rtx tmp;
10441 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10442 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10443 }
e075ae69 10444
2450a057
JH
10445 /* If there are more collisions, we can't handle it by reordering.
10446 Do an lea to the last part and use only one colliding move. */
10447 else if (collisions > 1)
10448 {
10449 collisions = 1;
26e5b205 10450 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 10451 XEXP (part[1][0], 0)));
26e5b205
JH
10452 part[1][0] = change_address (part[1][0],
10453 TARGET_64BIT ? DImode : SImode,
10454 part[0][nparts - 1]);
b72f00af 10455 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 10456 if (nparts == 3)
b72f00af 10457 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
10458 }
10459 }
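  /* Illustration (editorial, not from the original source): for a
     three-part destination {%eax, %edx, %ecx} loaded from a source
     addressed as (%eax,%edx), both %eax and %edx collide, so the code
     above emits "lea (%eax,%edx), %ecx", rebases the source parts at
     0(%ecx), 4(%ecx) and 8(%ecx), and the ordering logic below ensures
     the move into %ecx is emitted last.  */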
10460
10461 if (push)
10462 {
26e5b205 10463 if (!TARGET_64BIT)
2b589241 10464 {
26e5b205
JH
10465 if (nparts == 3)
10466 {
 10467 /* We use only the first 12 bytes of a TFmode value, but for pushing
 10468 we are required to adjust the stack as if we were pushing a real
 10469 16-byte value. */
10470 if (mode == TFmode && !TARGET_64BIT)
10471 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10472 GEN_INT (-4)));
10473 emit_move_insn (part[0][2], part[1][2]);
10474 }
2b589241 10475 }
26e5b205
JH
10476 else
10477 {
 10478 /* In 64bit mode we don't have a 32bit push available. In case the
 10479 operand is a register, that is OK - we will just use the larger
 10480 counterpart. We also retype memory - these come from the attempt
 10481 to avoid a REX prefix on moving the second half of a TFmode value. */
10482 if (GET_MODE (part[1][1]) == SImode)
10483 {
10484 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 10485 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
10486 else if (REG_P (part[1][1]))
10487 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10488 else
b531087a 10489 abort ();
886cbb88
JH
10490 if (GET_MODE (part[1][0]) == SImode)
10491 part[1][0] = part[1][1];
26e5b205
JH
10492 }
10493 }
10494 emit_move_insn (part[0][1], part[1][1]);
10495 emit_move_insn (part[0][0], part[1][0]);
10496 return;
2450a057
JH
10497 }
10498
 10499 /* Choose the correct order so as not to overwrite the source before it is copied. */
10500 if ((REG_P (part[0][0])
10501 && REG_P (part[1][1])
10502 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 10503 || (nparts == 3
2450a057
JH
10504 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10505 || (collisions > 0
10506 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10507 {
26e5b205 10508 if (nparts == 3)
2450a057 10509 {
26e5b205
JH
10510 operands[2] = part[0][2];
10511 operands[3] = part[0][1];
10512 operands[4] = part[0][0];
10513 operands[5] = part[1][2];
10514 operands[6] = part[1][1];
10515 operands[7] = part[1][0];
2450a057
JH
10516 }
10517 else
10518 {
26e5b205
JH
10519 operands[2] = part[0][1];
10520 operands[3] = part[0][0];
10521 operands[5] = part[1][1];
10522 operands[6] = part[1][0];
2450a057
JH
10523 }
10524 }
10525 else
10526 {
26e5b205 10527 if (nparts == 3)
2450a057 10528 {
26e5b205
JH
10529 operands[2] = part[0][0];
10530 operands[3] = part[0][1];
10531 operands[4] = part[0][2];
10532 operands[5] = part[1][0];
10533 operands[6] = part[1][1];
10534 operands[7] = part[1][2];
2450a057
JH
10535 }
10536 else
10537 {
26e5b205
JH
10538 operands[2] = part[0][0];
10539 operands[3] = part[0][1];
10540 operands[5] = part[1][0];
10541 operands[6] = part[1][1];
e075ae69
RH
10542 }
10543 }
26e5b205
JH
10544 emit_move_insn (operands[2], operands[5]);
10545 emit_move_insn (operands[3], operands[6]);
10546 if (nparts == 3)
10547 emit_move_insn (operands[4], operands[7]);
32b5b1aa 10548
26e5b205 10549 return;
32b5b1aa 10550}
32b5b1aa 10551
e075ae69
RH
10552void
10553ix86_split_ashldi (operands, scratch)
10554 rtx *operands, scratch;
32b5b1aa 10555{
e075ae69
RH
10556 rtx low[2], high[2];
10557 int count;
b985a30f 10558
e075ae69
RH
10559 if (GET_CODE (operands[2]) == CONST_INT)
10560 {
10561 split_di (operands, 2, low, high);
10562 count = INTVAL (operands[2]) & 63;
32b5b1aa 10563
e075ae69
RH
10564 if (count >= 32)
10565 {
10566 emit_move_insn (high[0], low[1]);
10567 emit_move_insn (low[0], const0_rtx);
b985a30f 10568
e075ae69
RH
10569 if (count > 32)
10570 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10571 }
10572 else
10573 {
10574 if (!rtx_equal_p (operands[0], operands[1]))
10575 emit_move_insn (operands[0], operands[1]);
10576 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10577 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10578 }
10579 }
10580 else
10581 {
10582 if (!rtx_equal_p (operands[0], operands[1]))
10583 emit_move_insn (operands[0], operands[1]);
b985a30f 10584
e075ae69 10585 split_di (operands, 1, low, high);
b985a30f 10586
e075ae69
RH
10587 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10588 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10589
fe577e58 10590 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10591 {
fe577e58 10592 if (! no_new_pseudos)
e075ae69
RH
10593 scratch = force_reg (SImode, const0_rtx);
10594 else
10595 emit_move_insn (scratch, const0_rtx);
10596
10597 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10598 scratch));
10599 }
10600 else
10601 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10602 }
e9a25f70 10603}
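/* Illustration (editorial, not from the original source): for a constant
   shift "x << 40" the count is >= 32, so the code emits "high = low;
   low = 0" and then shifts the new high word left by 40 - 32 = 8; for
   "x << 5" it emits "shld $5, low, high" followed by "sall $5, low".  */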
32b5b1aa 10604
e075ae69
RH
10605void
10606ix86_split_ashrdi (operands, scratch)
10607 rtx *operands, scratch;
32b5b1aa 10608{
e075ae69
RH
10609 rtx low[2], high[2];
10610 int count;
32b5b1aa 10611
e075ae69
RH
10612 if (GET_CODE (operands[2]) == CONST_INT)
10613 {
10614 split_di (operands, 2, low, high);
10615 count = INTVAL (operands[2]) & 63;
32b5b1aa 10616
e075ae69
RH
10617 if (count >= 32)
10618 {
10619 emit_move_insn (low[0], high[1]);
32b5b1aa 10620
e075ae69
RH
10621 if (! reload_completed)
10622 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10623 else
10624 {
10625 emit_move_insn (high[0], low[0]);
10626 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10627 }
10628
10629 if (count > 32)
10630 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10631 }
10632 else
10633 {
10634 if (!rtx_equal_p (operands[0], operands[1]))
10635 emit_move_insn (operands[0], operands[1]);
10636 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10637 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10638 }
10639 }
10640 else
32b5b1aa 10641 {
e075ae69
RH
10642 if (!rtx_equal_p (operands[0], operands[1]))
10643 emit_move_insn (operands[0], operands[1]);
10644
10645 split_di (operands, 1, low, high);
10646
10647 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10648 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10649
fe577e58 10650 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10651 {
fe577e58 10652 if (! no_new_pseudos)
e075ae69
RH
10653 scratch = gen_reg_rtx (SImode);
10654 emit_move_insn (scratch, high[0]);
10655 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10656 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10657 scratch));
10658 }
10659 else
10660 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10661 }
e075ae69 10662}
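/* Illustration (editorial, not from the original source): for an
   arithmetic "x >> 40" the code copies the old high word into the low
   word, fills the high word with sign bits via "sarl $31", and then
   arithmetic-shifts the low word right by 40 - 32 = 8.  */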
32b5b1aa 10663
e075ae69
RH
10664void
10665ix86_split_lshrdi (operands, scratch)
10666 rtx *operands, scratch;
10667{
10668 rtx low[2], high[2];
10669 int count;
32b5b1aa 10670
e075ae69 10671 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10672 {
e075ae69
RH
10673 split_di (operands, 2, low, high);
10674 count = INTVAL (operands[2]) & 63;
10675
10676 if (count >= 32)
c7271385 10677 {
e075ae69
RH
10678 emit_move_insn (low[0], high[1]);
10679 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10680
e075ae69
RH
10681 if (count > 32)
10682 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10683 }
10684 else
10685 {
10686 if (!rtx_equal_p (operands[0], operands[1]))
10687 emit_move_insn (operands[0], operands[1]);
10688 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10689 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10690 }
32b5b1aa 10691 }
e075ae69
RH
10692 else
10693 {
10694 if (!rtx_equal_p (operands[0], operands[1]))
10695 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10696
e075ae69
RH
10697 split_di (operands, 1, low, high);
10698
10699 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10700 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10701
10702 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 10703 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10704 {
fe577e58 10705 if (! no_new_pseudos)
e075ae69
RH
10706 scratch = force_reg (SImode, const0_rtx);
10707 else
10708 emit_move_insn (scratch, const0_rtx);
10709
10710 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10711 scratch));
10712 }
10713 else
10714 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10715 }
32b5b1aa 10716}
3f803cd9 10717
0407c02b 10718/* Helper function for the string operations below. Test whether VARIABLE
0945b39d
JH
 10719 is aligned to VALUE bytes. If so, jump to the returned label. */
10720static rtx
10721ix86_expand_aligntest (variable, value)
10722 rtx variable;
10723 int value;
10724{
10725 rtx label = gen_label_rtx ();
10726 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10727 if (GET_MODE (variable) == DImode)
10728 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10729 else
10730 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10731 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10732 1, label);
0945b39d
JH
10733 return label;
10734}
10735
 10736/* Adjust COUNTREG by the VALUE. */
10737static void
10738ix86_adjust_counter (countreg, value)
10739 rtx countreg;
10740 HOST_WIDE_INT value;
10741{
10742 if (GET_MODE (countreg) == DImode)
10743 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10744 else
10745 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10746}
10747
 10748/* Zero extend the possibly SImode EXP to a Pmode register. */
d24b3457 10749rtx
0945b39d
JH
10750ix86_zero_extend_to_Pmode (exp)
10751 rtx exp;
10752{
10753 rtx r;
10754 if (GET_MODE (exp) == VOIDmode)
10755 return force_reg (Pmode, exp);
10756 if (GET_MODE (exp) == Pmode)
10757 return copy_to_mode_reg (Pmode, exp);
10758 r = gen_reg_rtx (Pmode);
10759 emit_insn (gen_zero_extendsidi2 (r, exp));
10760 return r;
10761}
10762
10763/* Expand string move (memcpy) operation. Use i386 string operations when
10764 profitable. expand_clrstr contains similar code. */
10765int
10766ix86_expand_movstr (dst, src, count_exp, align_exp)
10767 rtx dst, src, count_exp, align_exp;
10768{
10769 rtx srcreg, destreg, countreg;
10770 enum machine_mode counter_mode;
10771 HOST_WIDE_INT align = 0;
10772 unsigned HOST_WIDE_INT count = 0;
10773 rtx insns;
10774
0945b39d
JH
10775
10776 if (GET_CODE (align_exp) == CONST_INT)
10777 align = INTVAL (align_exp);
10778
5519a4f9 10779 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10780 if (!TARGET_ALIGN_STRINGOPS)
10781 align = 64;
10782
10783 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
10784 {
10785 count = INTVAL (count_exp);
10786 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10787 return 0;
10788 }
0945b39d
JH
10789
10790 /* Figure out proper mode for counter. For 32bits it is always SImode,
10791 for 64bits use SImode when possible, otherwise DImode.
10792 Set count to number of bytes copied when known at compile time. */
10793 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10794 || x86_64_zero_extended_value (count_exp))
10795 counter_mode = SImode;
10796 else
10797 counter_mode = DImode;
10798
26771da7
JH
10799 start_sequence ();
10800
0945b39d
JH
10801 if (counter_mode != SImode && counter_mode != DImode)
10802 abort ();
10803
10804 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10805 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10806
10807 emit_insn (gen_cld ());
10808
10809 /* When optimizing for size emit simple rep ; movsb instruction for
10810 counts not divisible by 4. */
10811
10812 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10813 {
10814 countreg = ix86_zero_extend_to_Pmode (count_exp);
10815 if (TARGET_64BIT)
10816 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10817 destreg, srcreg, countreg));
10818 else
10819 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10820 destreg, srcreg, countreg));
10821 }
10822
10823 /* For constant aligned (or small unaligned) copies use rep movsl
10824 followed by code copying the rest. For PentiumPro ensure 8 byte
10825 alignment to allow rep movsl acceleration. */
10826
10827 else if (count != 0
10828 && (align >= 8
10829 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10830 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10831 {
10832 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10833 if (count & ~(size - 1))
10834 {
10835 countreg = copy_to_mode_reg (counter_mode,
10836 GEN_INT ((count >> (size == 4 ? 2 : 3))
10837 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10838 countreg = ix86_zero_extend_to_Pmode (countreg);
10839 if (size == 4)
10840 {
10841 if (TARGET_64BIT)
10842 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10843 destreg, srcreg, countreg));
10844 else
10845 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10846 destreg, srcreg, countreg));
10847 }
10848 else
10849 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10850 destreg, srcreg, countreg));
10851 }
10852 if (size == 8 && (count & 0x04))
10853 emit_insn (gen_strmovsi (destreg, srcreg));
10854 if (count & 0x02)
10855 emit_insn (gen_strmovhi (destreg, srcreg));
10856 if (count & 0x01)
10857 emit_insn (gen_strmovqi (destreg, srcreg));
10858 }
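  /* Worked example (editorial, not from the original source): on a
     32-bit target with count = 23 and size = 4, the branch above emits
     "rep movsl" for 23 >> 2 = 5 words, then one strmovhi for the
     (count & 2) bytes and one strmovqi for the final (count & 1) byte:
     5*4 + 2 + 1 = 23.  */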
10859 /* The generic code based on the glibc implementation:
10860 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10861 allowing accelerated copying there)
10862 - copy the data using rep movsl
10863 - copy the rest. */
10864 else
10865 {
10866 rtx countreg2;
10867 rtx label = NULL;
37ad04a5
JH
10868 int desired_alignment = (TARGET_PENTIUMPRO
10869 && (count == 0 || count >= (unsigned int) 260)
10870 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10871
10872 /* In case we don't know anything about the alignment, default to
 10873 the library version, since it is usually equally fast and results in
4977bab6
ZW
10874 shorter code.
10875
10876 Also emit call when we know that the count is large and call overhead
10877 will not be important. */
10878 if (!TARGET_INLINE_ALL_STRINGOPS
10879 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
10880 {
10881 end_sequence ();
10882 return 0;
10883 }
10884
10885 if (TARGET_SINGLE_STRINGOP)
10886 emit_insn (gen_cld ());
10887
10888 countreg2 = gen_reg_rtx (Pmode);
10889 countreg = copy_to_mode_reg (counter_mode, count_exp);
10890
10891 /* We don't use loops to align destination and to copy parts smaller
10892 than 4 bytes, because gcc is able to optimize such code better (in
10893 the case the destination or the count really is aligned, gcc is often
10894 able to predict the branches) and also it is friendlier to the
a4f31c00 10895 hardware branch prediction.
0945b39d 10896
d1f87653 10897 Using loops is beneficial for the generic case, because we can
0945b39d
JH
10898 handle small counts using the loops. Many CPUs (such as Athlon)
10899 have large REP prefix setup costs.
10900
4aae8a9a 10901 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
10902 add some customizability to this code. */
10903
37ad04a5 10904 if (count == 0 && align < desired_alignment)
0945b39d
JH
10905 {
10906 label = gen_label_rtx ();
aaae0bb9 10907 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10908 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10909 }
10910 if (align <= 1)
10911 {
10912 rtx label = ix86_expand_aligntest (destreg, 1);
10913 emit_insn (gen_strmovqi (destreg, srcreg));
10914 ix86_adjust_counter (countreg, 1);
10915 emit_label (label);
10916 LABEL_NUSES (label) = 1;
10917 }
10918 if (align <= 2)
10919 {
10920 rtx label = ix86_expand_aligntest (destreg, 2);
10921 emit_insn (gen_strmovhi (destreg, srcreg));
10922 ix86_adjust_counter (countreg, 2);
10923 emit_label (label);
10924 LABEL_NUSES (label) = 1;
10925 }
37ad04a5 10926 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10927 {
10928 rtx label = ix86_expand_aligntest (destreg, 4);
10929 emit_insn (gen_strmovsi (destreg, srcreg));
10930 ix86_adjust_counter (countreg, 4);
10931 emit_label (label);
10932 LABEL_NUSES (label) = 1;
10933 }
10934
37ad04a5
JH
10935 if (label && desired_alignment > 4 && !TARGET_64BIT)
10936 {
10937 emit_label (label);
10938 LABEL_NUSES (label) = 1;
10939 label = NULL_RTX;
10940 }
0945b39d
JH
10941 if (!TARGET_SINGLE_STRINGOP)
10942 emit_insn (gen_cld ());
10943 if (TARGET_64BIT)
10944 {
10945 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10946 GEN_INT (3)));
10947 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10948 destreg, srcreg, countreg2));
10949 }
10950 else
10951 {
10952 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10953 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10954 destreg, srcreg, countreg2));
10955 }
10956
10957 if (label)
10958 {
10959 emit_label (label);
10960 LABEL_NUSES (label) = 1;
10961 }
10962 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10963 emit_insn (gen_strmovsi (destreg, srcreg));
10964 if ((align <= 4 || count == 0) && TARGET_64BIT)
10965 {
10966 rtx label = ix86_expand_aligntest (countreg, 4);
10967 emit_insn (gen_strmovsi (destreg, srcreg));
10968 emit_label (label);
10969 LABEL_NUSES (label) = 1;
10970 }
10971 if (align > 2 && count != 0 && (count & 2))
10972 emit_insn (gen_strmovhi (destreg, srcreg));
10973 if (align <= 2 || count == 0)
10974 {
10975 rtx label = ix86_expand_aligntest (countreg, 2);
10976 emit_insn (gen_strmovhi (destreg, srcreg));
10977 emit_label (label);
10978 LABEL_NUSES (label) = 1;
10979 }
10980 if (align > 1 && count != 0 && (count & 1))
10981 emit_insn (gen_strmovqi (destreg, srcreg));
10982 if (align <= 1 || count == 0)
10983 {
10984 rtx label = ix86_expand_aligntest (countreg, 1);
10985 emit_insn (gen_strmovqi (destreg, srcreg));
10986 emit_label (label);
10987 LABEL_NUSES (label) = 1;
10988 }
10989 }
10990
10991 insns = get_insns ();
10992 end_sequence ();
10993
10994 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10995 emit_insn (insns);
0945b39d
JH
10996 return 1;
10997}
10998
10999/* Expand string clear operation (bzero). Use i386 string operations when
11000 profitable. expand_movstr contains similar code. */
11001int
11002ix86_expand_clrstr (src, count_exp, align_exp)
11003 rtx src, count_exp, align_exp;
11004{
11005 rtx destreg, zeroreg, countreg;
11006 enum machine_mode counter_mode;
11007 HOST_WIDE_INT align = 0;
11008 unsigned HOST_WIDE_INT count = 0;
11009
11010 if (GET_CODE (align_exp) == CONST_INT)
11011 align = INTVAL (align_exp);
11012
5519a4f9 11013 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11014 if (!TARGET_ALIGN_STRINGOPS)
11015 align = 32;
11016
11017 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11018 {
11019 count = INTVAL (count_exp);
11020 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11021 return 0;
11022 }
0945b39d
JH
11023 /* Figure out proper mode for counter. For 32bits it is always SImode,
11024 for 64bits use SImode when possible, otherwise DImode.
11025 Set count to number of bytes copied when known at compile time. */
11026 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11027 || x86_64_zero_extended_value (count_exp))
11028 counter_mode = SImode;
11029 else
11030 counter_mode = DImode;
11031
11032 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11033
11034 emit_insn (gen_cld ());
11035
 11036 /* When optimizing for size emit simple rep ; stosb instruction for
11037 counts not divisible by 4. */
11038
11039 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11040 {
11041 countreg = ix86_zero_extend_to_Pmode (count_exp);
11042 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11043 if (TARGET_64BIT)
11044 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11045 destreg, countreg));
11046 else
11047 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11048 destreg, countreg));
11049 }
11050 else if (count != 0
11051 && (align >= 8
11052 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11053 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
11054 {
11055 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11056 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11057 if (count & ~(size - 1))
11058 {
11059 countreg = copy_to_mode_reg (counter_mode,
11060 GEN_INT ((count >> (size == 4 ? 2 : 3))
11061 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11062 countreg = ix86_zero_extend_to_Pmode (countreg);
11063 if (size == 4)
11064 {
11065 if (TARGET_64BIT)
11066 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11067 destreg, countreg));
11068 else
11069 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11070 destreg, countreg));
11071 }
11072 else
11073 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11074 destreg, countreg));
11075 }
11076 if (size == 8 && (count & 0x04))
11077 emit_insn (gen_strsetsi (destreg,
11078 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11079 if (count & 0x02)
11080 emit_insn (gen_strsethi (destreg,
11081 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11082 if (count & 0x01)
11083 emit_insn (gen_strsetqi (destreg,
11084 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11085 }
11086 else
11087 {
11088 rtx countreg2;
11089 rtx label = NULL;
37ad04a5
JH
11090 /* Compute desired alignment of the string operation. */
11091 int desired_alignment = (TARGET_PENTIUMPRO
11092 && (count == 0 || count >= (unsigned int) 260)
11093 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11094
11095 /* In case we don't know anything about the alignment, default to
 11096 the library version, since it is usually equally fast and results in
4977bab6
ZW
11097 shorter code.
11098
11099 Also emit call when we know that the count is large and call overhead
11100 will not be important. */
11101 if (!TARGET_INLINE_ALL_STRINGOPS
11102 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11103 return 0;
11104
11105 if (TARGET_SINGLE_STRINGOP)
11106 emit_insn (gen_cld ());
11107
11108 countreg2 = gen_reg_rtx (Pmode);
11109 countreg = copy_to_mode_reg (counter_mode, count_exp);
11110 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11111
37ad04a5 11112 if (count == 0 && align < desired_alignment)
0945b39d
JH
11113 {
11114 label = gen_label_rtx ();
37ad04a5 11115 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11116 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11117 }
11118 if (align <= 1)
11119 {
11120 rtx label = ix86_expand_aligntest (destreg, 1);
11121 emit_insn (gen_strsetqi (destreg,
11122 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11123 ix86_adjust_counter (countreg, 1);
11124 emit_label (label);
11125 LABEL_NUSES (label) = 1;
11126 }
11127 if (align <= 2)
11128 {
11129 rtx label = ix86_expand_aligntest (destreg, 2);
11130 emit_insn (gen_strsethi (destreg,
11131 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11132 ix86_adjust_counter (countreg, 2);
11133 emit_label (label);
11134 LABEL_NUSES (label) = 1;
11135 }
37ad04a5 11136 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11137 {
11138 rtx label = ix86_expand_aligntest (destreg, 4);
11139 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11140 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11141 : zeroreg)));
11142 ix86_adjust_counter (countreg, 4);
11143 emit_label (label);
11144 LABEL_NUSES (label) = 1;
11145 }
11146
37ad04a5
JH
11147 if (label && desired_alignment > 4 && !TARGET_64BIT)
11148 {
11149 emit_label (label);
11150 LABEL_NUSES (label) = 1;
11151 label = NULL_RTX;
11152 }
11153
0945b39d
JH
11154 if (!TARGET_SINGLE_STRINGOP)
11155 emit_insn (gen_cld ());
11156 if (TARGET_64BIT)
11157 {
11158 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11159 GEN_INT (3)));
11160 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11161 destreg, countreg2));
11162 }
11163 else
11164 {
11165 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11166 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11167 destreg, countreg2));
11168 }
0945b39d
JH
11169 if (label)
11170 {
11171 emit_label (label);
11172 LABEL_NUSES (label) = 1;
11173 }
37ad04a5 11174
0945b39d
JH
11175 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11176 emit_insn (gen_strsetsi (destreg,
11177 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11178 if (TARGET_64BIT && (align <= 4 || count == 0))
11179 {
79258dce 11180 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
11181 emit_insn (gen_strsetsi (destreg,
11182 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11183 emit_label (label);
11184 LABEL_NUSES (label) = 1;
11185 }
11186 if (align > 2 && count != 0 && (count & 2))
11187 emit_insn (gen_strsethi (destreg,
11188 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11189 if (align <= 2 || count == 0)
11190 {
74411039 11191 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
11192 emit_insn (gen_strsethi (destreg,
11193 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11196 }
11197 if (align > 1 && count != 0 && (count & 1))
11198 emit_insn (gen_strsetqi (destreg,
11199 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11200 if (align <= 1 || count == 0)
11201 {
74411039 11202 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
11203 emit_insn (gen_strsetqi (destreg,
11204 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11205 emit_label (label);
11206 LABEL_NUSES (label) = 1;
11207 }
11208 }
11209 return 1;
11210}
11211/* Expand strlen. */
11212int
11213ix86_expand_strlen (out, src, eoschar, align)
11214 rtx out, src, eoschar, align;
11215{
11216 rtx addr, scratch1, scratch2, scratch3, scratch4;
11217
 11218 /* The generic case of the strlen expander is long. Avoid its
 11219 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11220
11221 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11222 && !TARGET_INLINE_ALL_STRINGOPS
11223 && !optimize_size
11224 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11225 return 0;
11226
11227 addr = force_reg (Pmode, XEXP (src, 0));
11228 scratch1 = gen_reg_rtx (Pmode);
11229
11230 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11231 && !optimize_size)
11232 {
11233 /* Well it seems that some optimizer does not combine a call like
11234 foo(strlen(bar), strlen(bar));
 11235 when the move and the subtraction are done here. It does calculate
11236 the length just once when these instructions are done inside of
11237 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11238 often used and I use one fewer register for the lifetime of
11239 output_strlen_unroll() this is better. */
11240
11241 emit_move_insn (out, addr);
11242
11243 ix86_expand_strlensi_unroll_1 (out, align);
11244
11245 /* strlensi_unroll_1 returns the address of the zero at the end of
11246 the string, like memchr(), so compute the length by subtracting
11247 the start address. */
11248 if (TARGET_64BIT)
11249 emit_insn (gen_subdi3 (out, out, addr));
11250 else
11251 emit_insn (gen_subsi3 (out, out, addr));
11252 }
11253 else
11254 {
11255 scratch2 = gen_reg_rtx (Pmode);
11256 scratch3 = gen_reg_rtx (Pmode);
11257 scratch4 = force_reg (Pmode, constm1_rtx);
11258
11259 emit_move_insn (scratch3, addr);
11260 eoschar = force_reg (QImode, eoschar);
11261
11262 emit_insn (gen_cld ());
11263 if (TARGET_64BIT)
11264 {
11265 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11266 align, scratch4, scratch3));
11267 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11268 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11269 }
11270 else
11271 {
11272 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11273 align, scratch4, scratch3));
11274 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11275 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11276 }
11277 }
11278 return 1;
11279}
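/* Illustration (editorial, not from the original source): the
   non-unrolled path above is the classic "repnz scasb" idiom.  With the
   count register preloaded with -1 and %al holding the end-of-string
   character (usually 0), "repnz scasb" leaves the count equal to
   -(len + 2) after scanning len bytes plus the terminator, so the
   length is recovered as ~count - 1, which is exactly what the
   one_cmpl and add insns compute.  */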
11280
e075ae69
RH
11281/* Expand the appropriate insns for doing strlen if not just doing
11282 repnz; scasb
11283
11284 out = result, initialized with the start address
11285 align_rtx = alignment of the address.
 11286 scratch = scratch register, initialized with the start address when
77ebd435 11287 not aligned, otherwise undefined
3f803cd9
SC
11288
11289 This is just the body. It needs the initialisations mentioned above and
11290 some address computing at the end. These things are done in i386.md. */
11291
0945b39d
JH
11292static void
11293ix86_expand_strlensi_unroll_1 (out, align_rtx)
11294 rtx out, align_rtx;
3f803cd9 11295{
e075ae69
RH
11296 int align;
11297 rtx tmp;
11298 rtx align_2_label = NULL_RTX;
11299 rtx align_3_label = NULL_RTX;
11300 rtx align_4_label = gen_label_rtx ();
11301 rtx end_0_label = gen_label_rtx ();
e075ae69 11302 rtx mem;
e2e52e1b 11303 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11304 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11305 rtx cmp;
e075ae69
RH
11306
11307 align = 0;
11308 if (GET_CODE (align_rtx) == CONST_INT)
11309 align = INTVAL (align_rtx);
3f803cd9 11310
e9a25f70 11311 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11312
e9a25f70 11313 /* Is there a known alignment and is it less than 4? */
e075ae69 11314 if (align < 4)
3f803cd9 11315 {
0945b39d
JH
11316 rtx scratch1 = gen_reg_rtx (Pmode);
11317 emit_move_insn (scratch1, out);
e9a25f70 11318 /* Is there a known alignment and is it not 2? */
e075ae69 11319 if (align != 2)
3f803cd9 11320 {
e075ae69
RH
11321 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11322 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11323
11324 /* Leave just the 3 lower bits. */
0945b39d 11325 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11326 NULL_RTX, 0, OPTAB_WIDEN);
11327
9076b9c1 11328 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11329 Pmode, 1, align_4_label);
9076b9c1 11330 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 11331 Pmode, 1, align_2_label);
9076b9c1 11332 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 11333 Pmode, 1, align_3_label);
3f803cd9
SC
11334 }
11335 else
11336 {
e9a25f70
JL
11337 /* Since the alignment is 2, we have to check 2 or 0 bytes;
 11338 check if it is aligned to 4 bytes. */
e9a25f70 11339
0945b39d 11340 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
11341 NULL_RTX, 0, OPTAB_WIDEN);
11342
9076b9c1 11343 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11344 Pmode, 1, align_4_label);
3f803cd9
SC
11345 }
11346
e075ae69 11347 mem = gen_rtx_MEM (QImode, out);
e9a25f70 11348
e075ae69 11349 /* Now compare the bytes. */
e9a25f70 11350
0f290768 11351 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
9076b9c1 11352 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11353 QImode, 1, end_0_label);
3f803cd9 11354
0f290768 11355 /* Increment the address. */
0945b39d
JH
11356 if (TARGET_64BIT)
11357 emit_insn (gen_adddi3 (out, out, const1_rtx));
11358 else
11359 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11360
e075ae69
RH
11361 /* Not needed with an alignment of 2 */
11362 if (align != 2)
11363 {
11364 emit_label (align_2_label);
3f803cd9 11365
d43e0b7d
RK
11366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11367 end_0_label);
e075ae69 11368
0945b39d
JH
11369 if (TARGET_64BIT)
11370 emit_insn (gen_adddi3 (out, out, const1_rtx));
11371 else
11372 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11373
11374 emit_label (align_3_label);
11375 }
11376
d43e0b7d
RK
11377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11378 end_0_label);
e075ae69 11379
0945b39d
JH
11380 if (TARGET_64BIT)
11381 emit_insn (gen_adddi3 (out, out, const1_rtx));
11382 else
11383 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11384 }
11385
e075ae69
RH
 11386 /* Generate a loop to check 4 bytes at a time. It is not a good idea
 11387 to align this loop: that only makes the program larger and does not
 11388 speed it up. */
11389 emit_label (align_4_label);
3f803cd9 11390
e075ae69
RH
11391 mem = gen_rtx_MEM (SImode, out);
11392 emit_move_insn (scratch, mem);
0945b39d
JH
11393 if (TARGET_64BIT)
11394 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11395 else
11396 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11397
e2e52e1b
JH
11398 /* This formula yields a nonzero result iff one of the bytes is zero.
 11399 This saves three branches inside the loop and many cycles. */
11400
11401 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11402 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11403 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11404 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11405 gen_int_mode (0x80808080, SImode)));
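	 /* The value computed above is (v - 0x01010101) & ~v & 0x80808080
	    for v = scratch: a zero byte in v borrows through its 0x80 bit
	    in the subtraction while ~v keeps that bit set, so exactly the
	    zero bytes survive both ANDs.  Worked example (editorial, not
	    from the original source): v = 0x12003456 yields 0x00800000;
	    any v with no zero byte yields 0.  */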
d43e0b7d
RK
11406 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11407 align_4_label);
e2e52e1b
JH
11408
11409 if (TARGET_CMOVE)
11410 {
11411 rtx reg = gen_reg_rtx (SImode);
0945b39d 11412 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11413 emit_move_insn (reg, tmpreg);
11414 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11415
0f290768 11416 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11417 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11418 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11419 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11420 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11421 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11422 reg,
11423 tmpreg)));
e2e52e1b 11424 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
11425 emit_insn (gen_rtx_SET (SImode, reg2,
11426 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
11427
11428 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11429 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11430 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11431 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11432 reg2,
11433 out)));
e2e52e1b
JH
11434
11435 }
11436 else
11437 {
11438 rtx end_2_label = gen_label_rtx ();
11439 /* Is zero in the first two bytes? */
11440
16189740 11441 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11442 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11443 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11444 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11445 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11446 pc_rtx);
11447 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11448 JUMP_LABEL (tmp) = end_2_label;
11449
0f290768 11450 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11451 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
11452 if (TARGET_64BIT)
11453 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11454 else
11455 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
11456
11457 emit_label (end_2_label);
11458
11459 }
11460
0f290768 11461 /* Avoid a branch in fixing the byte. */
e2e52e1b 11462 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11463 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11464 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11465 if (TARGET_64BIT)
e6e81735 11466 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11467 else
e6e81735 11468 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
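  /* (Illustrative note, not from the original source: adding the low
     byte of TMPREG to itself moves its 0x80 bit into the carry flag,
     so the subtract-with-borrow above rewinds OUT by 4 when the zero
     byte was the first of the examined pair and by 3 when it was the
     second, all without a branch.)  */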
e075ae69
RH
11469
11470 emit_label (end_0_label);
11471}
0e07aff3
RH
11472
11473void
4977bab6 11474ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
0e07aff3 11475 rtx retval, fnaddr, callarg1, callarg2, pop;
4977bab6 11476 int sibcall;
0e07aff3
RH
11477{
11478 rtx use = NULL, call;
11479
11480 if (pop == const0_rtx)
11481 pop = NULL;
11482 if (TARGET_64BIT && pop)
11483 abort ();
11484
b069de3b
SS
11485#if TARGET_MACHO
11486 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11487 fnaddr = machopic_indirect_call_target (fnaddr);
11488#else
0e07aff3
RH
11489 /* Static functions and indirect calls don't need the pic register. */
11490 if (! TARGET_64BIT && flag_pic
11491 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11492 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 11493 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11494
11495 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11496 {
11497 rtx al = gen_rtx_REG (QImode, 0);
11498 emit_move_insn (al, callarg2);
11499 use_reg (&use, al);
11500 }
b069de3b 11501#endif /* TARGET_MACHO */
0e07aff3
RH
11502
11503 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11504 {
11505 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11506 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11507 }
4977bab6
ZW
11508 if (sibcall && TARGET_64BIT
11509 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11510 {
11511 rtx addr;
11512 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11513 fnaddr = gen_rtx_REG (Pmode, 40);
11514 emit_move_insn (fnaddr, addr);
11515 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11516 }
0e07aff3
RH
11517
11518 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11519 if (retval)
11520 call = gen_rtx_SET (VOIDmode, retval, call);
11521 if (pop)
11522 {
11523 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11524 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11525 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11526 }
11527
11528 call = emit_call_insn (call);
11529 if (use)
11530 CALL_INSN_FUNCTION_USAGE (call) = use;
11531}
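/* Illustration (editorial, not from the original source): for a call to
   a stdcall-style function that pops 8 bytes of arguments, POP is
   (const_int 8), and the emitted pattern is a PARALLEL of the CALL and
   (set (reg sp) (plus (reg sp) (const_int 8))), making the callee's
   stack adjustment visible to the RTL passes.  */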
fce5a9f2 11532
e075ae69 11533\f
e075ae69
RH
11534/* Clear stack slot assignments remembered from previous functions.
11535 This is called from INIT_EXPANDERS once before RTL is emitted for each
11536 function. */
11537
e2500fed
GK
11538static struct machine_function *
11539ix86_init_machine_status ()
37b15744 11540{
e2500fed 11541 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
11542}
11543
e075ae69
RH
11544/* Return a MEM corresponding to a stack slot with mode MODE.
11545 Allocate a new slot if necessary.
11546
11547 The RTL for a function can have several slots available: N is
11548 which slot to use. */
11549
11550rtx
11551assign_386_stack_local (mode, n)
11552 enum machine_mode mode;
11553 int n;
11554{
11555 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11556 abort ();
11557
11558 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11559 ix86_stack_locals[(int) mode][n]
11560 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11561
11562 return ix86_stack_locals[(int) mode][n];
11563}
f996902d
RH
11564
11565/* Construct the SYMBOL_REF for the tls_get_addr function. */
11566
e2500fed 11567static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
11568rtx
11569ix86_tls_get_addr ()
11570{
f996902d 11571
e2500fed 11572 if (!ix86_tls_symbol)
f996902d 11573 {
75d38379
JJ
11574 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11575 (TARGET_GNU_TLS && !TARGET_64BIT)
11576 ? "___tls_get_addr"
11577 : "__tls_get_addr");
f996902d
RH
11578 }
11579
e2500fed 11580 return ix86_tls_symbol;
f996902d 11581}
e075ae69
RH
11582\f
11583/* Calculate the length of the memory address in the instruction
11584 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11585
11586static int
11587memory_address_length (addr)
11588 rtx addr;
11589{
11590 struct ix86_address parts;
11591 rtx base, index, disp;
11592 int len;
11593
11594 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11595 || GET_CODE (addr) == POST_INC
11596 || GET_CODE (addr) == PRE_MODIFY
11597 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11598 return 0;
3f803cd9 11599
e075ae69
RH
11600 if (! ix86_decompose_address (addr, &parts))
11601 abort ();
3f803cd9 11602
e075ae69
RH
11603 base = parts.base;
11604 index = parts.index;
11605 disp = parts.disp;
11606 len = 0;
3f803cd9 11607
e075ae69
RH
11608 /* Register Indirect. */
11609 if (base && !index && !disp)
11610 {
11611 /* Special cases: ebp and esp need the two-byte modrm form. */
11612 if (addr == stack_pointer_rtx
11613 || addr == arg_pointer_rtx
564d80f4
JH
11614 || addr == frame_pointer_rtx
11615 || addr == hard_frame_pointer_rtx)
e075ae69 11616 len = 1;
3f803cd9 11617 }
e9a25f70 11618
e075ae69
RH
11619 /* Direct Addressing. */
11620 else if (disp && !base && !index)
11621 len = 4;
11622
3f803cd9
SC
11623 else
11624 {
e075ae69
RH
11625 /* Find the length of the displacement constant. */
11626 if (disp)
11627 {
11628 if (GET_CODE (disp) == CONST_INT
11629 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11630 len = 1;
11631 else
11632 len = 4;
11633 }
3f803cd9 11634
e075ae69
RH
11635 /* An index requires the two-byte modrm form. */
11636 if (index)
11637 len += 1;
3f803cd9
SC
11638 }
11639
e075ae69
RH
11640 return len;
11641}
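/* Worked examples (editorial, not from the original source): "4(%ebx)"
   has a displacement fitting in a signed byte and no index, so the
   function returns 1; "4(%ebx,%esi)" additionally needs the SIB form,
   returning 2; a bare absolute address returns 4.  */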
79325812 11642
5bf0ebab
RH
11643/* Compute default value for "length_immediate" attribute. When SHORTFORM
 11644 is set, expect that the insn has an 8bit immediate alternative. */
e075ae69 11645int
6ef67412 11646ix86_attr_length_immediate_default (insn, shortform)
e075ae69 11647 rtx insn;
6ef67412 11648 int shortform;
e075ae69 11649{
6ef67412
JH
11650 int len = 0;
11651 int i;
6c698a6d 11652 extract_insn_cached (insn);
6ef67412
JH
11653 for (i = recog_data.n_operands - 1; i >= 0; --i)
11654 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11655 {
6ef67412 11656 if (len)
3071fab5 11657 abort ();
6ef67412
JH
11658 if (shortform
11659 && GET_CODE (recog_data.operand[i]) == CONST_INT
11660 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11661 len = 1;
11662 else
11663 {
11664 switch (get_attr_mode (insn))
11665 {
11666 case MODE_QI:
11667 len+=1;
11668 break;
11669 case MODE_HI:
11670 len+=2;
11671 break;
11672 case MODE_SI:
11673 len+=4;
11674 break;
14f73b5a
JH
11675 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11676 case MODE_DI:
11677 len+=4;
11678 break;
6ef67412 11679 default:
c725bd79 11680 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11681 }
11682 }
3071fab5 11683 }
6ef67412
JH
11684 return len;
11685}
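/* Illustration (editorial, not from the original source): for
   "addl $12, %eax" with SHORTFORM set the constant is in the signed
   8-bit 'K' range, so the immediate contributes 1 byte; with SHORTFORM
   clear the same SImode insn reports 4 bytes.  */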
11686/* Compute default value for "length_address" attribute. */
11687int
11688ix86_attr_length_address_default (insn)
11689 rtx insn;
11690{
11691 int i;
6c698a6d 11692 extract_insn_cached (insn);
1ccbefce
RH
11693 for (i = recog_data.n_operands - 1; i >= 0; --i)
11694 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11695 {
6ef67412 11696 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11697 break;
11698 }
6ef67412 11699 return 0;
3f803cd9 11700}
e075ae69
RH
11701\f
11702/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11703
c237e94a 11704static int
e075ae69 11705ix86_issue_rate ()
b657fc39 11706{
e075ae69 11707 switch (ix86_cpu)
b657fc39 11708 {
e075ae69
RH
11709 case PROCESSOR_PENTIUM:
11710 case PROCESSOR_K6:
11711 return 2;
79325812 11712
e075ae69 11713 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11714 case PROCESSOR_PENTIUM4:
11715 case PROCESSOR_ATHLON:
4977bab6 11716 case PROCESSOR_K8:
e075ae69 11717 return 3;
b657fc39 11718
b657fc39 11719 default:
e075ae69 11720 return 1;
b657fc39 11721 }
b657fc39
L
11722}
11723
e075ae69
RH
11724/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
 11725 by DEP_INSN and no other register set by DEP_INSN. */
b657fc39 11726
e075ae69
RH
11727static int
11728ix86_flags_dependant (insn, dep_insn, insn_type)
11729 rtx insn, dep_insn;
11730 enum attr_type insn_type;
11731{
11732 rtx set, set2;
b657fc39 11733
e075ae69
RH
11734 /* Simplify the test for uninteresting insns. */
11735 if (insn_type != TYPE_SETCC
11736 && insn_type != TYPE_ICMOV
11737 && insn_type != TYPE_FCMOV
11738 && insn_type != TYPE_IBR)
11739 return 0;
b657fc39 11740
e075ae69
RH
11741 if ((set = single_set (dep_insn)) != 0)
11742 {
11743 set = SET_DEST (set);
11744 set2 = NULL_RTX;
11745 }
11746 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11747 && XVECLEN (PATTERN (dep_insn), 0) == 2
11748 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11749 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11750 {
11751 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
 11752 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11753 }
78a0d70c
ZW
11754 else
11755 return 0;
b657fc39 11756
78a0d70c
ZW
11757 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11758 return 0;
b657fc39 11759
f5143c46 11760 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11761 not any other potentially set register. */
11762 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11763 return 0;
11764
11765 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11766 return 0;
11767
11768 return 1;
e075ae69 11769}
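/* Example (editorial, not from the original source): for
   "cmpl %ebx, %eax" followed by "je .L1", the jump (TYPE_IBR) reads
   only the flags that the compare sets, so this function returns true
   and, on Pentium, the pair is scheduled with a dependence cost of 0.  */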
b657fc39 11770
e075ae69
RH
11771/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11772 address with operands set by DEP_INSN. */
11773
11774static int
11775ix86_agi_dependant (insn, dep_insn, insn_type)
11776 rtx insn, dep_insn;
11777 enum attr_type insn_type;
11778{
11779 rtx addr;
11780
6ad48e84
JH
11781 if (insn_type == TYPE_LEA
11782 && TARGET_PENTIUM)
5fbdde42
RH
11783 {
11784 addr = PATTERN (insn);
11785 if (GET_CODE (addr) == SET)
11786 ;
11787 else if (GET_CODE (addr) == PARALLEL
11788 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11789 addr = XVECEXP (addr, 0, 0);
11790 else
11791 abort ();
11792 addr = SET_SRC (addr);
11793 }
e075ae69
RH
11794 else
11795 {
11796 int i;
6c698a6d 11797 extract_insn_cached (insn);
1ccbefce
RH
11798 for (i = recog_data.n_operands - 1; i >= 0; --i)
11799 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11800 {
1ccbefce 11801 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11802 goto found;
11803 }
11804 return 0;
11805 found:;
b657fc39
L
11806 }
11807
e075ae69 11808 return modified_in_p (addr, dep_insn);
b657fc39 11809}
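/* Example (editorial, not from the original source): "addl $4, %ebx"
   followed by "movl (%ebx), %eax" makes this function return true,
   since the load's address register is modified by the dependence; on
   Pentium this models the one-cycle address generation interlock.  */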
a269a03c 11810
c237e94a 11811static int
e075ae69 11812ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11813 rtx insn, link, dep_insn;
11814 int cost;
11815{
e075ae69 11816 enum attr_type insn_type, dep_insn_type;
6ad48e84 11817 enum attr_memory memory, dep_memory;
e075ae69 11818 rtx set, set2;
9b00189f 11819 int dep_insn_code_number;
a269a03c 11820
d1f87653 11821 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11822 if (REG_NOTE_KIND (link) != 0)
309ada50 11823 return 0;
a269a03c 11824
9b00189f
JH
11825 dep_insn_code_number = recog_memoized (dep_insn);
11826
e075ae69 11827 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11828 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11829 return cost;
a269a03c 11830
1c71e60e
JH
11831 insn_type = get_attr_type (insn);
11832 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11833
a269a03c
JC
11834 switch (ix86_cpu)
11835 {
11836 case PROCESSOR_PENTIUM:
e075ae69
RH
11837 /* Address Generation Interlock adds a cycle of latency. */
11838 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11839 cost += 1;
11840
11841 /* ??? Compares pair with jump/setcc. */
11842 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11843 cost = 0;
11844
d1f87653 11845 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 11846 if (insn_type == TYPE_FMOV
e075ae69
RH
11847 && get_attr_memory (insn) == MEMORY_STORE
11848 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11849 cost += 1;
11850 break;
a269a03c 11851
e075ae69 11852 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11853 memory = get_attr_memory (insn);
11854 dep_memory = get_attr_memory (dep_insn);
11855
0f290768 11856 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11857 increase the cost here for non-imov insns. */
11858 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11859 && dep_insn_type != TYPE_FMOV
11860 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11861 cost += 1;
11862
11863 /* INT->FP conversion is expensive. */
11864 if (get_attr_fp_int_src (dep_insn))
11865 cost += 5;
11866
11867 /* There is one cycle extra latency between an FP op and a store. */
11868 if (insn_type == TYPE_FMOV
11869 && (set = single_set (dep_insn)) != NULL_RTX
11870 && (set2 = single_set (insn)) != NULL_RTX
11871 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11872 && GET_CODE (SET_DEST (set2)) == MEM)
11873 cost += 1;
6ad48e84
JH
11874
 11875 /* Show the ability of the reorder buffer to hide the latency of a
 11876 load by executing it in parallel with the previous instruction when
 11877 the previous instruction is not needed to compute the address. */
11878 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11879 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11880 {
11881 /* Claim moves to take one cycle, as core can issue one load
 11882 at a time and the next load can start a cycle later. */
11883 if (dep_insn_type == TYPE_IMOV
11884 || dep_insn_type == TYPE_FMOV)
11885 cost = 1;
11886 else if (cost > 1)
11887 cost--;
11888 }
e075ae69 11889 break;
a269a03c 11890
e075ae69 11891 case PROCESSOR_K6:
6ad48e84
JH
11892 memory = get_attr_memory (insn);
11893 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11894 /* The esp dependency is resolved before the instruction is really
11895 finished. */
11896 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11897 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11898 return 1;
a269a03c 11899
0f290768 11900 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11901 increase the cost here for non-imov insns. */
6ad48e84 11902 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11903 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11904
11905 /* INT->FP conversion is expensive. */
11906 if (get_attr_fp_int_src (dep_insn))
11907 cost += 5;
6ad48e84
JH
11908
 11909 /* Show the ability of the reorder buffer to hide the latency of a
 11910 load by executing it in parallel with the previous instruction when
 11911 the previous instruction is not needed to compute the address. */
11912 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11913 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11914 {
11915 /* Claim moves to take one cycle, as core can issue one load
 11916 at a time and the next load can start a cycle later. */
11917 if (dep_insn_type == TYPE_IMOV
11918 || dep_insn_type == TYPE_FMOV)
11919 cost = 1;
11920 else if (cost > 2)
11921 cost -= 2;
11922 else
11923 cost = 1;
11924 }
a14003ee 11925 break;
e075ae69 11926
309ada50 11927 case PROCESSOR_ATHLON:
4977bab6 11928 case PROCESSOR_K8:
6ad48e84
JH
11929 memory = get_attr_memory (insn);
11930 dep_memory = get_attr_memory (dep_insn);
11931
6ad48e84
JH
 11932 /* Show the ability of the reorder buffer to hide the latency of a
 11933 load by executing it in parallel with the previous instruction when
 11934 the previous instruction is not needed to compute the address. */
11935 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11936 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11937 {
11938 /* Claim moves to take one cycle, as core can issue one load
 11939 at a time and the next load can start a cycle later. */
11940 if (dep_insn_type == TYPE_IMOV
11941 || dep_insn_type == TYPE_FMOV)
11942 cost = 0;
11943 else if (cost >= 3)
11944 cost -= 3;
11945 else
11946 cost = 0;
11947 }
309ada50 11948
a269a03c 11949 default:
a269a03c
JC
11950 break;
11951 }
11952
11953 return cost;
11954}
0a726ef1 11955
e075ae69
RH
11956static union
11957{
11958 struct ppro_sched_data
11959 {
11960 rtx decode[3];
11961 int issued_this_cycle;
11962 } ppro;
11963} ix86_sched_data;
0a726ef1 11964
e075ae69
RH
11965static enum attr_ppro_uops
11966ix86_safe_ppro_uops (insn)
11967 rtx insn;
11968{
11969 if (recog_memoized (insn) >= 0)
11970 return get_attr_ppro_uops (insn);
11971 else
11972 return PPRO_UOPS_MANY;
11973}
0a726ef1 11974
e075ae69
RH
11975static void
11976ix86_dump_ppro_packet (dump)
11977 FILE *dump;
0a726ef1 11978{
e075ae69 11979 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11980 {
e075ae69
RH
11981 fprintf (dump, "PPRO packet: %d",
11982 INSN_UID (ix86_sched_data.ppro.decode[0]));
11983 if (ix86_sched_data.ppro.decode[1])
11984 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11985 if (ix86_sched_data.ppro.decode[2])
11986 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11987 fputc ('\n', dump);
11988 }
11989}
0a726ef1 11990
e075ae69 11991/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11992
c237e94a
ZW
11993static void
11994ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11995 FILE *dump ATTRIBUTE_UNUSED;
11996 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11997 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11998{
11999 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12000}
12001
12002/* Shift INSN to SLOT, and shift everything else down. */
12003
12004static void
12005ix86_reorder_insn (insnp, slot)
12006 rtx *insnp, *slot;
12007{
12008 if (insnp != slot)
12009 {
12010 rtx insn = *insnp;
0f290768 12011 do
e075ae69
RH
12012 insnp[0] = insnp[1];
12013 while (++insnp != slot);
12014 *insnp = insn;
0a726ef1 12015 }
e075ae69
RH
12016}
12017
c6991660 12018static void
78a0d70c
ZW
12019ix86_sched_reorder_ppro (ready, e_ready)
12020 rtx *ready;
12021 rtx *e_ready;
12022{
12023 rtx decode[3];
12024 enum attr_ppro_uops cur_uops;
12025 int issued_this_cycle;
12026 rtx *insnp;
12027 int i;
e075ae69 12028
0f290768 12029 /* At this point .ppro.decode contains the state of the three
78a0d70c 12030 decoders from last "cycle". That is, those insns that were
0f290768 12031 actually independent. But here we're scheduling for the
78a0d70c
ZW
12032 decoder, and we may find things that are decodable in the
12033 same cycle. */
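  /* (Background note, editorial and not from the original source: the
     PPro front end decodes up to three insns per cycle, but only the
     first decoder accepts insns of more than one uop -- the well-known
     4-1-1 decode template.  The code below reorders the ready list to
     match that shape.)  */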
e075ae69 12034
0f290768 12035 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 12036 issued_this_cycle = 0;
e075ae69 12037
78a0d70c
ZW
12038 insnp = e_ready;
12039 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 12040
78a0d70c
ZW
 12041 /* If the decoders are empty, and we have a complex insn at the
12042 head of the priority queue, let it issue without complaint. */
12043 if (decode[0] == NULL)
12044 {
12045 if (cur_uops == PPRO_UOPS_MANY)
12046 {
12047 decode[0] = *insnp;
12048 goto ppro_done;
12049 }
12050
 12051 /* Otherwise, search for a 2-4 uop insn to issue. */
12052 while (cur_uops != PPRO_UOPS_FEW)
12053 {
12054 if (insnp == ready)
12055 break;
12056 cur_uops = ix86_safe_ppro_uops (*--insnp);
12057 }
12058
12059 /* If so, move it to the head of the line. */
12060 if (cur_uops == PPRO_UOPS_FEW)
12061 ix86_reorder_insn (insnp, e_ready);
0a726ef1 12062
78a0d70c
ZW
12063 /* Issue the head of the queue. */
12064 issued_this_cycle = 1;
12065 decode[0] = *e_ready--;
12066 }
fb693d44 12067
78a0d70c
ZW
12068 /* Look for simple insns to fill in the other two slots. */
12069 for (i = 1; i < 3; ++i)
12070 if (decode[i] == NULL)
12071 {
a151daf0 12072 if (ready > e_ready)
78a0d70c 12073 goto ppro_done;
fb693d44 12074
e075ae69
RH
12075 insnp = e_ready;
12076 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
12077 while (cur_uops != PPRO_UOPS_ONE)
12078 {
12079 if (insnp == ready)
12080 break;
12081 cur_uops = ix86_safe_ppro_uops (*--insnp);
12082 }
fb693d44 12083
78a0d70c
ZW
12084 /* Found one. Move it to the head of the queue and issue it. */
12085 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 12086 {
78a0d70c
ZW
12087 ix86_reorder_insn (insnp, e_ready);
12088 decode[i] = *e_ready--;
12089 issued_this_cycle++;
12090 continue;
12091 }
fb693d44 12092
78a0d70c
ZW
12093 /* ??? Didn't find one. Ideally, here we would do a lazy split
12094 of 2-uop insns, issue one and queue the other. */
12095 }
fb693d44 12096
78a0d70c
ZW
12097 ppro_done:
12098 if (issued_this_cycle == 0)
12099 issued_this_cycle = 1;
12100 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12101}
fb693d44 12102
0f290768 12103/* We are about to begin issuing insns for this clock cycle.
78a0d70c 12104 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
12105static int
12106ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
12107 FILE *dump ATTRIBUTE_UNUSED;
12108 int sched_verbose ATTRIBUTE_UNUSED;
12109 rtx *ready;
c237e94a 12110 int *n_readyp;
78a0d70c
ZW
12111 int clock_var ATTRIBUTE_UNUSED;
12112{
c237e94a 12113 int n_ready = *n_readyp;
78a0d70c 12114 rtx *e_ready = ready + n_ready - 1;
fb693d44 12115
fce5a9f2 12116 /* Make sure to go ahead and initialize key items in
a151daf0
JL
12117 ix86_sched_data if we are not going to bother trying to
12118 reorder the ready queue. */
78a0d70c 12119 if (n_ready < 2)
a151daf0
JL
12120 {
12121 ix86_sched_data.ppro.issued_this_cycle = 1;
12122 goto out;
12123 }
e075ae69 12124
78a0d70c
ZW
12125 switch (ix86_cpu)
12126 {
12127 default:
12128 break;
e075ae69 12129
78a0d70c
ZW
12130 case PROCESSOR_PENTIUMPRO:
12131 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 12132 break;
fb693d44
RH
12133 }
12134
e075ae69
RH
12135out:
12136 return ix86_issue_rate ();
12137}
fb693d44 12138
e075ae69
RH
12139/* We are about to issue INSN. Return the number of insns left on the
12140 ready queue that can be issued this cycle. */
b222082e 12141
c237e94a 12142static int
e075ae69
RH
12143ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12144 FILE *dump;
12145 int sched_verbose;
12146 rtx insn;
12147 int can_issue_more;
12148{
12149 int i;
12150 switch (ix86_cpu)
fb693d44 12151 {
e075ae69
RH
12152 default:
12153 return can_issue_more - 1;
fb693d44 12154
e075ae69
RH
12155 case PROCESSOR_PENTIUMPRO:
12156 {
12157 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 12158
e075ae69
RH
12159 if (uops == PPRO_UOPS_MANY)
12160 {
12161 if (sched_verbose)
12162 ix86_dump_ppro_packet (dump);
12163 ix86_sched_data.ppro.decode[0] = insn;
12164 ix86_sched_data.ppro.decode[1] = NULL;
12165 ix86_sched_data.ppro.decode[2] = NULL;
12166 if (sched_verbose)
12167 ix86_dump_ppro_packet (dump);
12168 ix86_sched_data.ppro.decode[0] = NULL;
12169 }
12170 else if (uops == PPRO_UOPS_FEW)
12171 {
12172 if (sched_verbose)
12173 ix86_dump_ppro_packet (dump);
12174 ix86_sched_data.ppro.decode[0] = insn;
12175 ix86_sched_data.ppro.decode[1] = NULL;
12176 ix86_sched_data.ppro.decode[2] = NULL;
12177 }
12178 else
12179 {
12180 for (i = 0; i < 3; ++i)
12181 if (ix86_sched_data.ppro.decode[i] == NULL)
12182 {
12183 ix86_sched_data.ppro.decode[i] = insn;
12184 break;
12185 }
12186 if (i == 3)
12187 abort ();
12188 if (i == 2)
12189 {
12190 if (sched_verbose)
12191 ix86_dump_ppro_packet (dump);
12192 ix86_sched_data.ppro.decode[0] = NULL;
12193 ix86_sched_data.ppro.decode[1] = NULL;
12194 ix86_sched_data.ppro.decode[2] = NULL;
12195 }
12196 }
12197 }
12198 return --ix86_sched_data.ppro.issued_this_cycle;
12199 }
fb693d44 12200}
9b690711
RH
12201
12202static int
12203ia32_use_dfa_pipeline_interface ()
12204{
4977bab6 12205 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
9b690711
RH
12206 return 1;
12207 return 0;
12208}
12209
12210/* How many alternative schedules to try. This should be as wide as the
12211 scheduling freedom in the DFA, but no wider. Making this value too
12212 large results in extra work for the scheduler. */
12213
12214static int
12215ia32_multipass_dfa_lookahead ()
12216{
12217 if (ix86_cpu == PROCESSOR_PENTIUM)
12218 return 2;
12219 else
12220 return 0;
12221}
12222
a7180f70 12223\f
0e4970d7
RK
12224/* Walk through INSNS and look for MEM references whose address is DSTREG or
12225 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
12226 appropriate. */
12227
12228void
12229ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12230 rtx insns;
12231 rtx dstref, srcref, dstreg, srcreg;
12232{
12233 rtx insn;
12234
12235 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12236 if (INSN_P (insn))
12237 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12238 dstreg, srcreg);
12239}
12240
12241/* Subroutine of above to actually do the updating by recursively walking
12242 the rtx. */
12243
12244static void
12245ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12246 rtx x;
12247 rtx dstref, srcref, dstreg, srcreg;
12248{
12249 enum rtx_code code = GET_CODE (x);
12250 const char *format_ptr = GET_RTX_FORMAT (code);
12251 int i, j;
12252
12253 if (code == MEM && XEXP (x, 0) == dstreg)
12254 MEM_COPY_ATTRIBUTES (x, dstref);
12255 else if (code == MEM && XEXP (x, 0) == srcreg)
12256 MEM_COPY_ATTRIBUTES (x, srcref);
12257
12258 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12259 {
12260 if (*format_ptr == 'e')
12261 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12262 dstreg, srcreg);
12263 else if (*format_ptr == 'E')
12264 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 12265 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
12266 dstreg, srcreg);
12267 }
12268}
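/* (The 'e'/'E' dispatch above is the standard idiom for walking an rtx:
   GET_RTX_FORMAT yields one format letter per operand, 'e' marking a
   single sub-rtx and 'E' a vector of sub-rtxes.)  */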
12269\f
a7180f70
BS
12270/* Compute the alignment given to a constant that is being placed in memory.
12271 EXP is the constant and ALIGN is the alignment that the object would
12272 ordinarily have.
12273 The value of this function is used instead of that alignment to align
12274 the object. */
12275
12276int
12277ix86_constant_alignment (exp, align)
12278 tree exp;
12279 int align;
12280{
12281 if (TREE_CODE (exp) == REAL_CST)
12282 {
12283 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12284 return 64;
12285 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12286 return 128;
12287 }
12288 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12289 && align < 256)
12290 return 256;
12291
12292 return align;
12293}
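/* Worked example: a DFmode REAL_CST that would ordinarily get 32-bit
   alignment is returned as 64 so that FP loads of it stay aligned, and
   a STRING_CST of 31 or more bytes with alignment below 256 bits is
   bumped to 256.  */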
12294
12295/* Compute the alignment for a static variable.
12296 TYPE is the data type, and ALIGN is the alignment that
12297 the object would ordinarily have. The value of this function is used
12298 instead of that alignment to align the object. */
12299
12300int
12301ix86_data_alignment (type, align)
12302 tree type;
12303 int align;
12304{
12305 if (AGGREGATE_TYPE_P (type)
12306 && TYPE_SIZE (type)
12307 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12308 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12309 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12310 return 256;
12311
0d7d98ee
JH
12312 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12313 to a 16-byte boundary. */
12314 if (TARGET_64BIT)
12315 {
12316 if (AGGREGATE_TYPE_P (type)
12317 && TYPE_SIZE (type)
12318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12319 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12320 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12321 return 128;
12322 }
12323
a7180f70
BS
12324 if (TREE_CODE (type) == ARRAY_TYPE)
12325 {
12326 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12327 return 64;
12328 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12329 return 128;
12330 }
12331 else if (TREE_CODE (type) == COMPLEX_TYPE)
12332 {
0f290768 12333
a7180f70
BS
12334 if (TYPE_MODE (type) == DCmode && align < 64)
12335 return 64;
12336 if (TYPE_MODE (type) == XCmode && align < 128)
12337 return 128;
12338 }
12339 else if ((TREE_CODE (type) == RECORD_TYPE
12340 || TREE_CODE (type) == UNION_TYPE
12341 || TREE_CODE (type) == QUAL_UNION_TYPE)
12342 && TYPE_FIELDS (type))
12343 {
12344 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12345 return 64;
12346 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12347 return 128;
12348 }
12349 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12350 || TREE_CODE (type) == INTEGER_TYPE)
12351 {
12352 if (TYPE_MODE (type) == DFmode && align < 64)
12353 return 64;
12354 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12355 return 128;
12356 }
12357
12358 return align;
12359}
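/* Worked examples: an aggregate whose TYPE_SIZE is at least 256 bits
   (say, a 32-byte struct) is aligned to 256 by the first test above.
   On ia32 a double[2] array is too small for either aggregate test,
   falls through to the ARRAY_TYPE case, and gets 64-bit alignment from
   its DFmode element type.  */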
12360
12361/* Compute the alignment for a local variable.
12362 TYPE is the data type, and ALIGN is the alignment that
12363 the object would ordinarily have. The value of this macro is used
12364 instead of that alignment to align the object. */
12365
12366int
12367ix86_local_alignment (type, align)
12368 tree type;
12369 int align;
12370{
0d7d98ee
JH
12371 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12372 to a 16-byte boundary. */
12373 if (TARGET_64BIT)
12374 {
12375 if (AGGREGATE_TYPE_P (type)
12376 && TYPE_SIZE (type)
12377 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12378 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12379 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12380 return 128;
12381 }
a7180f70
BS
12382 if (TREE_CODE (type) == ARRAY_TYPE)
12383 {
12384 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12385 return 64;
12386 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12387 return 128;
12388 }
12389 else if (TREE_CODE (type) == COMPLEX_TYPE)
12390 {
12391 if (TYPE_MODE (type) == DCmode && align < 64)
12392 return 64;
12393 if (TYPE_MODE (type) == XCmode && align < 128)
12394 return 128;
12395 }
12396 else if ((TREE_CODE (type) == RECORD_TYPE
12397 || TREE_CODE (type) == UNION_TYPE
12398 || TREE_CODE (type) == QUAL_UNION_TYPE)
12399 && TYPE_FIELDS (type))
12400 {
12401 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12402 return 64;
12403 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12404 return 128;
12405 }
12406 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12407 || TREE_CODE (type) == INTEGER_TYPE)
12408 {
0f290768 12409
a7180f70
BS
12410 if (TYPE_MODE (type) == DFmode && align < 64)
12411 return 64;
12412 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12413 return 128;
12414 }
12415 return align;
12416}
0ed08620
JH
12417\f
12418/* Emit RTL insns to initialize the variable parts of a trampoline.
12419 FNADDR is an RTX for the address of the function's pure code.
12420 CXT is an RTX for the static chain value for the function. */
12421void
12422x86_initialize_trampoline (tramp, fnaddr, cxt)
12423 rtx tramp, fnaddr, cxt;
12424{
12425 if (!TARGET_64BIT)
12426 {
12427 /* Compute offset from the end of the jmp to the target function. */
12428 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12429 plus_constant (tramp, 10),
12430 NULL_RTX, 1, OPTAB_DIRECT);
12431 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 12432 gen_int_mode (0xb9, QImode));
0ed08620
JH
12433 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12434 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 12435 gen_int_mode (0xe9, QImode));
0ed08620
JH
12436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12437 }
12438 else
12439 {
12440 int offset = 0;
12441 /* Try to load the address using the shorter movl instead of movabs.
12442 We may want to support movq for kernel mode, but the kernel does not use
12443 trampolines at the moment. */
12444 if (x86_64_zero_extended_value (fnaddr))
12445 {
12446 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12447 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12448 gen_int_mode (0xbb41, HImode));
0ed08620
JH
12449 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12450 gen_lowpart (SImode, fnaddr));
12451 offset += 6;
12452 }
12453 else
12454 {
12455 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12456 gen_int_mode (0xbb49, HImode));
0ed08620
JH
12457 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12458 fnaddr);
12459 offset += 10;
12460 }
12461 /* Load static chain using movabs to r10. */
12462 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12463 gen_int_mode (0xba49, HImode));
0ed08620
JH
12464 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12465 cxt);
12466 offset += 10;
12467 /* Jump through r11 to the target function. */
12468 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12469 gen_int_mode (0xff49, HImode));
0ed08620 12470 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 12471 gen_int_mode (0xe3, QImode));
0ed08620
JH
12472 offset += 3;
12473 if (offset > TRAMPOLINE_SIZE)
b531087a 12474 abort ();
0ed08620 12475 }
5791cc29
JT
12476
12477#ifdef TRANSFER_FROM_TRAMPOLINE
12478 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12479 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12480#endif
0ed08620 12481}
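/* For reference, a reconstruction (from the byte constants above) of
   what the trampoline looks like once initialized:

   !TARGET_64BIT:
	b9 <cxt:4>		movl   $cxt, %ecx
	e9 <disp:4>		jmp    fnaddr  (disp = fnaddr - (tramp + 10))

   TARGET_64BIT, long form:
	49 bb <fnaddr:8>	movabs $fnaddr, %r11
	49 ba <cxt:8>		movabs $cxt, %r10
	49 ff e3		jmp    *%r11

   When FNADDR zero-extends, the first movabs is replaced by the
   shorter 41 bb <fnaddr:4>, i.e. movl $fnaddr, %r11d.  */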
eeb06b1b 12482\f
6a2dd09a
RS
12483#define def_builtin(MASK, NAME, TYPE, CODE) \
12484do { \
12485 if ((MASK) & target_flags) \
12486 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12487 NULL, NULL_TREE); \
eeb06b1b 12488} while (0)
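/* Usage sketch: a call such as

     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		  IX86_BUILTIN_EMMS);

   registers the builtin with the front end only when the corresponding
   bit is set in target_flags, so the function simply does not exist
   under -mno-mmx; the IX86_BUILTIN_* code is what the builtin expander
   keys off later.  */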
bd793c65 12489
bd793c65
BS
12490struct builtin_description
12491{
8b60264b
KG
12492 const unsigned int mask;
12493 const enum insn_code icode;
12494 const char *const name;
12495 const enum ix86_builtins code;
12496 const enum rtx_code comparison;
12497 const unsigned int flag;
bd793c65
BS
12498};
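/* A note on the COMPARISON and FLAG fields, inferred from the tables
   below: they let several builtins share one insn pattern.  For
   example, "__builtin_ia32_cmpgtps" is entered as LT with FLAG set,
   i.e. it is expanded as a less-than compare with its operands
   swapped.  */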
12499
fbe5eb6d
BS
12500/* Used for builtins that are enabled both by -msse and -msse2. */
12501#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12502
8b60264b 12503static const struct builtin_description bdesc_comi[] =
bd793c65 12504{
1194ca05
JH
12505 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12506 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12507 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12512 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12513 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12517 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12518 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12519 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12524 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12525 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
12529};
12530
8b60264b 12531static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12532{
12533 /* SSE */
fbe5eb6d
BS
12534 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12535 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12536 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12542
12543 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12544 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12545 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12549 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12550 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12551 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12555 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12556 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12557 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
12558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12561 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
12562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12563
12564 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12565 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12566 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12567 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12568
1877be45
JH
12569 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12570 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12571 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12573
fbe5eb6d
BS
12574 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12575 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12576 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12577 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12578 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12579
12580 /* MMX */
eeb06b1b
BS
12581 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12582 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12583 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12584 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12585 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12586 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12587
12588 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12589 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12590 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12591 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12592 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12593 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12594 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12595 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12596
12597 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12598 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 12599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12600
12601 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12602 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12603 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12605
fbe5eb6d
BS
12606 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12607 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12608
12609 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12610 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12611 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12612 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12613 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12615
fbe5eb6d
BS
12616 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12617 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12618 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12620
12621 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12622 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12623 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12627
12628 /* Special. */
eeb06b1b
BS
12629 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12630 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12631 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12632
fbe5eb6d
BS
12633 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12634 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
12635
12636 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12637 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12638 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12639 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12640 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12641 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12642
12643 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12644 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12645 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12646 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12647 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12648 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12649
12650 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12651 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12652 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12653 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12654
fbe5eb6d
BS
12655 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12656 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12657
12658 /* SSE2 */
12659 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12667
12668 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12669 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12670 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12674 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12675 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12676 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12680 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12681 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12682 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12685 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12686 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12688
12689 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12693
1877be45
JH
12694 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12696 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12698
12699 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12701 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12702
12703 /* SSE2 MMX */
12704 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12712
12713 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12715 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12716 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12721
12722 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12726
916b60b7
BS
12727 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12731
12732 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12734
12735 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12741
12742 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12746
12747 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12750 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12751 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12754 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12755
916b60b7
BS
12756 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12758 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12759
12760 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12762
12763 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12765 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12769
12770 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12772 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12776
12777 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12781
12782 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12783
fbe5eb6d
BS
12784 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
12787};
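/* Entries above with a null NAME (the shifts, the conversions, psadbw
   and pmaddwd) are skipped by the generic two-operand registration
   loop in ix86_init_mmx_sse_builtins and are instead def_builtin'd
   individually below with more precise function types.  */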
12788
8b60264b 12789static const struct builtin_description bdesc_1arg[] =
bd793c65 12790{
fbe5eb6d
BS
12791 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12792 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12793
12794 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12796 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12797
12798 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12799 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12800 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12802
12803 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12806 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12807
12808 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12809
12810 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12812
fbe5eb6d
BS
12813 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12818
fbe5eb6d 12819 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12820
fbe5eb6d
BS
12821 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12823
12824 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12826 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12827
12828 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
bd793c65
BS
12829};
12830
f6155fda
SS
12831void
12832ix86_init_builtins ()
12833{
12834 if (TARGET_MMX)
12835 ix86_init_mmx_sse_builtins ();
12836}
12837
12838/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12839 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12840 builtins. */
e37af218 12841static void
f6155fda 12842ix86_init_mmx_sse_builtins ()
bd793c65 12843{
8b60264b 12844 const struct builtin_description * d;
77ebd435 12845 size_t i;
bd793c65
BS
12846
12847 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12848 tree pcchar_type_node = build_pointer_type (
12849 build_type_variant (char_type_node, 1, 0));
bd793c65 12850 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12851 tree pcfloat_type_node = build_pointer_type (
12852 build_type_variant (float_type_node, 1, 0));
bd793c65 12853 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12854 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12855 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12856
12857 /* Comparisons. */
12858 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12859 = build_function_type_list (integer_type_node,
12860 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12861 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12862 = build_function_type_list (V4SI_type_node,
12863 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12864 /* MMX/SSE/integer conversions. */
bd793c65 12865 tree int_ftype_v4sf
b4de2f7d
AH
12866 = build_function_type_list (integer_type_node,
12867 V4SF_type_node, NULL_TREE);
bd793c65 12868 tree int_ftype_v8qi
b4de2f7d 12869 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12870 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12871 = build_function_type_list (V4SF_type_node,
12872 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12873 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12874 = build_function_type_list (V4SF_type_node,
12875 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12876 tree int_ftype_v4hi_int
b4de2f7d
AH
12877 = build_function_type_list (integer_type_node,
12878 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12879 tree v4hi_ftype_v4hi_int_int
e7a60f56 12880 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12881 integer_type_node, integer_type_node,
12882 NULL_TREE);
bd793c65
BS
12883 /* Miscellaneous. */
12884 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12885 = build_function_type_list (V8QI_type_node,
12886 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12887 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12888 = build_function_type_list (V4HI_type_node,
12889 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12890 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12891 = build_function_type_list (V4SF_type_node,
12892 V4SF_type_node, V4SF_type_node,
12893 integer_type_node, NULL_TREE);
bd793c65 12894 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12895 = build_function_type_list (V2SI_type_node,
12896 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12897 tree v4hi_ftype_v4hi_int
b4de2f7d 12898 = build_function_type_list (V4HI_type_node,
e7a60f56 12899 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12900 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12901 = build_function_type_list (V4HI_type_node,
12902 V4HI_type_node, long_long_unsigned_type_node,
12903 NULL_TREE);
bd793c65 12904 tree v2si_ftype_v2si_di
b4de2f7d
AH
12905 = build_function_type_list (V2SI_type_node,
12906 V2SI_type_node, long_long_unsigned_type_node,
12907 NULL_TREE);
bd793c65 12908 tree void_ftype_void
b4de2f7d 12909 = build_function_type (void_type_node, void_list_node);
bd793c65 12910 tree void_ftype_unsigned
b4de2f7d 12911 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12912 tree unsigned_ftype_void
b4de2f7d 12913 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12914 tree di_ftype_void
b4de2f7d 12915 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12916 tree v4sf_ftype_void
b4de2f7d 12917 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12918 tree v2si_ftype_v4sf
b4de2f7d 12919 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12920 /* Loads/stores. */
bd793c65 12921 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12922 = build_function_type_list (void_type_node,
12923 V8QI_type_node, V8QI_type_node,
12924 pchar_type_node, NULL_TREE);
068f5dea
JH
12925 tree v4sf_ftype_pcfloat
12926 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
12927 /* @@@ the type is bogus */
12928 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12929 = build_function_type_list (V4SF_type_node,
f8ca7923 12930 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12931 tree void_ftype_pv2si_v4sf
b4de2f7d 12932 = build_function_type_list (void_type_node,
f8ca7923 12933 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12934 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12935 = build_function_type_list (void_type_node,
12936 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12937 tree void_ftype_pdi_di
b4de2f7d
AH
12938 = build_function_type_list (void_type_node,
12939 pdi_type_node, long_long_unsigned_type_node,
12940 NULL_TREE);
916b60b7 12941 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12942 = build_function_type_list (void_type_node,
12943 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12944 /* Normal vector unops. */
12945 tree v4sf_ftype_v4sf
b4de2f7d 12946 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12947
bd793c65
BS
12948 /* Normal vector binops. */
12949 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12950 = build_function_type_list (V4SF_type_node,
12951 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12952 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12953 = build_function_type_list (V8QI_type_node,
12954 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12955 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12956 = build_function_type_list (V4HI_type_node,
12957 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12958 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12959 = build_function_type_list (V2SI_type_node,
12960 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12961 tree di_ftype_di_di
b4de2f7d
AH
12962 = build_function_type_list (long_long_unsigned_type_node,
12963 long_long_unsigned_type_node,
12964 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12965
47f339cf 12966 tree v2si_ftype_v2sf
ae3aa00d 12967 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12968 tree v2sf_ftype_v2si
b4de2f7d 12969 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12970 tree v2si_ftype_v2si
b4de2f7d 12971 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12972 tree v2sf_ftype_v2sf
b4de2f7d 12973 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12974 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12975 = build_function_type_list (V2SF_type_node,
12976 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12977 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12978 = build_function_type_list (V2SI_type_node,
12979 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 12980 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
12981 tree pcint_type_node = build_pointer_type (
12982 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 12983 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
12984 tree pcdouble_type_node = build_pointer_type (
12985 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 12986 tree int_ftype_v2df_v2df
b4de2f7d
AH
12987 = build_function_type_list (integer_type_node,
12988 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12989
12990 tree ti_ftype_void
b4de2f7d 12991 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
12992 tree v2di_ftype_void
12993 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12994 tree ti_ftype_ti_ti
b4de2f7d
AH
12995 = build_function_type_list (intTI_type_node,
12996 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
12997 tree void_ftype_pcvoid
12998 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 12999 tree v2di_ftype_di
b4de2f7d
AH
13000 = build_function_type_list (V2DI_type_node,
13001 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13002 tree di_ftype_v2di
13003 = build_function_type_list (long_long_unsigned_type_node,
13004 V2DI_type_node, NULL_TREE);
fbe5eb6d 13005 tree v4sf_ftype_v4si
b4de2f7d 13006 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13007 tree v4si_ftype_v4sf
b4de2f7d 13008 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13009 tree v2df_ftype_v4si
b4de2f7d 13010 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13011 tree v4si_ftype_v2df
b4de2f7d 13012 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13013 tree v2si_ftype_v2df
b4de2f7d 13014 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13015 tree v4sf_ftype_v2df
b4de2f7d 13016 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13017 tree v2df_ftype_v2si
b4de2f7d 13018 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13019 tree v2df_ftype_v4sf
b4de2f7d 13020 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13021 tree int_ftype_v2df
b4de2f7d 13022 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13023 tree v2df_ftype_v2df_int
b4de2f7d
AH
13024 = build_function_type_list (V2DF_type_node,
13025 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13026 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13027 = build_function_type_list (V4SF_type_node,
13028 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13029 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13030 = build_function_type_list (V2DF_type_node,
13031 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13032 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13033 = build_function_type_list (V2DF_type_node,
13034 V2DF_type_node, V2DF_type_node,
13035 integer_type_node,
13036 NULL_TREE);
fbe5eb6d 13037 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13038 = build_function_type_list (V2DF_type_node,
13039 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13040 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13041 = build_function_type_list (void_type_node,
13042 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13043 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13044 = build_function_type_list (void_type_node,
13045 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13046 tree void_ftype_pint_int
b4de2f7d
AH
13047 = build_function_type_list (void_type_node,
13048 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13049 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13050 = build_function_type_list (void_type_node,
13051 V16QI_type_node, V16QI_type_node,
13052 pchar_type_node, NULL_TREE);
068f5dea
JH
13053 tree v2df_ftype_pcdouble
13054 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13055 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13056 = build_function_type_list (V2DF_type_node,
13057 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13058 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13059 = build_function_type_list (V16QI_type_node,
13060 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13061 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13062 = build_function_type_list (V8HI_type_node,
13063 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13064 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13065 = build_function_type_list (V4SI_type_node,
13066 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13067 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13068 = build_function_type_list (V2DI_type_node,
13069 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13070 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13071 = build_function_type_list (V2DI_type_node,
13072 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13073 tree v2df_ftype_v2df
b4de2f7d 13074 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13075 tree v2df_ftype_double
b4de2f7d 13076 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13077 tree v2df_ftype_double_double
b4de2f7d
AH
13078 = build_function_type_list (V2DF_type_node,
13079 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13080 tree int_ftype_v8hi_int
b4de2f7d
AH
13081 = build_function_type_list (integer_type_node,
13082 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13083 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13084 = build_function_type_list (V8HI_type_node,
13085 V8HI_type_node, integer_type_node,
13086 integer_type_node, NULL_TREE);
916b60b7 13087 tree v2di_ftype_v2di_int
b4de2f7d
AH
13088 = build_function_type_list (V2DI_type_node,
13089 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13090 tree v4si_ftype_v4si_int
b4de2f7d
AH
13091 = build_function_type_list (V4SI_type_node,
13092 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13093 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13094 = build_function_type_list (V8HI_type_node,
13095 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13096 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13097 = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
				pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}
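
/* Illustrative note (added; not in the original source): each def_builtin
   call above ties a user-visible builtin to a named insn pattern in
   i386.md.  The intrinsic headers are thin wrappers over these builtins;
   for example, xmmintrin.h defines _mm_add_ps roughly as

     static __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
     }

   where __builtin_ia32_addps itself is registered by the bdesc_2arg loop
   above.  */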

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}
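
/* Usage sketch (added for illustration): the expanders below call this
   helper as

     if (VECTOR_MODE_P (mode0))
       op0 = safe_vector_operand (op0, mode0);

   so that an erroneous operand which expanded to const0_rtx is replaced
   by a freshly cleared vector register instead of tripping the insn
   operand predicates.  */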

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
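
/* Example (added for illustration): __builtin_ia32_paddw is tabled in
   bdesc_2arg with icode CODE_FOR_addv4hi3, so a call
   __builtin_ia32_paddw (a, b) reaches this expander and emits the
   equivalent of

     (set (reg:V4HI target)
	  (plus:V4HI (reg:V4HI op0) (reg:V4HI op1)))

   once any memory operands have been copied into registers.  */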

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
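
/* Example (added for illustration): __builtin_ia32_storeups arrives here
   with icode CODE_FOR_sse_movups; the pointer argument is wrapped in a
   MEM of the insn's operand-0 mode and the vector argument becomes the
   stored value, copied through a register first if the operand predicate
   rejects it.  */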

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
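
/* Example (added for illustration): DO_LOAD distinguishes the value and
   memory forms -- __builtin_ia32_rcpps (a unop on a vector value) is
   expanded with do_load == 0, while __builtin_ia32_loadaps (which takes
   a pointer) is expanded with do_load == 1, so op0 is first dereferenced
   through a MEM in the insn's input mode.  */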

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
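
/* Note (added for illustration): the vm* patterns used here, e.g.
   CODE_FOR_vmsqrtv4sf2 for __builtin_ia32_sqrtss, take two vector
   inputs -- the value being operated on and the vector supplying the
   untouched upper elements -- which is why op0 is duplicated into op1
   above.  */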

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
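
/* Example (added for illustration): cmpps encodes only EQ/LT/LE/UNORD
   and their negations, so __builtin_ia32_cmpgtps is tabled with d->flag
   set and d->comparison == LT; the operand swap above turns the
   unsupported "a > b" into the supported "b < a".  */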

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
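
/* Example (added for illustration): __builtin_ia32_comieq (a, b) emits a
   comiss pattern whose SET_DEST is the flags register; the code above
   then materializes the int result by zeroing an SImode pseudo and
   setting its QImode low part from the flag comparison -- in effect a
   setcc.  */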

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
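
/* Dispatch sketch (added for illustration): most builtins never reach
   the switch above -- e.g. __builtin_ia32_andps falls through to the
   bdesc_2arg scan and is expanded by ix86_expand_binop_builtin.  Only
   builtins with irregular shapes (immediate selectors, memory operands,
   fences) need explicit cases.  */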

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImode values as SImode.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
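
/* Example (added for illustration): on 64-bit targets with the red zone
   available, an SImode operand is widened to DImode and stored below
   the stack pointer without adjusting it; otherwise the value is
   pushed, and the caller is expected to release the slot with
   ix86_free_from_memory below.  */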

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are
	 available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
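
/* Example (added for illustration): an arbitrary CONST_DOUBLE asked to
   reload into an SSE class yields NO_REGS, forcing the constant into
   the constant pool; only the values recognized by
   standard_80387_constant_p (0.0 and 1.0) may reach the 387 register
   stack directly.  */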

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
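
/* Example (added for illustration): a DFmode copy between FLOAT_REGS
   and SSE_REGS has no direct instruction on these targets, so this
   function returns nonzero and the allocator routes the copy through a
   stack slot.  */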
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from a general purpose register we may emit
	 multiple stores followed by a single load, causing a memory size
	 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
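
/* Worked example (added for illustration): an SImode move between MMX
   and integer registers needs no secondary memory, so the cost comes
   straight from ix86_cost->mmxsse_to_integer; the two 20-unit penalties
   above apply only on the secondary-memory path.  */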

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
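
/* For example, QImode is always OK in regno 0 (%eax), while for
   general purpose registers without a QI part, such as %esi, it is
   allowed only when partial register stalls are not a concern or
   during/after reload.  */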

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
         as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * ((int) GET_MODE_SIZE (mode)
                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
}
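
/* E.g. loading a DFmode value into an x87 class is charged
   ix86_cost->fp_load[1], an SFmode SSE load ix86_cost->sse_load[0];
   modes without a table entry get the arbitrary fallback cost of 100.  */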

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (x, code, outer_code, total)
     rtx x;
     int code, outer_code;
     int *total;
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          case 2: /* 1.0 */
            *total = 2;
            break;
          default:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary.  */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    default:
      return false;
    }
}
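
/* Example: (mult:SI (reg:SI 0) (const_int 5)) is costed as
   COSTS_N_INSNS (ix86_cost->mult_init[2] + 3 * ix86_cost->mult_bit),
   since MODE_INDEX (SImode) is 2 and the loop above counts three bits
   in the constant 5.  */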

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\tpushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
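
/* For illustration, the non-MACHOPIC_PURE path above emits a stub of
   roughly this shape for label number 1 (actual names depend on the
   GEN_*_NAME macros and the stub argument):

	<stub>:
		.indirect_symbol <symbol>
		jmp	*L1$lz
	<binder>:
		pushl	$L1$lz
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol <symbol>
		.long	<binder>  */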
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || (is_attribute_p ("gcc_struct", name)
               && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
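
/* Example (user source, not part of this file): the handler above
   accepts

       struct __attribute__ ((ms_struct)) s { int a : 3; int b : 5; };

   and rejects mixing; adding gcc_struct to a type already carrying
   ms_struct draws the "incompatible attribute" warning above.  */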

static bool
ix86_ms_bitfield_layout_p (record_type)
     tree record_type;
{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Return an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
          {
            tmp = gen_rtx_REG (SImode, 2 /* ECX */);
            output_set_got (tmp);

            xops[1] = tmp;
            output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
            output_asm_insn ("jmp\t{*}%1", xops);
          }
    }
}
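
/* As a sketch of the output: for !TARGET_64BIT, a non-PIC, locally
   bound FUNCTION with stack-passed arguments, DELTA == 8 and
   VCALL_OFFSET == 0, the templates above produce just

	addl	$8, 4(%esp)
	jmp	<function>  */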

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
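
/* For the 32-bit non-PIC case this emits, assuming LPREFIX "L",
   PROFILE_COUNT_REGISTER "edx" and MCOUNT_NAME "mcount" (all
   target-dependent macros) and labelno 3:

	movl	$LP3, %edx
	call	mcount

   with the movl omitted when NO_PROFILE_COUNTERS is defined.  */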

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting a NOP just
   before the RET instructions in such cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      insert = 1;
	}
      if (!insert)
	{
	  prev = prev_active_insn (ret);
	  if (prev && GET_CODE (prev) == JUMP_INSN
	      && any_condjump_p (prev))
	    insert = 1;
	  /* Empty functions get a branch mispredict even when the jump
	     destination is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    insert = 1;
	}
      if (insert)
	emit_insn_before (gen_nop (), ret);
    }
}
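
/* The two shapes being padded look like

	jne	.L2
	ret

   (a conditional jump directly before the ret) and a ret whose label
   is reached by a non-fallthru edge; in both cases a nop is inserted
   before the ret.  */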

/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

#include "gt-i386.h"