]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
Daily bump.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9
JVA
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
e78d8e51 41#include "optabs.h"
f103890b 42#include "toplev.h"
e075ae69 43#include "basic-block.h"
1526a060 44#include "ggc.h"
672a6f42
NB
45#include "target.h"
46#include "target-def.h"
f1e639b1 47#include "langhooks.h"
dafc5b82 48#include "cgraph.h"
2a2ab3f9 49
/* Fallback stack-probe limit; -1 means no limit is known.  A target
   configuration header may override this before we get here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
53
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DImode map to 0-3; anything else falls into slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
61
2ab0437e 62/* Processor costs (relative to an add) */
fce5a9f2 63static const
2ab0437e
JH
64struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
4977bab6 69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 70 0, /* cost of multiply per each bit set */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
72 3, /* cost of movsx */
73 3, /* cost of movzx */
2ab0437e
JH
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
f4365627
JH
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
4977bab6 98 1, /* Branch cost */
229b303a
RS
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
2ab0437e 105};
229b303a 106
32b5b1aa 107/* Processor costs (relative to an add) */
fce5a9f2 108static const
32b5b1aa 109struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 110 1, /* cost of an add instruction */
32b5b1aa
SC
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
4977bab6 114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 115 1, /* cost of multiply per each bit set */
4977bab6 116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
117 3, /* cost of movsx */
118 2, /* cost of movzx */
96e7ae40 119 15, /* "large" insn */
e2e52e1b 120 3, /* MOVE_RATIO */
7c6b971d 121 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
0f290768 124 Relative to reg-reg move (2). */
96e7ae40
JH
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
fa79946e
JH
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
f4365627
JH
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
4977bab6 143 1, /* Branch cost */
229b303a
RS
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
150};
151
fce5a9f2 152static const
32b5b1aa
SC
153struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
4977bab6 158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 159 1, /* cost of multiply per each bit set */
4977bab6 160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
161 3, /* cost of movsx */
162 2, /* cost of movzx */
96e7ae40 163 15, /* "large" insn */
e2e52e1b 164 3, /* MOVE_RATIO */
7c6b971d 165 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
0f290768 168 Relative to reg-reg move (2). */
96e7ae40
JH
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
fa79946e
JH
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
f4365627
JH
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
4977bab6 187 1, /* Branch cost */
229b303a
RS
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
194};
195
fce5a9f2 196static const
e5cb57e8 197struct processor_costs pentium_cost = {
32b5b1aa
SC
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
856b07a1 200 4, /* variable shift costs */
e5cb57e8 201 1, /* constant shift costs */
4977bab6 202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 203 0, /* cost of multiply per each bit set */
4977bab6 204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
205 3, /* cost of movsx */
206 2, /* cost of movzx */
96e7ae40 207 8, /* "large" insn */
e2e52e1b 208 6, /* MOVE_RATIO */
7c6b971d 209 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
0f290768 212 Relative to reg-reg move (2). */
96e7ae40
JH
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
fa79946e
JH
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
f4365627
JH
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
4977bab6 231 2, /* Branch cost */
229b303a
RS
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
238};
239
fce5a9f2 240static const
856b07a1
SC
241struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
e075ae69 244 1, /* variable shift costs */
856b07a1 245 1, /* constant shift costs */
4977bab6 246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 247 0, /* cost of multiply per each bit set */
4977bab6 248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
249 1, /* cost of movsx */
250 1, /* cost of movzx */
96e7ae40 251 8, /* "large" insn */
e2e52e1b 252 6, /* MOVE_RATIO */
7c6b971d 253 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
0f290768 256 Relative to reg-reg move (2). */
96e7ae40
JH
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
fa79946e
JH
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
f4365627
JH
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
4977bab6 275 2, /* Branch cost */
229b303a
RS
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
856b07a1
SC
282};
283
fce5a9f2 284static const
a269a03c
JC
285struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
e075ae69 287 2, /* cost of a lea instruction */
a269a03c
JC
288 1, /* variable shift costs */
289 1, /* constant shift costs */
4977bab6 290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 291 0, /* cost of multiply per each bit set */
4977bab6 292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
293 2, /* cost of movsx */
294 2, /* cost of movzx */
96e7ae40 295 8, /* "large" insn */
e2e52e1b 296 4, /* MOVE_RATIO */
7c6b971d 297 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
0f290768 300 Relative to reg-reg move (2). */
96e7ae40
JH
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
fa79946e
JH
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
f4365627
JH
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
4977bab6 319 1, /* Branch cost */
229b303a
RS
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
4f770e7b
RS
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
229b303a
RS
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
a269a03c
JC
326};
327
fce5a9f2 328static const
309ada50
JH
329struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
0b5107cf 331 2, /* cost of a lea instruction */
309ada50
JH
332 1, /* variable shift costs */
333 1, /* constant shift costs */
4977bab6 334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 335 0, /* cost of multiply per each bit set */
4977bab6 336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
337 1, /* cost of movsx */
338 1, /* cost of movzx */
309ada50 339 8, /* "large" insn */
e2e52e1b 340 9, /* MOVE_RATIO */
309ada50 341 4, /* cost for loading QImode using movzbl */
b72b1c29 342 {3, 4, 3}, /* cost of loading integer registers
309ada50 343 in QImode, HImode and SImode.
0f290768 344 Relative to reg-reg move (2). */
b72b1c29 345 {3, 4, 3}, /* cost of storing integer registers */
309ada50 346 4, /* cost of reg,reg fld/fst */
b72b1c29 347 {4, 4, 12}, /* cost of loading fp registers
309ada50 348 in SFmode, DFmode and XFmode */
b72b1c29 349 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 350 2, /* cost of moving MMX register */
b72b1c29 351 {4, 4}, /* cost of loading MMX registers
fa79946e 352 in SImode and DImode */
b72b1c29 353 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
b72b1c29 356 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 357 in SImode, DImode and TImode */
b72b1c29 358 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 5, /* MMX or SSE register to integer */
f4365627
JH
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
4977bab6 363 2, /* Branch cost */
229b303a
RS
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
309ada50
JH
370};
371
4977bab6
ZW
372static const
373struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
414};
415
fce5a9f2 416static const
b4e89e2d
JH
417struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
4977bab6
ZW
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 423 0, /* cost of multiply per each bit set */
4977bab6 424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
425 1, /* cost of movsx */
426 1, /* cost of movzx */
b4e89e2d
JH
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
f4365627
JH
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
4977bab6 451 2, /* Branch cost */
229b303a
RS
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
458};
459
8b60264b 460const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 461
/* Processor feature/optimization bitmasks.  Each m_* is a one-bit mask
   keyed off the PROCESSOR_* enumeration; the x86_* tunables below are
   unions of these masks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
a269a03c 472
4977bab6
ZW
473const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
a269a03c 475const int x86_zero_extend_with_and = m_486 | m_PENT;
4977bab6 476const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 477const int x86_double_with_add = ~m_386;
a269a03c 478const int x86_use_bit_test = m_386;
4977bab6
ZW
479const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481const int x86_3dnow_a = m_ATHLON_K8;
482const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
ef6257cd 483const int x86_branch_hints = m_PENT4;
b4e89e2d 484const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
485const int x86_partial_reg_stall = m_PPRO;
486const int x86_use_loop = m_K6;
4977bab6 487const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
488const int x86_use_mov0 = m_K6;
489const int x86_use_cltd = ~(m_PENT | m_K6);
490const int x86_read_modify_write = ~m_PENT;
491const int x86_read_modify = ~(m_PENT | m_PPRO);
492const int x86_split_long_moves = m_PPRO;
4977bab6 493const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 494const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 495const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
496const int x86_qimode_math = ~(0);
497const int x86_promote_qi_regs = 0;
498const int x86_himode_math = ~(m_PPRO);
499const int x86_promote_hi_regs = m_PPRO;
4977bab6
ZW
500const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
7b50a809
JH
508const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
509const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
b972dd02 510const int x86_decompose_lea = m_PENT4;
495333a6 511const int x86_shift1 = ~m_486;
4977bab6
ZW
512const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 515 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
516 scalar values in proper format leaving the upper part undefined. */
517const int x86_sse_partial_regs = m_ATHLON_K8;
518/* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521const int x86_sse_typeless_stores = m_ATHLON_K8;
522const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523const int x86_use_ffreep = m_ATHLON_K8;
524const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 525const int x86_inter_unit_moves = ~(m_ATHLON_K8);
881b2a96 526const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
be04394b
JH
527/* Some CPU cores are not able to predict more than 4 branch instructions in
528 the 16 byte window. */
529const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
a269a03c 530
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 535
5bf0ebab
RH
536/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
540
541/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 543
e075ae69 544enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
545{
546 /* ax, dx, cx, bx */
ab408a86 547 AREG, DREG, CREG, BREG,
4c0d89b5 548 /* si, di, bp, sp */
e075ae69 549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
550 /* FP registers */
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 553 /* arg pointer */
83774849 554 NON_Q_REGS,
564d80f4 555 /* flags, fpsr, dirflag, frame */
a7180f70
BS
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
558 SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
560 MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 SSE_REGS, SSE_REGS,
4c0d89b5 565};
c572e5ba 566
3d117b30 567/* The "default" register map used in 32bit mode. */
83774849 568
0f290768 569int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
570{
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
578};
579
5bf0ebab
RH
580static int const x86_64_int_parameter_registers[6] =
581{
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
584};
585
586static int const x86_64_int_return_registers[4] =
587{
588 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
589};
53c17031 590
0f7fa3d0
JH
591/* The "default" register map used in 64bit mode. */
592int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
593{
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601};
602
83774849
RH
603/* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
647 numbers.
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
656*/
0f290768 657int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
658{
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
666};
667
c572e5ba
JVA
668/* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
670
07933f72
GS
671rtx ix86_compare_op0 = NULL_RTX;
672rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 673
/* Number of distinct stack-local rtl slots tracked per function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
677
678/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
679
680struct stack_local_entry GTY(())
681{
682 unsigned short mode;
683 unsigned short n;
684 rtx rtl;
685 struct stack_local_entry *next;
686};
687
4dd2ac2c
JH
688/* Structure describing stack frame layout.
689 Stack grows downward:
690
691 [arguments]
692 <- ARG_POINTER
693 saved pc
694
695 saved frame pointer if frame_pointer_needed
696 <- HARD_FRAME_POINTER
697 [saved regs]
698
699 [padding1] \
700 )
701 [va_arg registers] (
702 > to_allocate <- FRAME_POINTER
703 [frame] (
704 )
705 [padding2] /
706 */
707struct ix86_frame
708{
709 int nregs;
710 int padding1;
8362f420 711 int va_arg_size;
4dd2ac2c
JH
712 HOST_WIDE_INT frame;
713 int padding2;
714 int outgoing_arguments_size;
8362f420 715 int red_zone_size;
4dd2ac2c
JH
716
717 HOST_WIDE_INT to_allocate;
718 /* The offsets relative to ARG_POINTER. */
719 HOST_WIDE_INT frame_pointer_offset;
720 HOST_WIDE_INT hard_frame_pointer_offset;
721 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
722
723 /* When save_regs_using_mov is set, emit prologue using
724 move instead of push instructions. */
725 bool save_regs_using_mov;
4dd2ac2c
JH
726};
727
c93e80a5
JH
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user (-mcmodel=).  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* Asm dialect (-masm=).  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* TLS dialect (-mtls-dialect=).  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments (-mregparm=).  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Branch cost: values 1-5, see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
e075ae69 787\f
b96a374d
AJ
788static int local_symbolic_operand (rtx, enum machine_mode);
789static int tls_symbolic_operand_1 (rtx, enum tls_model);
790static void output_pic_addr_const (FILE *, rtx, int);
791static void put_condition_code (enum rtx_code, enum machine_mode,
792 int, int, FILE *);
793static const char *get_some_local_dynamic_name (void);
794static int get_some_local_dynamic_name_1 (rtx *, void *);
795static rtx maybe_get_pool_constant (rtx);
796static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
797static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
798 rtx *);
e129d93a
ILT
799static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
800static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
801 enum machine_mode);
b96a374d
AJ
802static rtx get_thread_pointer (int);
803static rtx legitimize_tls_address (rtx, enum tls_model, int);
804static void get_pc_thunk_name (char [32], unsigned int);
805static rtx gen_push (rtx);
806static int memory_address_length (rtx addr);
807static int ix86_flags_dependant (rtx, rtx, enum attr_type);
808static int ix86_agi_dependant (rtx, rtx, enum attr_type);
809static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
810static void ix86_dump_ppro_packet (FILE *);
811static void ix86_reorder_insn (rtx *, rtx *);
812static struct machine_function * ix86_init_machine_status (void);
813static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
814static int ix86_nsaved_regs (void);
815static void ix86_emit_save_regs (void);
816static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 817static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 818static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
819static void ix86_sched_reorder_ppro (rtx *, rtx *);
820static HOST_WIDE_INT ix86_GOT_alias_set (void);
821static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
822static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 823static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
824static int ix86_issue_rate (void);
825static int ix86_adjust_cost (rtx, rtx, rtx, int);
826static void ix86_sched_init (FILE *, int, int);
827static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
828static int ix86_variable_issue (FILE *, int, rtx, int);
829static int ia32_use_dfa_pipeline_interface (void);
830static int ia32_multipass_dfa_lookahead (void);
831static void ix86_init_mmx_sse_builtins (void);
832static rtx x86_this_parameter (tree);
833static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree);
835static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
836static void x86_file_start (void);
837static void ix86_reorg (void);
c35d187f
RH
838static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
839static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
840static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
841 tree, int *, int);
e075ae69
RH
842
/* Decomposed form of an x86 memory address:
   base + index*scale + disp, in segment SEG
   (filled in by ix86_decompose_address).  */
struct ix86_address
{
  rtx base, index, disp;	/* Base register, index register,
				   displacement; any may be NULL.  */
  HOST_WIDE_INT scale;		/* Multiplier applied to INDEX.  */
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
b08de47e 849
b96a374d
AJ
850static int ix86_decompose_address (rtx, struct ix86_address *);
851static int ix86_address_cost (rtx);
852static bool ix86_cannot_force_const_mem (rtx);
853static rtx ix86_delegitimize_address (rtx);
bd793c65
BS
854
855struct builtin_description;
b96a374d
AJ
856static rtx ix86_expand_sse_comi (const struct builtin_description *,
857 tree, rtx);
858static rtx ix86_expand_sse_compare (const struct builtin_description *,
859 tree, rtx);
860static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
861static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
862static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
863static rtx ix86_expand_store_builtin (enum insn_code, tree);
864static rtx safe_vector_operand (rtx, enum machine_mode);
865static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
866static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
867 enum rtx_code *, enum rtx_code *);
868static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
869static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
870static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
871static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
872static int ix86_fp_comparison_cost (enum rtx_code code);
873static unsigned int ix86_select_alt_pic_regnum (void);
874static int ix86_save_reg (unsigned int, int);
875static void ix86_compute_frame_layout (struct ix86_frame *);
876static int ix86_comp_type_attributes (tree, tree);
e767b5be 877static int ix86_function_regparm (tree, tree);
91d231cb 878const struct attribute_spec ix86_attribute_table[];
b96a374d
AJ
879static bool ix86_function_ok_for_sibcall (tree, tree);
880static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
881static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
882static int ix86_value_regno (enum machine_mode);
883static bool contains_128bit_aligned_vector_p (tree);
884static bool ix86_ms_bitfield_layout_p (tree);
885static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
886static int extended_reg_mentioned_1 (rtx *, void *);
887static bool ix86_rtx_costs (rtx, int, int, int *);
888static int min_insn_size (rtx);
7c262518 889
21c318ba 890#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
b96a374d 891static void ix86_svr3_asm_out_constructor (rtx, int);
2cc07db4 892#endif
e56feed6 893
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Printable names for the classes above, indexed by
   enum x86_64_reg_class (debug output).  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
917
918#define MAX_CLASSES 4
b96a374d
AJ
919static int classify_argument (enum machine_mode, tree,
920 enum x86_64_reg_class [MAX_CLASSES], int);
921static int examine_argument (enum machine_mode, tree, int, int *, int *);
922static rtx construct_container (enum machine_mode, tree, int, int, int,
923 const int *, int);
924static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
925 enum x86_64_reg_class);
881b2a96 926
43f3a59d 927/* Table of constants used by fldpi, fldln2, etc.... */
881b2a96
RS
928static REAL_VALUE_TYPE ext_80387_constants_table [5];
929static bool ext_80387_constants_init = 0;
b96a374d 930static void init_ext_80387_constants (void);
672a6f42
NB
931\f
932/* Initialize the GCC target structure. */
91d231cb
JM
933#undef TARGET_ATTRIBUTE_TABLE
934#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 935#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
936# undef TARGET_MERGE_DECL_ATTRIBUTES
937# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
938#endif
939
8d8e52be
JM
940#undef TARGET_COMP_TYPE_ATTRIBUTES
941#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942
f6155fda
SS
943#undef TARGET_INIT_BUILTINS
944#define TARGET_INIT_BUILTINS ix86_init_builtins
945
946#undef TARGET_EXPAND_BUILTIN
947#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
948
bd09bdeb
RH
949#undef TARGET_ASM_FUNCTION_EPILOGUE
950#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 951
17b53c33
NB
952#undef TARGET_ASM_OPEN_PAREN
953#define TARGET_ASM_OPEN_PAREN ""
954#undef TARGET_ASM_CLOSE_PAREN
955#define TARGET_ASM_CLOSE_PAREN ""
956
301d03af
RS
957#undef TARGET_ASM_ALIGNED_HI_OP
958#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
959#undef TARGET_ASM_ALIGNED_SI_OP
960#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
961#ifdef ASM_QUAD
962#undef TARGET_ASM_ALIGNED_DI_OP
963#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
964#endif
965
966#undef TARGET_ASM_UNALIGNED_HI_OP
967#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
968#undef TARGET_ASM_UNALIGNED_SI_OP
969#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
970#undef TARGET_ASM_UNALIGNED_DI_OP
971#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
972
c237e94a
ZW
973#undef TARGET_SCHED_ADJUST_COST
974#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
975#undef TARGET_SCHED_ISSUE_RATE
976#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
977#undef TARGET_SCHED_VARIABLE_ISSUE
978#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
979#undef TARGET_SCHED_INIT
980#define TARGET_SCHED_INIT ix86_sched_init
981#undef TARGET_SCHED_REORDER
982#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 983#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
984#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
985 ia32_use_dfa_pipeline_interface
986#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
987#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
988 ia32_multipass_dfa_lookahead
c237e94a 989
4977bab6
ZW
990#undef TARGET_FUNCTION_OK_FOR_SIBCALL
991#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
992
f996902d
RH
993#ifdef HAVE_AS_TLS
994#undef TARGET_HAVE_TLS
995#define TARGET_HAVE_TLS true
996#endif
3a04ff64
RH
997#undef TARGET_CANNOT_FORCE_CONST_MEM
998#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 999
7daebb7a 1000#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1001#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1002
4977bab6
ZW
1003#undef TARGET_MS_BITFIELD_LAYOUT_P
1004#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1005
c590b625
RH
1006#undef TARGET_ASM_OUTPUT_MI_THUNK
1007#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1008#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1009#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1010
1bc7c5b6
ZW
1011#undef TARGET_ASM_FILE_START
1012#define TARGET_ASM_FILE_START x86_file_start
1013
3c50106f
RH
1014#undef TARGET_RTX_COSTS
1015#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1016#undef TARGET_ADDRESS_COST
1017#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1018
e129d93a
ILT
1019#undef TARGET_FIXED_CONDITION_CODE_REGS
1020#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1021#undef TARGET_CC_MODES_COMPATIBLE
1022#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1023
18dbd950
RS
1024#undef TARGET_MACHINE_DEPENDENT_REORG
1025#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1026
c35d187f
RH
1027#undef TARGET_BUILD_BUILTIN_VA_LIST
1028#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1029
9184f892
KH
1030#undef TARGET_PROMOTE_PROTOTYPES
1031#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1032
a0524eb3
KH
1033#undef TARGET_SETUP_INCOMING_VARARGS
1034#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1035
f6897b10 1036struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 1037\f
67c2b45f
JS
1038/* The svr4 ABI for the i386 says that records and unions are returned
1039 in memory. */
1040#ifndef DEFAULT_PCC_STRUCT_RETURN
1041#define DEFAULT_PCC_STRUCT_RETURN 1
1042#endif
1043
f5316dfe
MM
1044/* Sometimes certain combinations of command options do not make
1045 sense on a particular target machine. You can define a macro
1046 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1047 defined, is executed once just after all the command options have
1048 been parsed.
1049
1050 Don't use this macro to turn on various extra optimizations for
1051 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1052
1053void
b96a374d 1054override_options (void)
f5316dfe 1055{
400500c4 1056 int i;
e075ae69
RH
1057 /* Comes from final.c -- no real reason to change it. */
1058#define MAX_CODE_ALIGN 16
f5316dfe 1059
c8c5cb99
SC
1060 static struct ptt
1061 {
8b60264b
KG
1062 const struct processor_costs *cost; /* Processor costs */
1063 const int target_enable; /* Target flags to enable. */
1064 const int target_disable; /* Target flags to disable. */
1065 const int align_loop; /* Default alignments. */
2cca7283 1066 const int align_loop_max_skip;
8b60264b 1067 const int align_jump;
2cca7283 1068 const int align_jump_max_skip;
8b60264b 1069 const int align_func;
e075ae69 1070 }
0f290768 1071 const processor_target_table[PROCESSOR_max] =
e075ae69 1072 {
4977bab6
ZW
1073 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1074 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1075 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1077 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1078 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1079 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1080 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
e075ae69
RH
1081 };
1082
f4365627 1083 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1084 static struct pta
1085 {
8b60264b
KG
1086 const char *const name; /* processor name or nickname. */
1087 const enum processor_type processor;
0dd0e980
JH
1088 const enum pta_flags
1089 {
1090 PTA_SSE = 1,
1091 PTA_SSE2 = 2,
1092 PTA_MMX = 4,
f4365627 1093 PTA_PREFETCH_SSE = 8,
0dd0e980 1094 PTA_3DNOW = 16,
4977bab6
ZW
1095 PTA_3DNOW_A = 64,
1096 PTA_64BIT = 128
0dd0e980 1097 } flags;
e075ae69 1098 }
0f290768 1099 const processor_alias_table[] =
e075ae69 1100 {
0dd0e980
JH
1101 {"i386", PROCESSOR_I386, 0},
1102 {"i486", PROCESSOR_I486, 0},
1103 {"i586", PROCESSOR_PENTIUM, 0},
1104 {"pentium", PROCESSOR_PENTIUM, 0},
1105 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1106 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1107 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1108 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1109 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1110 {"i686", PROCESSOR_PENTIUMPRO, 0},
1111 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1112 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1113 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 1114 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 1115 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1116 {"k6", PROCESSOR_K6, PTA_MMX},
1117 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1119 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1120 | PTA_3DNOW_A},
f4365627 1121 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1122 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1123 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1124 | PTA_3DNOW_A | PTA_SSE},
f4365627 1125 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1126 | PTA_3DNOW_A | PTA_SSE},
f4365627 1127 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1128 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1129 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1130 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1131 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1132 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1133 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1134 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1135 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1136 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1137 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1138 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1139 };
c8c5cb99 1140
ca7558fc 1141 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1142
41ed2237 1143 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1144 in case they weren't overwritten by command line options. */
55ba61f3
JH
1145 if (TARGET_64BIT)
1146 {
1147 if (flag_omit_frame_pointer == 2)
1148 flag_omit_frame_pointer = 1;
1149 if (flag_asynchronous_unwind_tables == 2)
1150 flag_asynchronous_unwind_tables = 1;
1151 if (flag_pcc_struct_return == 2)
1152 flag_pcc_struct_return = 0;
1153 }
1154 else
1155 {
1156 if (flag_omit_frame_pointer == 2)
1157 flag_omit_frame_pointer = 0;
1158 if (flag_asynchronous_unwind_tables == 2)
1159 flag_asynchronous_unwind_tables = 0;
1160 if (flag_pcc_struct_return == 2)
7c712dcc 1161 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1162 }
1163
f5316dfe
MM
1164#ifdef SUBTARGET_OVERRIDE_OPTIONS
1165 SUBTARGET_OVERRIDE_OPTIONS;
1166#endif
1167
9e555526
RH
1168 if (!ix86_tune_string && ix86_arch_string)
1169 ix86_tune_string = ix86_arch_string;
1170 if (!ix86_tune_string)
1171 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
f4365627 1172 if (!ix86_arch_string)
3fec9fa9 1173 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1174
6189a572
JH
1175 if (ix86_cmodel_string != 0)
1176 {
1177 if (!strcmp (ix86_cmodel_string, "small"))
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1179 else if (flag_pic)
c725bd79 1180 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1181 else if (!strcmp (ix86_cmodel_string, "32"))
1182 ix86_cmodel = CM_32;
1183 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1184 ix86_cmodel = CM_KERNEL;
1185 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1186 ix86_cmodel = CM_MEDIUM;
1187 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1188 ix86_cmodel = CM_LARGE;
1189 else
1190 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1191 }
1192 else
1193 {
1194 ix86_cmodel = CM_32;
1195 if (TARGET_64BIT)
1196 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1197 }
c93e80a5
JH
1198 if (ix86_asm_string != 0)
1199 {
1200 if (!strcmp (ix86_asm_string, "intel"))
1201 ix86_asm_dialect = ASM_INTEL;
1202 else if (!strcmp (ix86_asm_string, "att"))
1203 ix86_asm_dialect = ASM_ATT;
1204 else
1205 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1206 }
6189a572 1207 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1208 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1209 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1210 if (ix86_cmodel == CM_LARGE)
c725bd79 1211 sorry ("code model `large' not supported yet");
0c2dc519 1212 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1213 sorry ("%i-bit mode not compiled in",
0c2dc519 1214 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1215
f4365627
JH
1216 for (i = 0; i < pta_size; i++)
1217 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1218 {
1219 ix86_arch = processor_alias_table[i].processor;
1220 /* Default cpu tuning to the architecture. */
9e555526 1221 ix86_tune = ix86_arch;
f4365627 1222 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1223 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1224 target_flags |= MASK_MMX;
1225 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1226 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1227 target_flags |= MASK_3DNOW;
1228 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1229 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1230 target_flags |= MASK_3DNOW_A;
1231 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1232 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1233 target_flags |= MASK_SSE;
1234 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1235 && !(target_flags_explicit & MASK_SSE2))
f4365627
JH
1236 target_flags |= MASK_SSE2;
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
4977bab6
ZW
1239 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1240 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1241 break;
1242 }
400500c4 1243
f4365627
JH
1244 if (i == pta_size)
1245 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1246
f4365627 1247 for (i = 0; i < pta_size; i++)
9e555526 1248 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
f4365627 1249 {
9e555526 1250 ix86_tune = processor_alias_table[i].processor;
4977bab6
ZW
1251 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1252 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1253 break;
1254 }
1255 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1256 x86_prefetch_sse = true;
1257 if (i == pta_size)
9e555526 1258 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1259
2ab0437e
JH
1260 if (optimize_size)
1261 ix86_cost = &size_cost;
1262 else
9e555526
RH
1263 ix86_cost = processor_target_table[ix86_tune].cost;
1264 target_flags |= processor_target_table[ix86_tune].target_enable;
1265 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1266
36edd3cc
BS
1267 /* Arrange to set up i386_stack_locals for all functions. */
1268 init_machine_status = ix86_init_machine_status;
fce5a9f2 1269
0f290768 1270 /* Validate -mregparm= value. */
e075ae69 1271 if (ix86_regparm_string)
b08de47e 1272 {
400500c4
RK
1273 i = atoi (ix86_regparm_string);
1274 if (i < 0 || i > REGPARM_MAX)
1275 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1276 else
1277 ix86_regparm = i;
b08de47e 1278 }
0d7d98ee
JH
1279 else
1280 if (TARGET_64BIT)
1281 ix86_regparm = REGPARM_MAX;
b08de47e 1282
3e18fdf6 1283 /* If the user has provided any of the -malign-* options,
a4f31c00 1284 warn and use that value only if -falign-* is not set.
3e18fdf6 1285 Remove this code in GCC 3.2 or later. */
e075ae69 1286 if (ix86_align_loops_string)
b08de47e 1287 {
3e18fdf6
GK
1288 warning ("-malign-loops is obsolete, use -falign-loops");
1289 if (align_loops == 0)
1290 {
1291 i = atoi (ix86_align_loops_string);
1292 if (i < 0 || i > MAX_CODE_ALIGN)
1293 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1294 else
1295 align_loops = 1 << i;
1296 }
b08de47e 1297 }
3af4bd89 1298
e075ae69 1299 if (ix86_align_jumps_string)
b08de47e 1300 {
3e18fdf6
GK
1301 warning ("-malign-jumps is obsolete, use -falign-jumps");
1302 if (align_jumps == 0)
1303 {
1304 i = atoi (ix86_align_jumps_string);
1305 if (i < 0 || i > MAX_CODE_ALIGN)
1306 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1307 else
1308 align_jumps = 1 << i;
1309 }
b08de47e 1310 }
b08de47e 1311
e075ae69 1312 if (ix86_align_funcs_string)
b08de47e 1313 {
3e18fdf6
GK
1314 warning ("-malign-functions is obsolete, use -falign-functions");
1315 if (align_functions == 0)
1316 {
1317 i = atoi (ix86_align_funcs_string);
1318 if (i < 0 || i > MAX_CODE_ALIGN)
1319 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1320 else
1321 align_functions = 1 << i;
1322 }
b08de47e 1323 }
3af4bd89 1324
3e18fdf6 1325 /* Default align_* from the processor table. */
3e18fdf6 1326 if (align_loops == 0)
2cca7283 1327 {
9e555526
RH
1328 align_loops = processor_target_table[ix86_tune].align_loop;
1329 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1330 }
3e18fdf6 1331 if (align_jumps == 0)
2cca7283 1332 {
9e555526
RH
1333 align_jumps = processor_target_table[ix86_tune].align_jump;
1334 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1335 }
3e18fdf6 1336 if (align_functions == 0)
2cca7283 1337 {
9e555526 1338 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1339 }
3e18fdf6 1340
e4c0478d 1341 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1342 The default of 128 bits is for Pentium III's SSE __m128, but we
1343 don't want additional code to keep the stack aligned when
1344 optimizing for code size. */
1345 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1346 ? TARGET_64BIT ? 128 : 32
fbb83b43 1347 : 128);
e075ae69 1348 if (ix86_preferred_stack_boundary_string)
3af4bd89 1349 {
400500c4 1350 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1351 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1352 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1353 TARGET_64BIT ? 4 : 2);
400500c4
RK
1354 else
1355 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1356 }
77a989d1 1357
0f290768 1358 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1359 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1360 if (ix86_branch_cost_string)
804a8ee0 1361 {
400500c4
RK
1362 i = atoi (ix86_branch_cost_string);
1363 if (i < 0 || i > 5)
1364 error ("-mbranch-cost=%d is not between 0 and 5", i);
1365 else
1366 ix86_branch_cost = i;
804a8ee0 1367 }
804a8ee0 1368
f996902d
RH
1369 if (ix86_tls_dialect_string)
1370 {
1371 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1372 ix86_tls_dialect = TLS_DIALECT_GNU;
1373 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1374 ix86_tls_dialect = TLS_DIALECT_SUN;
1375 else
1376 error ("bad value (%s) for -mtls-dialect= switch",
1377 ix86_tls_dialect_string);
1378 }
1379
e9a25f70
JL
1380 /* Keep nonleaf frame pointers. */
1381 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1382 flag_omit_frame_pointer = 1;
e075ae69
RH
1383
1384 /* If we're doing fast math, we don't care about comparison order
1385 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1386 if (flag_unsafe_math_optimizations)
e075ae69
RH
1387 target_flags &= ~MASK_IEEE_FP;
1388
30c99a84
RH
1389 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1390 since the insns won't need emulation. */
1391 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1392 target_flags &= ~MASK_NO_FANCY_MATH_387;
1393
9e200aaf
KC
1394 /* Turn on SSE2 builtins for -msse3. */
1395 if (TARGET_SSE3)
22c7c85e
L
1396 target_flags |= MASK_SSE2;
1397
1398 /* Turn on SSE builtins for -msse2. */
1399 if (TARGET_SSE2)
1400 target_flags |= MASK_SSE;
1401
14f73b5a
JH
1402 if (TARGET_64BIT)
1403 {
1404 if (TARGET_ALIGN_DOUBLE)
c725bd79 1405 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1406 if (TARGET_RTD)
c725bd79 1407 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1408 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1409 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1410 ix86_fpmath = FPMATH_SSE;
14f73b5a 1411 }
965f5423 1412 else
a5b378d6
JH
1413 {
1414 ix86_fpmath = FPMATH_387;
1415 /* i386 ABI does not specify red zone. It still makes sense to use it
1416 when programmer takes care to stack from being destroyed. */
1417 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1418 target_flags |= MASK_NO_RED_ZONE;
1419 }
965f5423
JH
1420
1421 if (ix86_fpmath_string != 0)
1422 {
1423 if (! strcmp (ix86_fpmath_string, "387"))
1424 ix86_fpmath = FPMATH_387;
1425 else if (! strcmp (ix86_fpmath_string, "sse"))
1426 {
1427 if (!TARGET_SSE)
1428 {
1429 warning ("SSE instruction set disabled, using 387 arithmetics");
1430 ix86_fpmath = FPMATH_387;
1431 }
1432 else
1433 ix86_fpmath = FPMATH_SSE;
1434 }
1435 else if (! strcmp (ix86_fpmath_string, "387,sse")
1436 || ! strcmp (ix86_fpmath_string, "sse,387"))
1437 {
1438 if (!TARGET_SSE)
1439 {
1440 warning ("SSE instruction set disabled, using 387 arithmetics");
1441 ix86_fpmath = FPMATH_387;
1442 }
1443 else if (!TARGET_80387)
1444 {
1445 warning ("387 instruction set disabled, using SSE arithmetics");
1446 ix86_fpmath = FPMATH_SSE;
1447 }
1448 else
1449 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1450 }
fce5a9f2 1451 else
965f5423
JH
1452 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1453 }
14f73b5a 1454
a7180f70
BS
1455 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1456 on by -msse. */
1457 if (TARGET_SSE)
e37af218
RH
1458 {
1459 target_flags |= MASK_MMX;
1460 x86_prefetch_sse = true;
1461 }
c6036a37 1462
47f339cf
BS
1463 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1464 if (TARGET_3DNOW)
1465 {
1466 target_flags |= MASK_MMX;
d1f87653 1467 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1468 extensions it adds. */
1469 if (x86_3dnow_a & (1 << ix86_arch))
1470 target_flags |= MASK_3DNOW_A;
1471 }
9e555526 1472 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1473 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1474 && !optimize_size)
1475 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1476
1477 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1478 {
1479 char *p;
1480 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1481 p = strchr (internal_label_prefix, 'X');
1482 internal_label_prefix_len = p - internal_label_prefix;
1483 *p = '\0';
1484 }
f5316dfe
MM
1485}
1486\f
32b5b1aa 1487void
b96a374d 1488optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1489{
e9a25f70
JL
1490 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1491 make the problem with not enough registers even worse. */
32b5b1aa
SC
1492#ifdef INSN_SCHEDULING
1493 if (level > 1)
1494 flag_schedule_insns = 0;
1495#endif
55ba61f3
JH
1496
1497 /* The default values of these switches depend on the TARGET_64BIT
1498 that is not known at this moment. Mark these values with 2 and
1499 let user the to override these. In case there is no command line option
1500 specifying them, we will set the defaults in override_options. */
1501 if (optimize >= 1)
1502 flag_omit_frame_pointer = 2;
1503 flag_pcc_struct_return = 2;
1504 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1505}
b08de47e 1506\f
91d231cb
JM
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Windows DLL import/export and shared-section attributes.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* NOTE(review): presumably these select MS- vs GCC-compatible struct
     (bitfield) layout -- confirm against ix86_ms_bitfield_layout_p.  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};
1531
5fbf0217
EB
1532/* Decide whether we can make a sibling call to a function. DECL is the
1533 declaration of the function being targeted by the call and EXP is the
1534 CALL_EXPR representing the call. */
4977bab6
ZW
1535
1536static bool
b96a374d 1537ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1538{
1539 /* If we are generating position-independent code, we cannot sibcall
1540 optimize any indirect call, or a direct call to a global function,
1541 as the PLT requires %ebx be live. */
1542 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1543 return false;
1544
1545 /* If we are returning floats on the 80387 register stack, we cannot
1546 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1547 function that does or, conversely, from a function that does return
1548 a float to a function that doesn't; the necessary stack adjustment
1549 would not be executed. */
4977bab6 1550 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1551 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1552 return false;
1553
1554 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1555 register for the address of the target function. Make sure that all
4977bab6
ZW
1556 such registers are not used for passing parameters. */
1557 if (!decl && !TARGET_64BIT)
1558 {
e767b5be 1559 tree type;
4977bab6
ZW
1560
1561 /* We're looking at the CALL_EXPR, we need the type of the function. */
1562 type = TREE_OPERAND (exp, 0); /* pointer expression */
1563 type = TREE_TYPE (type); /* pointer type */
1564 type = TREE_TYPE (type); /* function type */
1565
e767b5be 1566 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1567 {
1568 /* ??? Need to count the actual number of registers to be used,
1569 not the possible number of registers. Fix later. */
1570 return false;
1571 }
1572 }
1573
1574 /* Otherwise okay. That also includes certain types of indirect calls. */
1575 return true;
1576}
1577
e91f04de 1578/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1579 arguments as in struct attribute_spec.handler. */
1580static tree
b96a374d
AJ
1581ix86_handle_cdecl_attribute (tree *node, tree name,
1582 tree args ATTRIBUTE_UNUSED,
1583 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1584{
1585 if (TREE_CODE (*node) != FUNCTION_TYPE
1586 && TREE_CODE (*node) != METHOD_TYPE
1587 && TREE_CODE (*node) != FIELD_DECL
1588 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1589 {
91d231cb
JM
1590 warning ("`%s' attribute only applies to functions",
1591 IDENTIFIER_POINTER (name));
1592 *no_add_attrs = true;
1593 }
e91f04de
CH
1594 else
1595 {
1596 if (is_attribute_p ("fastcall", name))
1597 {
1598 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1599 {
1600 error ("fastcall and stdcall attributes are not compatible");
1601 }
1602 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1603 {
1604 error ("fastcall and regparm attributes are not compatible");
1605 }
1606 }
1607 else if (is_attribute_p ("stdcall", name))
1608 {
1609 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1610 {
1611 error ("fastcall and stdcall attributes are not compatible");
1612 }
1613 }
1614 }
b08de47e 1615
91d231cb
JM
1616 if (TARGET_64BIT)
1617 {
1618 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1620 }
b08de47e 1621
91d231cb
JM
1622 return NULL_TREE;
1623}
b08de47e 1624
91d231cb
JM
1625/* Handle a "regparm" attribute;
1626 arguments as in struct attribute_spec.handler. */
1627static tree
b96a374d
AJ
1628ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1629 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1630{
1631 if (TREE_CODE (*node) != FUNCTION_TYPE
1632 && TREE_CODE (*node) != METHOD_TYPE
1633 && TREE_CODE (*node) != FIELD_DECL
1634 && TREE_CODE (*node) != TYPE_DECL)
1635 {
1636 warning ("`%s' attribute only applies to functions",
1637 IDENTIFIER_POINTER (name));
1638 *no_add_attrs = true;
1639 }
1640 else
1641 {
1642 tree cst;
b08de47e 1643
91d231cb
JM
1644 cst = TREE_VALUE (args);
1645 if (TREE_CODE (cst) != INTEGER_CST)
1646 {
1647 warning ("`%s' attribute requires an integer constant argument",
1648 IDENTIFIER_POINTER (name));
1649 *no_add_attrs = true;
1650 }
1651 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1652 {
1653 warning ("argument to `%s' attribute larger than %d",
1654 IDENTIFIER_POINTER (name), REGPARM_MAX);
1655 *no_add_attrs = true;
1656 }
e91f04de
CH
1657
1658 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1659 {
1660 error ("fastcall and regparm attributes are not compatible");
1661 }
b08de47e
MM
1662 }
1663
91d231cb 1664 return NULL_TREE;
b08de47e
MM
1665}
1666
1667/* Return 0 if the attributes for two types are incompatible, 1 if they
1668 are compatible, and 2 if they are nearly compatible (which causes a
1669 warning to be generated). */
1670
8d8e52be 1671static int
b96a374d 1672ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1673{
0f290768 1674 /* Check for mismatch of non-default calling convention. */
27c38fbe 1675 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1676
1677 if (TREE_CODE (type1) != FUNCTION_TYPE)
1678 return 1;
1679
b96a374d 1680 /* Check for mismatched fastcall types */
e91f04de
CH
1681 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1682 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1683 return 0;
e91f04de 1684
afcfe58c 1685 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1686 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1687 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1688 return 0;
b08de47e
MM
1689 return 1;
1690}
b08de47e 1691\f
e767b5be
JH
1692/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1693 DECL may be NULL when calling function indirectly
839a4992 1694 or considering a libcall. */
483ab821
MM
1695
1696static int
e767b5be 1697ix86_function_regparm (tree type, tree decl)
483ab821
MM
1698{
1699 tree attr;
e767b5be
JH
1700 int regparm = ix86_regparm;
1701 bool user_convention = false;
483ab821 1702
e767b5be
JH
1703 if (!TARGET_64BIT)
1704 {
1705 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1706 if (attr)
1707 {
1708 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1709 user_convention = true;
1710 }
1711
1712 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1713 {
1714 regparm = 2;
1715 user_convention = true;
1716 }
1717
1718 /* Use register calling convention for local functions when possible. */
1719 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1720 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1721 {
1722 struct cgraph_local_info *i = cgraph_local_info (decl);
1723 if (i && i->local)
1724 {
1725 /* We can't use regparm(3) for nested functions as these use
1726 static chain pointer in third argument. */
1727 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1728 regparm = 2;
1729 else
1730 regparm = 3;
1731 }
1732 }
1733 }
1734 return regparm;
483ab821
MM
1735}
1736
fe9f516f
RH
1737/* Return true if EAX is live at the start of the function. Used by
1738 ix86_expand_prologue to determine if we need special help before
1739 calling allocate_stack_worker. */
1740
1741static bool
1742ix86_eax_live_at_start_p (void)
1743{
1744 /* Cheat. Don't bother working forward from ix86_function_regparm
1745 to the function type to whether an actual argument is located in
1746 eax. Instead just look at cfg info, which is still close enough
1747 to correct at this point. This gives false positives for broken
1748 functions that might use uninitialized data that happens to be
1749 allocated in eax, but who cares? */
1750 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1751}
1752
b08de47e
MM
1753/* Value is the number of bytes of arguments automatically
1754 popped when returning from a subroutine call.
1755 FUNDECL is the declaration node of the function (as a tree),
1756 FUNTYPE is the data type of the function (as a tree),
1757 or for a library call it is an identifier node for the subroutine name.
1758 SIZE is the number of bytes of arguments passed on the stack.
1759
1760 On the 80386, the RTD insn may be used to pop them if the number
1761 of args is fixed, but if the number is variable then the caller
1762 must pop them all. RTD can't be used for library calls now
1763 because the library is compiled with the Unix compiler.
1764 Use of RTD is a selectable option, since it is incompatible with
1765 standard Unix calling sequences. If the option is not selected,
1766 the caller must always pop the args.
1767
1768 The attribute stdcall is equivalent to RTD on a per module basis. */
1769
1770int
b96a374d 1771ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1772{
3345ee7d 1773 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1774
43f3a59d 1775 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1776 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1777
43f3a59d
KH
1778 /* Stdcall and fastcall functions will pop the stack if not
1779 variable args. */
e91f04de
CH
1780 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1781 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1782 rtd = 1;
79325812 1783
698cdd84
SC
1784 if (rtd
1785 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1786 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1787 == void_type_node)))
698cdd84
SC
1788 return size;
1789 }
79325812 1790
232b8f52 1791 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1792 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
0d7d98ee 1793 && !TARGET_64BIT)
232b8f52 1794 {
e767b5be 1795 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1796
1797 if (!nregs)
1798 return GET_MODE_SIZE (Pmode);
1799 }
1800
1801 return 0;
b08de47e 1802}
b08de47e
MM
1803\f
1804/* Argument support functions. */
1805
53c17031
JH
1806/* Return true when register may be used to pass function parameters. */
1807bool
b96a374d 1808ix86_function_arg_regno_p (int regno)
53c17031
JH
1809{
1810 int i;
1811 if (!TARGET_64BIT)
0333394e
JJ
1812 return (regno < REGPARM_MAX
1813 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1814 if (SSE_REGNO_P (regno) && TARGET_SSE)
1815 return true;
1816 /* RAX is used as hidden argument to va_arg functions. */
1817 if (!regno)
1818 return true;
1819 for (i = 0; i < REGPARM_MAX; i++)
1820 if (regno == x86_64_int_parameter_registers[i])
1821 return true;
1822 return false;
1823}
1824
b08de47e
MM
1825/* Initialize a variable CUM of type CUMULATIVE_ARGS
1826 for a call to a function whose data type is FNTYPE.
1827 For a library call, FNTYPE is 0. */
1828
1829void
b96a374d
AJ
1830init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1831 tree fntype, /* tree ptr for function decl */
1832 rtx libname, /* SYMBOL_REF of library name or 0 */
1833 tree fndecl)
b08de47e
MM
1834{
1835 static CUMULATIVE_ARGS zero_cum;
1836 tree param, next_param;
1837
1838 if (TARGET_DEBUG_ARG)
1839 {
1840 fprintf (stderr, "\ninit_cumulative_args (");
1841 if (fntype)
e9a25f70
JL
1842 fprintf (stderr, "fntype code = %s, ret code = %s",
1843 tree_code_name[(int) TREE_CODE (fntype)],
1844 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1845 else
1846 fprintf (stderr, "no fntype");
1847
1848 if (libname)
1849 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1850 }
1851
1852 *cum = zero_cum;
1853
1854 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1855 if (fntype)
1856 cum->nregs = ix86_function_regparm (fntype, fndecl);
1857 else
1858 cum->nregs = ix86_regparm;
53c17031 1859 cum->sse_nregs = SSE_REGPARM_MAX;
bcf17554 1860 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
1861 cum->warn_sse = true;
1862 cum->warn_mmx = true;
53c17031 1863 cum->maybe_vaarg = false;
b08de47e 1864
e91f04de
CH
1865 /* Use ecx and edx registers if function has fastcall attribute */
1866 if (fntype && !TARGET_64BIT)
1867 {
1868 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1869 {
1870 cum->nregs = 2;
1871 cum->fastcall = 1;
1872 }
1873 }
1874
1875
b08de47e
MM
1876 /* Determine if this function has variable arguments. This is
1877 indicated by the last argument being 'void_type_mode' if there
1878 are no variable arguments. If there are variable arguments, then
1879 we won't pass anything in registers */
1880
e1be55d0 1881 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
b08de47e
MM
1882 {
1883 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1884 param != 0; param = next_param)
b08de47e
MM
1885 {
1886 next_param = TREE_CHAIN (param);
e9a25f70 1887 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1888 {
1889 if (!TARGET_64BIT)
e91f04de
CH
1890 {
1891 cum->nregs = 0;
e1be55d0
JH
1892 cum->sse_nregs = 0;
1893 cum->mmx_nregs = 0;
1894 cum->warn_sse = 0;
1895 cum->warn_mmx = 0;
e91f04de
CH
1896 cum->fastcall = 0;
1897 }
53c17031
JH
1898 cum->maybe_vaarg = true;
1899 }
b08de47e
MM
1900 }
1901 }
53c17031
JH
1902 if ((!fntype && !libname)
1903 || (fntype && !TYPE_ARG_TYPES (fntype)))
1904 cum->maybe_vaarg = 1;
b08de47e
MM
1905
1906 if (TARGET_DEBUG_ARG)
1907 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1908
1909 return;
1910}
1911
d1f87653 1912/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1913 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1914 class and assign registers accordingly. */
1915
1916/* Return the union class of CLASS1 and CLASS2.
1917 See the x86-64 PS ABI for details. */
1918
1919static enum x86_64_reg_class
b96a374d 1920merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1921{
1922 /* Rule #1: If both classes are equal, this is the resulting class. */
1923 if (class1 == class2)
1924 return class1;
1925
1926 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1927 the other class. */
1928 if (class1 == X86_64_NO_CLASS)
1929 return class2;
1930 if (class2 == X86_64_NO_CLASS)
1931 return class1;
1932
1933 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1934 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1935 return X86_64_MEMORY_CLASS;
1936
1937 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1938 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1939 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1940 return X86_64_INTEGERSI_CLASS;
1941 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1942 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1943 return X86_64_INTEGER_CLASS;
1944
1945 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1946 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1947 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1948 return X86_64_MEMORY_CLASS;
1949
1950 /* Rule #6: Otherwise class SSE is used. */
1951 return X86_64_SSE_CLASS;
1952}
1953
1954/* Classify the argument of type TYPE and mode MODE.
1955 CLASSES will be filled by the register class used to pass each word
1956 of the operand. The number of words is returned. In case the parameter
1957 should be passed in memory, 0 is returned. As a special case for zero
1958 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1959
1960 BIT_OFFSET is used internally for handling records and specifies offset
1961 of the offset in bits modulo 256 to avoid overflow cases.
1962
1963 See the x86-64 PS ABI for details.
1964*/
1965
1966static int
b96a374d
AJ
1967classify_argument (enum machine_mode mode, tree type,
1968 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 1969{
296e4ae8 1970 HOST_WIDE_INT bytes =
53c17031 1971 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1972 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1973
c60ee6f5
JH
1974 /* Variable sized entities are always passed/returned in memory. */
1975 if (bytes < 0)
1976 return 0;
1977
dafc5b82
JH
1978 if (mode != VOIDmode
1979 && MUST_PASS_IN_STACK (mode, type))
1980 return 0;
1981
53c17031
JH
1982 if (type && AGGREGATE_TYPE_P (type))
1983 {
1984 int i;
1985 tree field;
1986 enum x86_64_reg_class subclasses[MAX_CLASSES];
1987
1988 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1989 if (bytes > 16)
1990 return 0;
1991
1992 for (i = 0; i < words; i++)
1993 classes[i] = X86_64_NO_CLASS;
1994
1995 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1996 signalize memory class, so handle it as special case. */
1997 if (!words)
1998 {
1999 classes[0] = X86_64_NO_CLASS;
2000 return 1;
2001 }
2002
2003 /* Classify each field of record and merge classes. */
2004 if (TREE_CODE (type) == RECORD_TYPE)
2005 {
91ea38f9
JH
2006 /* For classes first merge in the field of the subclasses. */
2007 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2008 {
2009 tree bases = TYPE_BINFO_BASETYPES (type);
2010 int n_bases = TREE_VEC_LENGTH (bases);
2011 int i;
2012
2013 for (i = 0; i < n_bases; ++i)
2014 {
2015 tree binfo = TREE_VEC_ELT (bases, i);
2016 int num;
2017 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2018 tree type = BINFO_TYPE (binfo);
2019
2020 num = classify_argument (TYPE_MODE (type),
2021 type, subclasses,
2022 (offset + bit_offset) % 256);
2023 if (!num)
2024 return 0;
2025 for (i = 0; i < num; i++)
2026 {
db01f480 2027 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2028 classes[i + pos] =
2029 merge_classes (subclasses[i], classes[i + pos]);
2030 }
2031 }
2032 }
43f3a59d 2033 /* And now merge the fields of structure. */
53c17031
JH
2034 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2035 {
2036 if (TREE_CODE (field) == FIELD_DECL)
2037 {
2038 int num;
2039
2040 /* Bitfields are always classified as integer. Handle them
2041 early, since later code would consider them to be
2042 misaligned integers. */
2043 if (DECL_BIT_FIELD (field))
2044 {
2045 for (i = int_bit_position (field) / 8 / 8;
2046 i < (int_bit_position (field)
2047 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2048 + 63) / 8 / 8; i++)
53c17031
JH
2049 classes[i] =
2050 merge_classes (X86_64_INTEGER_CLASS,
2051 classes[i]);
2052 }
2053 else
2054 {
2055 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2056 TREE_TYPE (field), subclasses,
2057 (int_bit_position (field)
2058 + bit_offset) % 256);
2059 if (!num)
2060 return 0;
2061 for (i = 0; i < num; i++)
2062 {
2063 int pos =
db01f480 2064 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2065 classes[i + pos] =
2066 merge_classes (subclasses[i], classes[i + pos]);
2067 }
2068 }
2069 }
2070 }
2071 }
2072 /* Arrays are handled as small records. */
2073 else if (TREE_CODE (type) == ARRAY_TYPE)
2074 {
2075 int num;
2076 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2077 TREE_TYPE (type), subclasses, bit_offset);
2078 if (!num)
2079 return 0;
2080
2081 /* The partial classes are now full classes. */
2082 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2083 subclasses[0] = X86_64_SSE_CLASS;
2084 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2085 subclasses[0] = X86_64_INTEGER_CLASS;
2086
2087 for (i = 0; i < words; i++)
2088 classes[i] = subclasses[i % num];
2089 }
2090 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2091 else if (TREE_CODE (type) == UNION_TYPE
2092 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2093 {
91ea38f9
JH
2094 /* For classes first merge in the field of the subclasses. */
2095 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2096 {
2097 tree bases = TYPE_BINFO_BASETYPES (type);
2098 int n_bases = TREE_VEC_LENGTH (bases);
2099 int i;
2100
2101 for (i = 0; i < n_bases; ++i)
2102 {
2103 tree binfo = TREE_VEC_ELT (bases, i);
2104 int num;
2105 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2106 tree type = BINFO_TYPE (binfo);
2107
2108 num = classify_argument (TYPE_MODE (type),
2109 type, subclasses,
db01f480 2110 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2111 if (!num)
2112 return 0;
2113 for (i = 0; i < num; i++)
2114 {
c16576e6 2115 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2116 classes[i + pos] =
2117 merge_classes (subclasses[i], classes[i + pos]);
2118 }
2119 }
2120 }
53c17031
JH
2121 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2122 {
2123 if (TREE_CODE (field) == FIELD_DECL)
2124 {
2125 int num;
2126 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2127 TREE_TYPE (field), subclasses,
2128 bit_offset);
2129 if (!num)
2130 return 0;
2131 for (i = 0; i < num; i++)
2132 classes[i] = merge_classes (subclasses[i], classes[i]);
2133 }
2134 }
2135 }
448ec26c
WH
2136 else if (TREE_CODE (type) == SET_TYPE)
2137 {
2138 if (bytes <= 4)
2139 {
2140 classes[0] = X86_64_INTEGERSI_CLASS;
2141 return 1;
2142 }
2143 else if (bytes <= 8)
2144 {
2145 classes[0] = X86_64_INTEGER_CLASS;
2146 return 1;
2147 }
2148 else if (bytes <= 12)
2149 {
2150 classes[0] = X86_64_INTEGER_CLASS;
2151 classes[1] = X86_64_INTEGERSI_CLASS;
2152 return 2;
2153 }
2154 else
2155 {
2156 classes[0] = X86_64_INTEGER_CLASS;
2157 classes[1] = X86_64_INTEGER_CLASS;
2158 return 2;
2159 }
2160 }
53c17031
JH
2161 else
2162 abort ();
2163
2164 /* Final merger cleanup. */
2165 for (i = 0; i < words; i++)
2166 {
2167 /* If one class is MEMORY, everything should be passed in
2168 memory. */
2169 if (classes[i] == X86_64_MEMORY_CLASS)
2170 return 0;
2171
d6a7951f 2172 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2173 X86_64_SSE_CLASS. */
2174 if (classes[i] == X86_64_SSEUP_CLASS
2175 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2176 classes[i] = X86_64_SSE_CLASS;
2177
d6a7951f 2178 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2179 if (classes[i] == X86_64_X87UP_CLASS
2180 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2181 classes[i] = X86_64_SSE_CLASS;
2182 }
2183 return words;
2184 }
2185
2186 /* Compute alignment needed. We align all types to natural boundaries with
2187 exception of XFmode that is aligned to 64bits. */
2188 if (mode != VOIDmode && mode != BLKmode)
2189 {
2190 int mode_alignment = GET_MODE_BITSIZE (mode);
2191
2192 if (mode == XFmode)
2193 mode_alignment = 128;
2194 else if (mode == XCmode)
2195 mode_alignment = 256;
f5143c46 2196 /* Misaligned fields are always returned in memory. */
53c17031
JH
2197 if (bit_offset % mode_alignment)
2198 return 0;
2199 }
2200
2201 /* Classification of atomic types. */
2202 switch (mode)
2203 {
2204 case DImode:
2205 case SImode:
2206 case HImode:
2207 case QImode:
2208 case CSImode:
2209 case CHImode:
2210 case CQImode:
2211 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2212 classes[0] = X86_64_INTEGERSI_CLASS;
2213 else
2214 classes[0] = X86_64_INTEGER_CLASS;
2215 return 1;
2216 case CDImode:
2217 case TImode:
2218 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2219 return 2;
2220 case CTImode:
2221 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2222 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2223 return 4;
2224 case SFmode:
2225 if (!(bit_offset % 64))
2226 classes[0] = X86_64_SSESF_CLASS;
2227 else
2228 classes[0] = X86_64_SSE_CLASS;
2229 return 1;
2230 case DFmode:
2231 classes[0] = X86_64_SSEDF_CLASS;
2232 return 1;
f8a1ebc6 2233 case XFmode:
53c17031
JH
2234 classes[0] = X86_64_X87_CLASS;
2235 classes[1] = X86_64_X87UP_CLASS;
2236 return 2;
f8a1ebc6 2237 case TFmode:
cf2348cb
JH
2238 case TCmode:
2239 return 0;
f8a1ebc6 2240 case XCmode:
53c17031
JH
2241 classes[0] = X86_64_X87_CLASS;
2242 classes[1] = X86_64_X87UP_CLASS;
2243 classes[2] = X86_64_X87_CLASS;
2244 classes[3] = X86_64_X87UP_CLASS;
2245 return 4;
2246 case DCmode:
2247 classes[0] = X86_64_SSEDF_CLASS;
2248 classes[1] = X86_64_SSEDF_CLASS;
2249 return 2;
2250 case SCmode:
2251 classes[0] = X86_64_SSE_CLASS;
2252 return 1;
e95d6b23
JH
2253 case V4SFmode:
2254 case V4SImode:
495333a6
JH
2255 case V16QImode:
2256 case V8HImode:
2257 case V2DFmode:
2258 case V2DImode:
e95d6b23
JH
2259 classes[0] = X86_64_SSE_CLASS;
2260 classes[1] = X86_64_SSEUP_CLASS;
2261 return 2;
2262 case V2SFmode:
2263 case V2SImode:
2264 case V4HImode:
2265 case V8QImode:
1194ca05 2266 return 0;
53c17031 2267 case BLKmode:
e95d6b23 2268 case VOIDmode:
53c17031
JH
2269 return 0;
2270 default:
2271 abort ();
2272 }
2273}
2274
2275/* Examine the argument and return set number of register required in each
f5143c46 2276 class. Return 0 iff parameter should be passed in memory. */
53c17031 2277static int
b96a374d
AJ
2278examine_argument (enum machine_mode mode, tree type, int in_return,
2279 int *int_nregs, int *sse_nregs)
53c17031
JH
2280{
2281 enum x86_64_reg_class class[MAX_CLASSES];
2282 int n = classify_argument (mode, type, class, 0);
2283
2284 *int_nregs = 0;
2285 *sse_nregs = 0;
2286 if (!n)
2287 return 0;
2288 for (n--; n >= 0; n--)
2289 switch (class[n])
2290 {
2291 case X86_64_INTEGER_CLASS:
2292 case X86_64_INTEGERSI_CLASS:
2293 (*int_nregs)++;
2294 break;
2295 case X86_64_SSE_CLASS:
2296 case X86_64_SSESF_CLASS:
2297 case X86_64_SSEDF_CLASS:
2298 (*sse_nregs)++;
2299 break;
2300 case X86_64_NO_CLASS:
2301 case X86_64_SSEUP_CLASS:
2302 break;
2303 case X86_64_X87_CLASS:
2304 case X86_64_X87UP_CLASS:
2305 if (!in_return)
2306 return 0;
2307 break;
2308 case X86_64_MEMORY_CLASS:
2309 abort ();
2310 }
2311 return 1;
2312}
2313/* Construct container for the argument used by GCC interface. See
2314 FUNCTION_ARG for the detailed description. */
2315static rtx
b96a374d
AJ
2316construct_container (enum machine_mode mode, tree type, int in_return,
2317 int nintregs, int nsseregs, const int * intreg,
2318 int sse_regno)
53c17031
JH
2319{
2320 enum machine_mode tmpmode;
2321 int bytes =
2322 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2323 enum x86_64_reg_class class[MAX_CLASSES];
2324 int n;
2325 int i;
2326 int nexps = 0;
2327 int needed_sseregs, needed_intregs;
2328 rtx exp[MAX_CLASSES];
2329 rtx ret;
2330
2331 n = classify_argument (mode, type, class, 0);
2332 if (TARGET_DEBUG_ARG)
2333 {
2334 if (!n)
2335 fprintf (stderr, "Memory class\n");
2336 else
2337 {
2338 fprintf (stderr, "Classes:");
2339 for (i = 0; i < n; i++)
2340 {
2341 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2342 }
2343 fprintf (stderr, "\n");
2344 }
2345 }
2346 if (!n)
2347 return NULL;
2348 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2349 return NULL;
2350 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2351 return NULL;
2352
2353 /* First construct simple cases. Avoid SCmode, since we want to use
2354 single register to pass this type. */
2355 if (n == 1 && mode != SCmode)
2356 switch (class[0])
2357 {
2358 case X86_64_INTEGER_CLASS:
2359 case X86_64_INTEGERSI_CLASS:
2360 return gen_rtx_REG (mode, intreg[0]);
2361 case X86_64_SSE_CLASS:
2362 case X86_64_SSESF_CLASS:
2363 case X86_64_SSEDF_CLASS:
2364 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2365 case X86_64_X87_CLASS:
2366 return gen_rtx_REG (mode, FIRST_STACK_REG);
2367 case X86_64_NO_CLASS:
2368 /* Zero sized array, struct or class. */
2369 return NULL;
2370 default:
2371 abort ();
2372 }
2373 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2374 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2375 if (n == 2
2376 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
f8a1ebc6 2377 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
53c17031
JH
2378 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2379 && class[1] == X86_64_INTEGER_CLASS
f8a1ebc6 2380 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
2381 && intreg[0] + 1 == intreg[1])
2382 return gen_rtx_REG (mode, intreg[0]);
2383 if (n == 4
2384 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2385 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
f8a1ebc6 2386 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
53c17031
JH
2387
2388 /* Otherwise figure out the entries of the PARALLEL. */
2389 for (i = 0; i < n; i++)
2390 {
2391 switch (class[i])
2392 {
2393 case X86_64_NO_CLASS:
2394 break;
2395 case X86_64_INTEGER_CLASS:
2396 case X86_64_INTEGERSI_CLASS:
d1f87653 2397 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2398 if (i * 8 + 8 > bytes)
2399 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2400 else if (class[i] == X86_64_INTEGERSI_CLASS)
2401 tmpmode = SImode;
2402 else
2403 tmpmode = DImode;
2404 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2405 if (tmpmode == BLKmode)
2406 tmpmode = DImode;
2407 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2408 gen_rtx_REG (tmpmode, *intreg),
2409 GEN_INT (i*8));
2410 intreg++;
2411 break;
2412 case X86_64_SSESF_CLASS:
2413 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2414 gen_rtx_REG (SFmode,
2415 SSE_REGNO (sse_regno)),
2416 GEN_INT (i*8));
2417 sse_regno++;
2418 break;
2419 case X86_64_SSEDF_CLASS:
2420 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2421 gen_rtx_REG (DFmode,
2422 SSE_REGNO (sse_regno)),
2423 GEN_INT (i*8));
2424 sse_regno++;
2425 break;
2426 case X86_64_SSE_CLASS:
12f5c45e
JH
2427 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2428 tmpmode = TImode;
53c17031
JH
2429 else
2430 tmpmode = DImode;
2431 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2432 gen_rtx_REG (tmpmode,
2433 SSE_REGNO (sse_regno)),
2434 GEN_INT (i*8));
12f5c45e
JH
2435 if (tmpmode == TImode)
2436 i++;
53c17031
JH
2437 sse_regno++;
2438 break;
2439 default:
2440 abort ();
2441 }
2442 }
2443 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2444 for (i = 0; i < nexps; i++)
2445 XVECEXP (ret, 0, i) = exp [i];
2446 return ret;
2447}
2448
b08de47e
MM
2449/* Update the data in CUM to advance over an argument
2450 of mode MODE and data type TYPE.
2451 (TYPE is null for libcalls where that information may not be available.) */
2452
2453void
b96a374d
AJ
2454function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2455 enum machine_mode mode, /* current arg mode */
2456 tree type, /* type of the argument or 0 if lib support */
2457 int named) /* whether or not the argument was named */
b08de47e 2458{
5ac9118e
KG
2459 int bytes =
2460 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2461 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2462
2463 if (TARGET_DEBUG_ARG)
2464 fprintf (stderr,
bcf17554
JH
2465 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2466 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
53c17031 2467 if (TARGET_64BIT)
b08de47e 2468 {
53c17031
JH
2469 int int_nregs, sse_nregs;
2470 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2471 cum->words += words;
2472 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2473 {
53c17031
JH
2474 cum->nregs -= int_nregs;
2475 cum->sse_nregs -= sse_nregs;
2476 cum->regno += int_nregs;
2477 cum->sse_regno += sse_nregs;
82a127a9 2478 }
53c17031
JH
2479 else
2480 cum->words += words;
b08de47e 2481 }
a4f31c00 2482 else
82a127a9 2483 {
bcf17554
JH
2484 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2485 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2486 {
2487 cum->sse_words += words;
2488 cum->sse_nregs -= 1;
2489 cum->sse_regno += 1;
2490 if (cum->sse_nregs <= 0)
2491 {
2492 cum->sse_nregs = 0;
2493 cum->sse_regno = 0;
2494 }
2495 }
bcf17554
JH
2496 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2497 && (!type || !AGGREGATE_TYPE_P (type)))
2498 {
2499 cum->mmx_words += words;
2500 cum->mmx_nregs -= 1;
2501 cum->mmx_regno += 1;
2502 if (cum->mmx_nregs <= 0)
2503 {
2504 cum->mmx_nregs = 0;
2505 cum->mmx_regno = 0;
2506 }
2507 }
53c17031 2508 else
82a127a9 2509 {
53c17031
JH
2510 cum->words += words;
2511 cum->nregs -= words;
2512 cum->regno += words;
2513
2514 if (cum->nregs <= 0)
2515 {
2516 cum->nregs = 0;
2517 cum->regno = 0;
2518 }
82a127a9
CM
2519 }
2520 }
b08de47e
MM
2521 return;
2522}
2523
2524/* Define where to put the arguments to a function.
2525 Value is zero to push the argument on the stack,
2526 or a hard register in which to store the argument.
2527
2528 MODE is the argument's machine mode.
2529 TYPE is the data type of the argument (as a tree).
2530 This is null for libcalls where that information may
2531 not be available.
2532 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2533 the preceding args and about the function being called.
2534 NAMED is nonzero if this argument is a named parameter
2535 (otherwise it is an extra parameter matching an ellipsis). */
2536
07933f72 2537rtx
b96a374d
AJ
2538function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2539 enum machine_mode mode, /* current arg mode */
2540 tree type, /* type of the argument or 0 if lib support */
2541 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2542{
2543 rtx ret = NULL_RTX;
5ac9118e
KG
2544 int bytes =
2545 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2546 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2547 static bool warnedsse, warnedmmx;
b08de47e 2548
5bdc5878 2549 /* Handle a hidden AL argument containing number of registers for varargs
53c17031
JH
2550 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2551 any AL settings. */
32ee7d1d 2552 if (mode == VOIDmode)
b08de47e 2553 {
53c17031
JH
2554 if (TARGET_64BIT)
2555 return GEN_INT (cum->maybe_vaarg
2556 ? (cum->sse_nregs < 0
2557 ? SSE_REGPARM_MAX
2558 : cum->sse_regno)
2559 : -1);
2560 else
2561 return constm1_rtx;
b08de47e 2562 }
53c17031
JH
2563 if (TARGET_64BIT)
2564 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2565 &x86_64_int_parameter_registers [cum->regno],
2566 cum->sse_regno);
2567 else
2568 switch (mode)
2569 {
2570 /* For now, pass fp/complex values on the stack. */
2571 default:
2572 break;
2573
2574 case BLKmode:
8d454008
RH
2575 if (bytes < 0)
2576 break;
5efb1046 2577 /* FALLTHRU */
53c17031
JH
2578 case DImode:
2579 case SImode:
2580 case HImode:
2581 case QImode:
2582 if (words <= cum->nregs)
b96a374d
AJ
2583 {
2584 int regno = cum->regno;
2585
2586 /* Fastcall allocates the first two DWORD (SImode) or
2587 smaller arguments to ECX and EDX. */
2588 if (cum->fastcall)
2589 {
2590 if (mode == BLKmode || mode == DImode)
2591 break;
2592
2593 /* ECX not EAX is the first allocated register. */
2594 if (regno == 0)
e767b5be 2595 regno = 2;
b96a374d
AJ
2596 }
2597 ret = gen_rtx_REG (mode, regno);
2598 }
53c17031
JH
2599 break;
2600 case TImode:
bcf17554
JH
2601 case V16QImode:
2602 case V8HImode:
2603 case V4SImode:
2604 case V2DImode:
2605 case V4SFmode:
2606 case V2DFmode:
2607 if (!type || !AGGREGATE_TYPE_P (type))
2608 {
e1be55d0 2609 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
bcf17554
JH
2610 {
2611 warnedsse = true;
2612 warning ("SSE vector argument without SSE enabled "
2613 "changes the ABI");
2614 }
2615 if (cum->sse_nregs)
2616 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2617 }
2618 break;
2619 case V8QImode:
2620 case V4HImode:
2621 case V2SImode:
2622 case V2SFmode:
2623 if (!type || !AGGREGATE_TYPE_P (type))
2624 {
e1be55d0 2625 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2626 {
2627 warnedmmx = true;
2628 warning ("MMX vector argument without MMX enabled "
2629 "changes the ABI");
2630 }
2631 if (cum->mmx_nregs)
2632 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2633 }
53c17031
JH
2634 break;
2635 }
b08de47e
MM
2636
2637 if (TARGET_DEBUG_ARG)
2638 {
2639 fprintf (stderr,
91ea38f9 2640 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2641 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2642
2643 if (ret)
91ea38f9 2644 print_simple_rtl (stderr, ret);
b08de47e
MM
2645 else
2646 fprintf (stderr, ", stack");
2647
2648 fprintf (stderr, " )\n");
2649 }
2650
2651 return ret;
2652}
53c17031 2653
09b2e78d
ZD
2654/* A C expression that indicates when an argument must be passed by
2655 reference. If nonzero for an argument, a copy of that argument is
2656 made in memory and a pointer to the argument is passed instead of
2657 the argument itself. The pointer is passed in whatever way is
2658 appropriate for passing a pointer to that type. */
2659
2660int
b96a374d
AJ
2661function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2662 enum machine_mode mode ATTRIBUTE_UNUSED,
2663 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2664{
2665 if (!TARGET_64BIT)
2666 return 0;
2667
2668 if (type && int_size_in_bytes (type) == -1)
2669 {
2670 if (TARGET_DEBUG_ARG)
2671 fprintf (stderr, "function_arg_pass_by_reference\n");
2672 return 1;
2673 }
2674
2675 return 0;
2676}
2677
8b978a57
JH
2678/* Return true when TYPE should be 128bit aligned for 32bit argument passing
2679 ABI */
2680static bool
b96a374d 2681contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2682{
2683 enum machine_mode mode = TYPE_MODE (type);
2684 if (SSE_REG_MODE_P (mode)
2685 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2686 return true;
2687 if (TYPE_ALIGN (type) < 128)
2688 return false;
2689
2690 if (AGGREGATE_TYPE_P (type))
2691 {
2a43945f 2692 /* Walk the aggregates recursively. */
8b978a57
JH
2693 if (TREE_CODE (type) == RECORD_TYPE
2694 || TREE_CODE (type) == UNION_TYPE
2695 || TREE_CODE (type) == QUAL_UNION_TYPE)
2696 {
2697 tree field;
2698
2699 if (TYPE_BINFO (type) != NULL
2700 && TYPE_BINFO_BASETYPES (type) != NULL)
2701 {
2702 tree bases = TYPE_BINFO_BASETYPES (type);
2703 int n_bases = TREE_VEC_LENGTH (bases);
2704 int i;
2705
2706 for (i = 0; i < n_bases; ++i)
2707 {
2708 tree binfo = TREE_VEC_ELT (bases, i);
2709 tree type = BINFO_TYPE (binfo);
2710
2711 if (contains_128bit_aligned_vector_p (type))
2712 return true;
2713 }
2714 }
43f3a59d 2715 /* And now merge the fields of structure. */
8b978a57
JH
2716 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2717 {
2718 if (TREE_CODE (field) == FIELD_DECL
2719 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2720 return true;
2721 }
2722 }
2723 /* Just for use if some languages passes arrays by value. */
2724 else if (TREE_CODE (type) == ARRAY_TYPE)
2725 {
2726 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2727 return true;
2728 }
2729 else
2730 abort ();
2731 }
2732 return false;
2733}
2734
bb498ea3
AH
2735/* Gives the alignment boundary, in bits, of an argument with the
2736 specified mode and type. */
53c17031
JH
2737
2738int
b96a374d 2739ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2740{
2741 int align;
53c17031
JH
2742 if (type)
2743 align = TYPE_ALIGN (type);
2744 else
2745 align = GET_MODE_ALIGNMENT (mode);
2746 if (align < PARM_BOUNDARY)
2747 align = PARM_BOUNDARY;
8b978a57
JH
2748 if (!TARGET_64BIT)
2749 {
2750 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2751 make an exception for SSE modes since these require 128bit
b96a374d 2752 alignment.
8b978a57
JH
2753
2754 The handling here differs from field_alignment. ICC aligns MMX
2755 arguments to 4 byte boundaries, while structure fields are aligned
2756 to 8 byte boundaries. */
2757 if (!type)
2758 {
2759 if (!SSE_REG_MODE_P (mode))
2760 align = PARM_BOUNDARY;
2761 }
2762 else
2763 {
2764 if (!contains_128bit_aligned_vector_p (type))
2765 align = PARM_BOUNDARY;
2766 }
8b978a57 2767 }
53c17031
JH
2768 if (align > 128)
2769 align = 128;
2770 return align;
2771}
2772
2773/* Return true if N is a possible register number of function value. */
2774bool
b96a374d 2775ix86_function_value_regno_p (int regno)
53c17031
JH
2776{
2777 if (!TARGET_64BIT)
2778 {
2779 return ((regno) == 0
2780 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2781 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2782 }
2783 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2784 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2785 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2786}
2787
2788/* Define how to find the value returned by a function.
2789 VALTYPE is the data type of the value (as a tree).
2790 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2791 otherwise, FUNC is 0. */
2792rtx
b96a374d 2793ix86_function_value (tree valtype)
53c17031
JH
2794{
2795 if (TARGET_64BIT)
2796 {
2797 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2798 REGPARM_MAX, SSE_REGPARM_MAX,
2799 x86_64_int_return_registers, 0);
d1f87653
KH
2800 /* For zero sized structures, construct_container return NULL, but we need
2801 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2802 if (!ret)
2803 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2804 return ret;
2805 }
2806 else
b069de3b
SS
2807 return gen_rtx_REG (TYPE_MODE (valtype),
2808 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2809}
2810
f5143c46 2811/* Return false iff type is returned in memory. */
53c17031 2812int
b96a374d 2813ix86_return_in_memory (tree type)
53c17031 2814{
a30b6839
RH
2815 int needed_intregs, needed_sseregs, size;
2816 enum machine_mode mode = TYPE_MODE (type);
2817
53c17031 2818 if (TARGET_64BIT)
a30b6839
RH
2819 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2820
2821 if (mode == BLKmode)
2822 return 1;
2823
2824 size = int_size_in_bytes (type);
2825
2826 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2827 return 0;
2828
2829 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 2830 {
a30b6839
RH
2831 /* User-created vectors small enough to fit in EAX. */
2832 if (size < 8)
5e062767 2833 return 0;
a30b6839
RH
2834
2835 /* MMX/3dNow values are returned on the stack, since we've
2836 got to EMMS/FEMMS before returning. */
2837 if (size == 8)
53c17031 2838 return 1;
a30b6839
RH
2839
2840 /* SSE values are returned in XMM0. */
2841 /* ??? Except when it doesn't exist? We have a choice of
2842 either (1) being abi incompatible with a -march switch,
2843 or (2) generating an error here. Given no good solution,
2844 I think the safest thing is one warning. The user won't
43f3a59d 2845 be able to use -Werror, but.... */
a30b6839
RH
2846 if (size == 16)
2847 {
2848 static bool warned;
2849
2850 if (TARGET_SSE)
2851 return 0;
2852
2853 if (!warned)
2854 {
2855 warned = true;
2856 warning ("SSE vector return without SSE enabled "
2857 "changes the ABI");
2858 }
2859 return 1;
2860 }
53c17031 2861 }
a30b6839 2862
cf2348cb 2863 if (mode == XFmode)
a30b6839 2864 return 0;
f8a1ebc6 2865
a30b6839
RH
2866 if (size > 12)
2867 return 1;
2868 return 0;
53c17031
JH
2869}
2870
2871/* Define how to find the value returned by a library function
2872 assuming the value has mode MODE. */
2873rtx
b96a374d 2874ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2875{
2876 if (TARGET_64BIT)
2877 {
2878 switch (mode)
2879 {
f8a1ebc6
JH
2880 case SFmode:
2881 case SCmode:
2882 case DFmode:
2883 case DCmode:
2884 return gen_rtx_REG (mode, FIRST_SSE_REG);
2885 case XFmode:
2886 case XCmode:
2887 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2888 case TFmode:
f8a1ebc6
JH
2889 case TCmode:
2890 return NULL;
2891 default:
2892 return gen_rtx_REG (mode, 0);
53c17031
JH
2893 }
2894 }
2895 else
f8a1ebc6 2896 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
2897}
2898
2899/* Given a mode, return the register to use for a return value. */
2900
2901static int
b96a374d 2902ix86_value_regno (enum machine_mode mode)
b069de3b 2903{
a30b6839 2904 /* Floating point return values in %st(0). */
b069de3b
SS
2905 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2906 return FIRST_FLOAT_REG;
a30b6839
RH
2907 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2908 we prevent this case when sse is not available. */
2909 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 2910 return FIRST_SSE_REG;
a30b6839 2911 /* Everything else in %eax. */
b069de3b 2912 return 0;
53c17031 2913}
ad919812
JH
2914\f
2915/* Create the va_list data type. */
53c17031 2916
c35d187f
RH
2917static tree
2918ix86_build_builtin_va_list (void)
ad919812
JH
2919{
2920 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2921
ad919812
JH
2922 /* For i386 we use plain pointer to argument area. */
2923 if (!TARGET_64BIT)
2924 return build_pointer_type (char_type_node);
2925
f1e639b1 2926 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2927 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2928
fce5a9f2 2929 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2930 unsigned_type_node);
fce5a9f2 2931 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2932 unsigned_type_node);
2933 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2934 ptr_type_node);
2935 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2936 ptr_type_node);
2937
2938 DECL_FIELD_CONTEXT (f_gpr) = record;
2939 DECL_FIELD_CONTEXT (f_fpr) = record;
2940 DECL_FIELD_CONTEXT (f_ovf) = record;
2941 DECL_FIELD_CONTEXT (f_sav) = record;
2942
2943 TREE_CHAIN (record) = type_decl;
2944 TYPE_NAME (record) = type_decl;
2945 TYPE_FIELDS (record) = f_gpr;
2946 TREE_CHAIN (f_gpr) = f_fpr;
2947 TREE_CHAIN (f_fpr) = f_ovf;
2948 TREE_CHAIN (f_ovf) = f_sav;
2949
2950 layout_type (record);
2951
2952 /* The correct type is an array type of one element. */
2953 return build_array_type (record, build_index_type (size_zero_node));
2954}
2955
a0524eb3 2956/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 2957
a0524eb3 2958static void
b96a374d
AJ
2959ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2960 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2961 int no_rtl)
ad919812
JH
2962{
2963 CUMULATIVE_ARGS next_cum;
2964 rtx save_area = NULL_RTX, mem;
2965 rtx label;
2966 rtx label_ref;
2967 rtx tmp_reg;
2968 rtx nsse_reg;
2969 int set;
2970 tree fntype;
2971 int stdarg_p;
2972 int i;
2973
2974 if (!TARGET_64BIT)
2975 return;
2976
2977 /* Indicate to allocate space on the stack for varargs save area. */
2978 ix86_save_varrargs_registers = 1;
2979
5474eed5
JH
2980 cfun->stack_alignment_needed = 128;
2981
ad919812
JH
2982 fntype = TREE_TYPE (current_function_decl);
2983 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2984 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2985 != void_type_node));
2986
2987 /* For varargs, we do not want to skip the dummy va_dcl argument.
2988 For stdargs, we do want to skip the last named argument. */
2989 next_cum = *cum;
2990 if (stdarg_p)
2991 function_arg_advance (&next_cum, mode, type, 1);
2992
2993 if (!no_rtl)
2994 save_area = frame_pointer_rtx;
2995
2996 set = get_varargs_alias_set ();
2997
2998 for (i = next_cum.regno; i < ix86_regparm; i++)
2999 {
3000 mem = gen_rtx_MEM (Pmode,
3001 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3002 set_mem_alias_set (mem, set);
ad919812
JH
3003 emit_move_insn (mem, gen_rtx_REG (Pmode,
3004 x86_64_int_parameter_registers[i]));
3005 }
3006
3007 if (next_cum.sse_nregs)
3008 {
3009 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3010 of SSE parameter registers used to call this function. We use
ad919812
JH
3011 sse_prologue_save insn template that produces computed jump across
3012 SSE saves. We need some preparation work to get this working. */
3013
3014 label = gen_label_rtx ();
3015 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3016
3017 /* Compute address to jump to :
3018 label - 5*eax + nnamed_sse_arguments*5 */
3019 tmp_reg = gen_reg_rtx (Pmode);
3020 nsse_reg = gen_reg_rtx (Pmode);
3021 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3022 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3023 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3024 GEN_INT (4))));
3025 if (next_cum.sse_regno)
3026 emit_move_insn
3027 (nsse_reg,
3028 gen_rtx_CONST (DImode,
3029 gen_rtx_PLUS (DImode,
3030 label_ref,
3031 GEN_INT (next_cum.sse_regno * 4))));
3032 else
3033 emit_move_insn (nsse_reg, label_ref);
3034 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3035
3036 /* Compute address of memory block we save into. We always use pointer
3037 pointing 127 bytes after first byte to store - this is needed to keep
3038 instruction size limited by 4 bytes. */
3039 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3040 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3041 plus_constant (save_area,
3042 8 * REGPARM_MAX + 127)));
ad919812 3043 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3044 set_mem_alias_set (mem, set);
8ac61af7 3045 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3046
3047 /* And finally do the dirty job! */
8ac61af7
RK
3048 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3049 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3050 }
3051
3052}
3053
3054/* Implement va_start. */
3055
3056void
b96a374d 3057ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3058{
3059 HOST_WIDE_INT words, n_gpr, n_fpr;
3060 tree f_gpr, f_fpr, f_ovf, f_sav;
3061 tree gpr, fpr, ovf, sav, t;
3062
3063 /* Only 64bit target needs something special. */
3064 if (!TARGET_64BIT)
3065 {
e5faf155 3066 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3067 return;
3068 }
3069
3070 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3071 f_fpr = TREE_CHAIN (f_gpr);
3072 f_ovf = TREE_CHAIN (f_fpr);
3073 f_sav = TREE_CHAIN (f_ovf);
3074
3075 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3076 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3077 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3078 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3079 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3080
3081 /* Count number of gp and fp argument registers used. */
3082 words = current_function_args_info.words;
3083 n_gpr = current_function_args_info.regno;
3084 n_fpr = current_function_args_info.sse_regno;
3085
3086 if (TARGET_DEBUG_ARG)
3087 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3088 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
3089
3090 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3091 build_int_2 (n_gpr * 8, 0));
3092 TREE_SIDE_EFFECTS (t) = 1;
3093 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3094
3095 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3096 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3097 TREE_SIDE_EFFECTS (t) = 1;
3098 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3099
3100 /* Find the overflow area. */
3101 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3102 if (words != 0)
3103 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3104 build_int_2 (words * UNITS_PER_WORD, 0));
3105 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3106 TREE_SIDE_EFFECTS (t) = 1;
3107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3108
3109 /* Find the register save area.
3110 Prologue of the function save it right above stack frame. */
3111 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3112 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3113 TREE_SIDE_EFFECTS (t) = 1;
3114 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3115}
3116
3117/* Implement va_arg. */
3118rtx
b96a374d 3119ix86_va_arg (tree valist, tree type)
ad919812 3120{
0139adca 3121 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
3122 tree f_gpr, f_fpr, f_ovf, f_sav;
3123 tree gpr, fpr, ovf, sav, t;
b932f770 3124 int size, rsize;
ad919812
JH
3125 rtx lab_false, lab_over = NULL_RTX;
3126 rtx addr_rtx, r;
3127 rtx container;
09b2e78d 3128 int indirect_p = 0;
ad919812
JH
3129
3130 /* Only 64bit target needs something special. */
3131 if (!TARGET_64BIT)
3132 {
3133 return std_expand_builtin_va_arg (valist, type);
3134 }
3135
3136 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3137 f_fpr = TREE_CHAIN (f_gpr);
3138 f_ovf = TREE_CHAIN (f_fpr);
3139 f_sav = TREE_CHAIN (f_ovf);
3140
3141 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3142 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3143 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3144 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3145 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3146
3147 size = int_size_in_bytes (type);
09b2e78d
ZD
3148 if (size == -1)
3149 {
3150 /* Passed by reference. */
3151 indirect_p = 1;
3152 type = build_pointer_type (type);
3153 size = int_size_in_bytes (type);
3154 }
ad919812
JH
3155 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3156
3157 container = construct_container (TYPE_MODE (type), type, 0,
3158 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3159 /*
3160 * Pull the value out of the saved registers ...
3161 */
3162
3163 addr_rtx = gen_reg_rtx (Pmode);
3164
3165 if (container)
3166 {
3167 rtx int_addr_rtx, sse_addr_rtx;
3168 int needed_intregs, needed_sseregs;
3169 int need_temp;
3170
3171 lab_over = gen_label_rtx ();
3172 lab_false = gen_label_rtx ();
8bad7136 3173
ad919812
JH
3174 examine_argument (TYPE_MODE (type), type, 0,
3175 &needed_intregs, &needed_sseregs);
3176
3177
3178 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3179 || TYPE_ALIGN (type) > 128);
3180
d1f87653 3181 /* In case we are passing structure, verify that it is consecutive block
ad919812
JH
3182 on the register save area. If not we need to do moves. */
3183 if (!need_temp && !REG_P (container))
3184 {
d1f87653 3185 /* Verify that all registers are strictly consecutive */
ad919812
JH
3186 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3187 {
3188 int i;
3189
3190 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3191 {
3192 rtx slot = XVECEXP (container, 0, i);
b531087a 3193 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
3194 || INTVAL (XEXP (slot, 1)) != i * 16)
3195 need_temp = 1;
3196 }
3197 }
3198 else
3199 {
3200 int i;
3201
3202 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3203 {
3204 rtx slot = XVECEXP (container, 0, i);
b531087a 3205 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
3206 || INTVAL (XEXP (slot, 1)) != i * 8)
3207 need_temp = 1;
3208 }
3209 }
3210 }
3211 if (!need_temp)
3212 {
3213 int_addr_rtx = addr_rtx;
3214 sse_addr_rtx = addr_rtx;
3215 }
3216 else
3217 {
3218 int_addr_rtx = gen_reg_rtx (Pmode);
3219 sse_addr_rtx = gen_reg_rtx (Pmode);
3220 }
3221 /* First ensure that we fit completely in registers. */
3222 if (needed_intregs)
3223 {
3224 emit_cmp_and_jump_insns (expand_expr
3225 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3226 GEN_INT ((REGPARM_MAX - needed_intregs +
3227 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3228 1, lab_false);
ad919812
JH
3229 }
3230 if (needed_sseregs)
3231 {
3232 emit_cmp_and_jump_insns (expand_expr
3233 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3234 GEN_INT ((SSE_REGPARM_MAX -
3235 needed_sseregs + 1) * 16 +
3236 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3237 SImode, 1, lab_false);
ad919812
JH
3238 }
3239
3240 /* Compute index to start of area used for integer regs. */
3241 if (needed_intregs)
3242 {
3243 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3244 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3245 if (r != int_addr_rtx)
3246 emit_move_insn (int_addr_rtx, r);
3247 }
3248 if (needed_sseregs)
3249 {
3250 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3251 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3252 if (r != sse_addr_rtx)
3253 emit_move_insn (sse_addr_rtx, r);
3254 }
3255 if (need_temp)
3256 {
3257 int i;
3258 rtx mem;
70642ee3 3259 rtx x;
ad919812 3260
b932f770 3261 /* Never use the memory itself, as it has the alias set. */
70642ee3
JH
3262 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3263 mem = gen_rtx_MEM (BLKmode, x);
3264 force_operand (x, addr_rtx);
0692acba 3265 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3266 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3267
ad919812
JH
3268 for (i = 0; i < XVECLEN (container, 0); i++)
3269 {
3270 rtx slot = XVECEXP (container, 0, i);
3271 rtx reg = XEXP (slot, 0);
3272 enum machine_mode mode = GET_MODE (reg);
3273 rtx src_addr;
3274 rtx src_mem;
3275 int src_offset;
3276 rtx dest_mem;
3277
3278 if (SSE_REGNO_P (REGNO (reg)))
3279 {
3280 src_addr = sse_addr_rtx;
3281 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3282 }
3283 else
3284 {
3285 src_addr = int_addr_rtx;
3286 src_offset = REGNO (reg) * 8;
3287 }
3288 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3289 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3290 src_mem = adjust_address (src_mem, mode, src_offset);
3291 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3292 emit_move_insn (dest_mem, src_mem);
3293 }
3294 }
3295
3296 if (needed_intregs)
3297 {
3298 t =
3299 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3300 build_int_2 (needed_intregs * 8, 0));
3301 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3302 TREE_SIDE_EFFECTS (t) = 1;
3303 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3304 }
3305 if (needed_sseregs)
3306 {
3307 t =
3308 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3309 build_int_2 (needed_sseregs * 16, 0));
3310 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3311 TREE_SIDE_EFFECTS (t) = 1;
3312 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3313 }
3314
3315 emit_jump_insn (gen_jump (lab_over));
3316 emit_barrier ();
3317 emit_label (lab_false);
3318 }
3319
3320 /* ... otherwise out of the overflow area. */
3321
3322 /* Care for on-stack alignment if needed. */
3323 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3324 t = ovf;
3325 else
3326 {
3327 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3328 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3329 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3330 }
3331 t = save_expr (t);
3332
3333 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3334 if (r != addr_rtx)
3335 emit_move_insn (addr_rtx, r);
3336
3337 t =
3338 build (PLUS_EXPR, TREE_TYPE (t), t,
3339 build_int_2 (rsize * UNITS_PER_WORD, 0));
3340 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3341 TREE_SIDE_EFFECTS (t) = 1;
3342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3343
3344 if (container)
3345 emit_label (lab_over);
3346
09b2e78d
ZD
3347 if (indirect_p)
3348 {
3349 r = gen_rtx_MEM (Pmode, addr_rtx);
3350 set_mem_alias_set (r, get_varargs_alias_set ());
3351 emit_move_insn (addr_rtx, r);
3352 }
3353
ad919812
JH
3354 return addr_rtx;
3355}
3356\f
c3c637e3
GS
3357/* Return nonzero if OP is either a i387 or SSE fp register. */
3358int
b96a374d 3359any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3360{
3361 return ANY_FP_REG_P (op);
3362}
3363
3364/* Return nonzero if OP is an i387 fp register. */
3365int
b96a374d 3366fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3367{
3368 return FP_REG_P (op);
3369}
3370
3371/* Return nonzero if OP is a non-fp register_operand. */
3372int
b96a374d 3373register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3374{
3375 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3376}
3377
40b982a9 3378/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3379 i387 fp register. */
3380int
b96a374d 3381register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3382{
3383 return register_operand (op, mode) && !FP_REG_P (op);
3384}
3385
7dd4b4a3
JH
3386/* Return nonzero if OP is general operand representable on x86_64. */
3387
3388int
b96a374d 3389x86_64_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3390{
3391 if (!TARGET_64BIT)
3392 return general_operand (op, mode);
3393 if (nonimmediate_operand (op, mode))
3394 return 1;
c05dbe81 3395 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3396}
3397
3398/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 3399 as either sign extended or zero extended constant. */
7dd4b4a3
JH
3400
3401int
b96a374d 3402x86_64_szext_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3403{
3404 if (!TARGET_64BIT)
3405 return general_operand (op, mode);
3406 if (nonimmediate_operand (op, mode))
3407 return 1;
c05dbe81 3408 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3409}
3410
3411/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3412
3413int
b96a374d 3414x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3415{
3416 if (!TARGET_64BIT)
3417 return nonmemory_operand (op, mode);
3418 if (register_operand (op, mode))
3419 return 1;
c05dbe81 3420 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3421}
3422
3423/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3424
3425int
b96a374d 3426x86_64_movabs_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3427{
3428 if (!TARGET_64BIT || !flag_pic)
3429 return nonmemory_operand (op, mode);
c05dbe81 3430 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3431 return 1;
3432 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3433 return 1;
3434 return 0;
3435}
3436
7e6dc358
JJ
3437/* Return nonzero if OPNUM's MEM should be matched
3438 in movabs* patterns. */
3439
3440int
3441ix86_check_movabs (rtx insn, int opnum)
3442{
3443 rtx set, mem;
3444
3445 set = PATTERN (insn);
3446 if (GET_CODE (set) == PARALLEL)
3447 set = XVECEXP (set, 0, 0);
3448 if (GET_CODE (set) != SET)
3449 abort ();
3450 mem = XEXP (set, opnum);
3451 while (GET_CODE (mem) == SUBREG)
3452 mem = SUBREG_REG (mem);
3453 if (GET_CODE (mem) != MEM)
3454 abort ();
3455 return (volatile_ok || !MEM_VOLATILE_P (mem));
3456}
3457
7dd4b4a3
JH
3458/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3459
3460int
b96a374d 3461x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3462{
3463 if (!TARGET_64BIT)
3464 return nonmemory_operand (op, mode);
3465 if (register_operand (op, mode))
3466 return 1;
c05dbe81 3467 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3468}
3469
3470/* Return nonzero if OP is immediate operand representable on x86_64. */
3471
3472int
b96a374d 3473x86_64_immediate_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3474{
3475 if (!TARGET_64BIT)
3476 return immediate_operand (op, mode);
c05dbe81 3477 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3478}
3479
/* Return nonzero if OP is an immediate operand representable on
   x86_64 as a zero-extended 32-bit value.  */

int
x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return x86_64_zero_extended_value (op);
}
3487
794a292d
JJ
3488/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3489 for shift & compare patterns, as shifting by 0 does not change flags),
3490 else return zero. */
3491
3492int
b96a374d 3493const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3494{
3495 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3496}
3497
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  Also accepts the GOT-related UNSPECs
   that wrap symbols under PIC.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* A bare symbol/label, or a GOT/GOTOFF/GOTPCREL wrapper, inside
	 the CONST is acceptable as-is.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      /* Otherwise only (plus X (const_int)) forms can qualify.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      /* Look at the symbol wrapped inside the GOTOFF unspec.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 3542
/* Return true if the operand contains a @GOT or @GOTOFF reference,
   i.e. a CONST wrapping an UNSPEC (possibly plus an offset).  The
   64-bit form only recognizes @GOTPCREL unspecs.  */

int
pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (op) == UNSPEC
	  && XINT (op, 1) == UNSPEC_GOTPCREL)
	return 1;
      if (GET_CODE (op) == PLUS
	  && GET_CODE (XEXP (op, 0)) == UNSPEC
	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
    }
  else
    {
      /* On 32-bit, any UNSPEC (optionally plus a constant offset)
	 counts as a PIC reference.  */
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
2a2ab3f9 3574
623fe810
RH
/* Return true if OP is a symbolic operand that resolves locally:
   a label, a SYMBOL_REF marked local, or an internal label emitted
   by ASM_GENERATE_INTERNAL_LABEL.  A constant offset is ignored.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Strip (const (plus SYM (const_int))) down to SYM.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}
3605
2ae5ae57 3606/* Test for various thread-local symbols. */
f996902d
RH
3607
3608int
8d531ab9 3609tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d 3610{
f996902d
RH
3611 if (GET_CODE (op) != SYMBOL_REF)
3612 return 0;
2ae5ae57 3613 return SYMBOL_REF_TLS_MODEL (op);
f996902d
RH
3614}
3615
2ae5ae57 3616static inline int
b96a374d 3617tls_symbolic_operand_1 (rtx op, enum tls_model kind)
f996902d 3618{
f996902d
RH
3619 if (GET_CODE (op) != SYMBOL_REF)
3620 return 0;
2ae5ae57 3621 return SYMBOL_REF_TLS_MODEL (op) == kind;
f996902d
RH
3622}
3623
/* Return 1 if OP is a SYMBOL_REF using the global-dynamic TLS model.  */

int
global_dynamic_symbolic_operand (rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}
3630
/* Return 1 if OP is a SYMBOL_REF using the local-dynamic TLS model.  */

int
local_dynamic_symbolic_operand (rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}
3637
/* Return 1 if OP is a SYMBOL_REF using the initial-exec TLS model.  */

int
initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}
3643
/* Return 1 if OP is a SYMBOL_REF using the local-exec TLS model.  */

int
local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}
3649
28d52ffb
RH
3650/* Test for a valid operand for a call instruction. Don't allow the
3651 arg pointer register or virtual regs since they may decay into
3652 reg + const, which the patterns can't handle. */
2a2ab3f9 3653
e075ae69 3654int
b96a374d 3655call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3656{
e075ae69
RH
3657 /* Disallow indirect through a virtual register. This leads to
3658 compiler aborts when trying to eliminate them. */
3659 if (GET_CODE (op) == REG
3660 && (op == arg_pointer_rtx
564d80f4 3661 || op == frame_pointer_rtx
e075ae69
RH
3662 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3663 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3664 return 0;
2a2ab3f9 3665
28d52ffb
RH
3666 /* Disallow `call 1234'. Due to varying assembler lameness this
3667 gets either rejected or translated to `call .+1234'. */
3668 if (GET_CODE (op) == CONST_INT)
3669 return 0;
3670
cbbf65e0
RH
3671 /* Explicitly allow SYMBOL_REF even if pic. */
3672 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3673 return 1;
2a2ab3f9 3674
cbbf65e0
RH
3675 /* Otherwise we can allow any general_operand in the address. */
3676 return general_operand (op, Pmode);
e075ae69 3677}
79325812 3678
4977bab6
ZW
3679/* Test for a valid operand for a call instruction. Don't allow the
3680 arg pointer register or virtual regs since they may decay into
3681 reg + const, which the patterns can't handle. */
3682
3683int
b96a374d 3684sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3685{
3686 /* Disallow indirect through a virtual register. This leads to
3687 compiler aborts when trying to eliminate them. */
3688 if (GET_CODE (op) == REG
3689 && (op == arg_pointer_rtx
3690 || op == frame_pointer_rtx
3691 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3692 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3693 return 0;
3694
3695 /* Explicitly allow SYMBOL_REF even if pic. */
3696 if (GET_CODE (op) == SYMBOL_REF)
3697 return 1;
3698
3699 /* Otherwise we can only allow register operands. */
3700 return register_operand (op, Pmode);
3701}
3702
/* Return 1 if OP is a constant call address: a SYMBOL_REF, possibly
   with a constant offset added inside a CONST wrapper.  */

int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Strip (const (plus SYM (const_int))) down to SYM.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}
2a2ab3f9 3712
e075ae69 3713/* Match exactly zero and one. */
e9a25f70 3714
0f290768 3715int
8d531ab9 3716const0_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3717{
3718 return op == CONST0_RTX (mode);
3719}
e9a25f70 3720
/* Match exactly the integer constant one.  */

int
const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}
2a2ab3f9 3726
e075ae69 3727/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3728
e075ae69 3729int
8d531ab9 3730const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3731{
3732 return (GET_CODE (op) == CONST_INT
3733 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3734}
e9a25f70 3735
ebe75517 3736int
8d531ab9 3737const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3738{
3739 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3740}
3741
3742int
8d531ab9 3743const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3744{
3745 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3746}
3747
3748int
8d531ab9 3749const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3750{
3751 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3752}
3753
3754int
8d531ab9 3755const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3756{
3757 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3758}
3759
3760
d1f87653 3761/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3762
e075ae69 3763int
8d531ab9 3764incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3765{
f5143c46 3766 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3767 registers, since carry flag is not set. */
3768 if (TARGET_PENTIUM4 && !optimize_size)
3769 return 0;
2b1c08f5 3770 return op == const1_rtx || op == constm1_rtx;
e075ae69 3771}
2a2ab3f9 3772
371bc54b
JH
3773/* Return nonzero if OP is acceptable as operand of DImode shift
3774 expander. */
3775
3776int
b96a374d 3777shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3778{
3779 if (TARGET_64BIT)
3780 return nonimmediate_operand (op, mode);
3781 else
3782 return register_operand (op, mode);
3783}
3784
0f290768 3785/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3786 register eliminable to the stack pointer. Otherwise, this is
3787 a register operand.
2a2ab3f9 3788
e075ae69
RH
3789 This is used to prevent esp from being used as an index reg.
3790 Which would only happen in pathological cases. */
5f1ec3e6 3791
e075ae69 3792int
8d531ab9 3793reg_no_sp_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3794{
3795 rtx t = op;
3796 if (GET_CODE (t) == SUBREG)
3797 t = SUBREG_REG (t);
564d80f4 3798 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3799 return 0;
2a2ab3f9 3800
e075ae69 3801 return register_operand (op, mode);
2a2ab3f9 3802}
b840bfb0 3803
/* Return true if OP is an MMX register.  */

int
mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}
3809
2c5a510c
RH
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG at the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  /* Also reject any other register in the virtual range.  */
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}
3830
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG at the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
3847
7ec70495
JH
/* Return false if this is any eliminable register or stack register,
   otherwise work like register_operand.  */

int
index_register_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  /* NOTE(review): falls through to general_operand rather than
     register_operand as the head comment says; since OP was already
     checked to be a (subreg of a) REG above, the difference is
     presumably harmless — confirm against register_operand's extra
     checks before relying on it.  */
  return general_operand (op, mode);
}
3869
e075ae69 3870/* Return true if op is a Q_REGS class register. */
b840bfb0 3871
e075ae69 3872int
8d531ab9 3873q_regs_operand (rtx op, enum machine_mode mode)
b840bfb0 3874{
e075ae69
RH
3875 if (mode != VOIDmode && GET_MODE (op) != mode)
3876 return 0;
3877 if (GET_CODE (op) == SUBREG)
3878 op = SUBREG_REG (op);
7799175f 3879 return ANY_QI_REG_P (op);
0f290768 3880}
b840bfb0 3881
4977bab6
ZW
/* Return true if op is the flags register (with a known mode).  */

int
flags_reg_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}
3891
/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
b840bfb0 3903
/* Return true if OP is a memory reference to a constant-pool vector
   whose every element except the lowest is zero — i.e. a scalar load
   that implicitly zero-extends into the full vector.  */

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  /* Resolve the MEM to its constant-pool entry, if any.  */
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Check elements n_elts-1 down to 1; element 0 (the scalar itself)
     is deliberately left unchecked and may be any value.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}
3927
fdc4b40b
JH
3928/* Return 1 when OP is operand acceptable for standard SSE move. */
3929int
b96a374d 3930vector_move_operand (rtx op, enum machine_mode mode)
fdc4b40b
JH
3931{
3932 if (nonimmediate_operand (op, mode))
3933 return 1;
3934 if (GET_MODE (op) != mode && mode != VOIDmode)
3935 return 0;
3936 return (op == CONST0_RTX (GET_MODE (op)));
3937}
3938
74dc3e94
RH
/* Return true if op is a valid address, and does not contain
   a segment override.  */

int
no_seg_address_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  /* address_operand accepted it, so decomposition must succeed.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}
3955
915119a5
BS
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  The
   acceptable codes depend on the mode of the flags-register operand
   being compared.  */
int
ix86_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when they can be expressed as a
	 single integer condition, with no bypass/second code needed.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned and ordering tests need the full flags in CCmode.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
4025
e6e81735
JH
4026/* Return 1 if OP is a valid comparison operator testing carry flag
4027 to be set. */
4028int
8d531ab9 4029ix86_carry_flag_operator (rtx op, enum machine_mode mode)
e6e81735
JH
4030{
4031 enum machine_mode inmode;
4032 enum rtx_code code = GET_CODE (op);
4033
4034 if (mode != VOIDmode && GET_MODE (op) != mode)
4035 return 0;
4036 if (GET_RTX_CLASS (code) != '<')
4037 return 0;
4038 inmode = GET_MODE (XEXP (op, 0));
4039 if (GET_CODE (XEXP (op, 0)) != REG
4040 || REGNO (XEXP (op, 0)) != 17
4041 || XEXP (op, 1) != const0_rtx)
4042 return 0;
4043
4044 if (inmode == CCFPmode || inmode == CCFPUmode)
4045 {
4046 enum rtx_code second_code, bypass_code;
4047
4048 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4049 if (bypass_code != NIL || second_code != NIL)
4050 return 0;
4051 code = ix86_fp_compare_code_to_integer (code);
4052 }
4053 else if (inmode != CCmode)
4054 return 0;
4055 return code == LTU;
4056}
4057
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons qualify only when expressible as one integer
	 condition; map that condition to its integer equivalent.  */
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 4094
e9e80858
JH
4095/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4096
4097int
8d531ab9 4098promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
4099{
4100 switch (GET_CODE (op))
4101 {
4102 case MULT:
4103 /* Modern CPUs have same latency for HImode and SImode multiply,
4104 but 386 and 486 do HImode multiply faster. */
9e555526 4105 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4106 case PLUS:
4107 case AND:
4108 case IOR:
4109 case XOR:
4110 case ASHIFT:
4111 return 1;
4112 default:
4113 return 0;
4114 }
4115}
4116
e075ae69
RH
4117/* Nearly general operand, but accept any const_double, since we wish
4118 to be able to drop them into memory rather than have them get pulled
4119 into registers. */
b840bfb0 4120
2a2ab3f9 4121int
8d531ab9 4122cmp_fp_expander_operand (rtx op, enum machine_mode mode)
2a2ab3f9 4123{
e075ae69 4124 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4125 return 0;
e075ae69 4126 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4127 return 1;
e075ae69 4128 return general_operand (op, mode);
2a2ab3f9
JVA
4129}
4130
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  /* DImode is only acceptable on 64-bit targets; otherwise only
     SImode and HImode qualify.  */
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
4148
4149/* Return 1 if this is a valid binary floating-point operation.
0f290768 4150 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
4151
4152int
8d531ab9 4153binary_fp_operator (rtx op, enum machine_mode mode)
e075ae69
RH
4154{
4155 if (mode != VOIDmode && mode != GET_MODE (op))
4156 return 0;
4157
2a2ab3f9
JVA
4158 switch (GET_CODE (op))
4159 {
e075ae69
RH
4160 case PLUS:
4161 case MINUS:
4162 case MULT:
4163 case DIV:
4164 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4165
2a2ab3f9
JVA
4166 default:
4167 return 0;
4168 }
4169}
fee2770d 4170
/* Return 1 if OP is a MULT rtx.  */

int
mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}
4176
/* Return 1 if OP is a DIV rtx.  */

int
div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}
0a726ef1
JL
4182
4183int
b96a374d 4184arith_or_logical_operator (rtx op, enum machine_mode mode)
0a726ef1 4185{
e075ae69
RH
4186 return ((mode == VOIDmode || GET_MODE (op) == mode)
4187 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4188 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
4189}
4190
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* memory_operand accepted it, so decomposition must succeed.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
4206
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept the (and (zero_extract X 8 8) const_int) form emitted
     for byte-extract tests.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 4231
e075ae69
RH
4232/* Returns 1 if OP is memory operand that can not be represented by the
4233 modRM array. */
d784886d
RK
4234
4235int
8d531ab9 4236long_memory_operand (rtx op, enum machine_mode mode)
d784886d 4237{
e075ae69 4238 if (! memory_operand (op, mode))
d784886d
RK
4239 return 0;
4240
e075ae69 4241 return memory_address_length (op) != 0;
d784886d 4242}
2247f6ed
JH
4243
/* Return nonzero if the rtx is known aligned (to 4 bytes).  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scaled index is aligned when scale >= 4, or when the index
	 register itself is known 32-bit aligned.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* Displacement must be a constant multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69 4295\f
881b2a96
RS
/* Initialize the table of extra 80387 mathematical constants — the
   values loaded by fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
4321
/* Return true if the constant is something that can be loaded with
   a special instruction.  Returns -1 for non-FP constants, 1 for
   fldz, 2 for fld1, 3..7 for the extra constants in
   ext_80387_constants_table, and 0 when no special load exists.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}
4355
881b2a96
RS
4356/* Return the opcode of the special instruction to be used to load
4357 the constant X. */
4358
4359const char *
b96a374d 4360standard_80387_constant_opcode (rtx x)
881b2a96
RS
4361{
4362 switch (standard_80387_constant_p (x))
4363 {
b96a374d 4364 case 1:
881b2a96
RS
4365 return "fldz";
4366 case 2:
4367 return "fld1";
b96a374d 4368 case 3:
881b2a96
RS
4369 return "fldlg2";
4370 case 4:
4371 return "fldln2";
b96a374d 4372 case 5:
881b2a96
RS
4373 return "fldl2e";
4374 case 6:
4375 return "fldl2t";
b96a374d 4376 case 7:
881b2a96
RS
4377 return "fldpi";
4378 }
4379 abort ();
4380}
4381
4382/* Return the CONST_DOUBLE representing the 80387 constant that is
4383 loaded by the specified special instruction. The argument IDX
4384 matches the return value from standard_80387_constant_p. */
4385
4386rtx
b96a374d 4387standard_80387_constant_rtx (int idx)
881b2a96
RS
4388{
4389 int i;
4390
4391 if (! ext_80387_constants_init)
4392 init_ext_80387_constants ();
4393
4394 switch (idx)
4395 {
4396 case 3:
4397 case 4:
4398 case 5:
4399 case 6:
4400 case 7:
4401 i = idx - 3;
4402 break;
4403
4404 default:
4405 abort ();
4406 }
4407
1f48e56d 4408 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4409 XFmode);
881b2a96
RS
4410}
4411
2b04e52b
JH
4412/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4413 */
4414int
b96a374d 4415standard_sse_constant_p (rtx x)
2b04e52b 4416{
0e67d460
JH
4417 if (x == const0_rtx)
4418 return 1;
2b04e52b
JH
4419 return (x == CONST0_RTX (GET_MODE (x)));
4420}
4421
2a2ab3f9
JVA
/* Returns 1 if OP contains a symbol reference, searching the whole
   rtx tree recursively.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Walk every operand; 'E' operands are rtx vectors, 'e' operands
     are single sub-expressions.  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
e075ae69
RH
4451
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  /* NOTE(review): the code checks args_size >= 32768, i.e. 32K bytes
     — presumably the comment means "32k pop" (ret's immediate is
     16-bit); confirm before changing either.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
4486\f
/* Return 1 if VALUE can be stored in the sign extended immediate field
   of an x86-64 instruction (i.e. fits in a sign-extended 32-bit
   immediate).  VALUE is a constant rtx; the answer depends on the
   active code model (ix86_cmodel).  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32, so all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  /* On a 32-bit host every CONST_INT already fits in 32 bits.  */
	  return 1;
	else
	  {
	    /* Fits iff sign-extending the low 32 bits reproduces the
	       64-bit value.  */
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return trunc_int_for_mode (val, SImode) == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
      case SYMBOL_REF:
	/* TLS symbols are not constant.  */
	if (tls_symbolic_operand (value, Pmode))
	  return false;
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
		|| ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
      case CONST:
	/* Bare UNSPECs for GOT-relative and TLS offsets are known to be
	   32-bit quantities.  */
	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	  switch (XINT (XEXP (value, 0), 1))
	    {
	    case UNSPEC_GOTPCREL:
	    case UNSPEC_DTPOFF:
	    case UNSPEC_GOTNTPOFF:
	    case UNSPEC_NTPOFF:
	      return 1;
	    default:
	      break;
	    }
	if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    /* Only symbol + constant-integer offset is acceptable.  */
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
	      case SYMBOL_REF:
		/* For CM_SMALL assume that latest object is 16MB before
		   end of 31bits boundary.  We may also accept pretty
		   large negative constants knowing that all objects are
		   in the positive half of address space.  */
		if (ix86_cmodel == CM_SMALL
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		/* For CM_KERNEL we know that all object resist in the
		   negative half of 32bits address space.  We may not
		   accept negative offsets, since they may be just off
		   and we may accept pretty large positive ones.  */
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case LABEL_REF:
		/* These conditions are similar to SYMBOL_REF ones, just the
		   constraints for code models differ.  */
		if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case UNSPEC:
		/* TLS offsets (DTPOFF/NTPOFF) plus a positive 32-bit
		   displacement also fit.  */
		switch (XINT (op1, 1))
		  {
		  case UNSPEC_DTPOFF:
		  case UNSPEC_NTPOFF:
		    if (offset > 0
			&& trunc_int_for_mode (offset, SImode) == offset)
		      return 1;
		  }
		break;
	      default:
		return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}
4597
/* Return 1 if VALUE can be stored in the zero extended immediate field
   of an x86-64 instruction (i.e. fits in an unsigned 32-bit immediate).
   VALUE is a constant rtx; the answer depends on the active code model
   (ix86_cmodel).  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      /* On a 32-bit host a VOIDmode CONST_DOUBLE carries a 64-bit
	 integer; it fits iff the high half is zero.  */
      if (HOST_BITS_PER_WIDE_INT == 32)
	return  (GET_MODE (value) == VOIDmode
		 && !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	/* 32-bit host: any non-negative CONST_INT zero-extends.  */
	return INTVAL (value) >= 0;
      else
	/* 64-bit host: no bits above the low 32 may be set.  */
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return ix86_cmodel == CM_SMALL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

    /* We also may accept the offsetted memory references in certain special
       cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* NOTE(review): this early "return 0" makes everything
		 below in this case, down to "break", unreachable dead
		 code.  It looks like SYMBOL_REF+offset was deliberately
		 disabled while leaving the old logic in place — confirm
		 intent before touching it.  */
	      return 0;
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
6fca22eb
RH
4676
4677/* Value should be nonzero if functions must have frame pointers.
4678 Zero means the frame pointer need not be set up (and parms may
4679 be accessed via the stack pointer) in functions that seem suitable. */
4680
4681int
b96a374d 4682ix86_frame_pointer_required (void)
6fca22eb
RH
4683{
4684 /* If we accessed previous frames, then the generated code expects
4685 to be able to access the saved ebp value in our frame. */
4686 if (cfun->machine->accesses_prev_frame)
4687 return 1;
a4f31c00 4688
6fca22eb
RH
4689 /* Several x86 os'es need a frame pointer for other reasons,
4690 usually pertaining to setjmp. */
4691 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4692 return 1;
4693
4694 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4695 the frame pointer by default. Turn it back on now if we've not
4696 got a leaf function. */
a7943381 4697 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4698 && (!current_function_is_leaf))
4699 return 1;
4700
4701 if (current_function_profile)
6fca22eb
RH
4702 return 1;
4703
4704 return 0;
4705}
4706
/* Record that the current function accesses previous call frames.
   The flag is read by ix86_frame_pointer_required, which then forces
   a frame pointer so the saved ebp chain stays reachable.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4714\f
145aacc2
RH
/* Emit the pc-thunk as a hidden, one-only (COMDAT) function when the
   assembler and linker support it; otherwise fall back to a local label
   in the text section.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask over registers 0..7: bit N is set by output_set_got when a
   pc thunk for register N is referenced; ix86_file_end emits the
   corresponding thunk bodies at end of compilation.  */
static int pic_labels_used;
e9a25f70 4722
145aacc2
RH
4723/* Fills in the label name that should be used for a pc thunk for
4724 the given register. */
4725
4726static void
b96a374d 4727get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
4728{
4729 if (USE_HIDDEN_LINKONCE)
4730 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4731 else
4732 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4733}
4734
4735
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Called at the
   end of assembly output; emits one pc thunk per register recorded in
   pic_labels_used by output_set_got.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  /* Only the eight 32-bit integer registers can hold a thunk result.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers whose thunk was never referenced.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Build a public, one-only function decl so the thunk can be
	     merged across translation units by the linker.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  /* Global but hidden: visible to the linker for merging, not
	     exported from the final object.  */
	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Fallback: plain local label in the text section.  */
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* The thunk body: load the return address (at the top of the
	 stack) into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 4788
c8c03509 4789/* Emit code for the SET_GOT patterns. */
32b5b1aa 4790
c8c03509 4791const char *
b96a374d 4792output_set_got (rtx dest)
c8c03509
RH
4793{
4794 rtx xops[3];
0d7d98ee 4795
c8c03509 4796 xops[0] = dest;
5fc0e5df 4797 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4798
c8c03509 4799 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4800 {
c8c03509
RH
4801 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4802
4803 if (!flag_pic)
4804 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4805 else
4806 output_asm_insn ("call\t%a2", xops);
4807
b069de3b
SS
4808#if TARGET_MACHO
4809 /* Output the "canonical" label name ("Lxx$pb") here too. This
4810 is what will be referred to by the Mach-O PIC subsystem. */
4811 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4812#endif
4977bab6 4813 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4814 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4815
4816 if (flag_pic)
4817 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4818 }
e075ae69 4819 else
e5cb57e8 4820 {
145aacc2
RH
4821 char name[32];
4822 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4823 pic_labels_used |= 1 << REGNO (dest);
f996902d 4824
145aacc2 4825 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4826 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4827 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4828 }
e5cb57e8 4829
c8c03509
RH
4830 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4831 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4832 else if (!TARGET_MACHO)
8e9fadc3 4833 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4834
c8c03509 4835 return "";
e9a25f70 4836}
8dfe5673 4837
0d7d98ee 4838/* Generate an "push" pattern for input ARG. */
e9a25f70 4839
e075ae69 4840static rtx
b96a374d 4841gen_push (rtx arg)
e9a25f70 4842{
c5c76735 4843 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4844 gen_rtx_MEM (Pmode,
4845 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4846 stack_pointer_rtx)),
4847 arg);
e9a25f70
JL
4848}
4849
bd09bdeb
RH
4850/* Return >= 0 if there is an unused call-clobbered register available
4851 for the entire function. */
4852
4853static unsigned int
b96a374d 4854ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4855{
4856 if (current_function_is_leaf && !current_function_profile)
4857 {
4858 int i;
4859 for (i = 2; i >= 0; --i)
4860 if (!regs_ever_live[i])
4861 return i;
4862 }
4863
4864 return INVALID_REGNUM;
4865}
fce5a9f2 4866
4dd2ac2c
JH
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN nonzero means
   also count the EH return data registers of an eh_return function.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  /* The PIC register must be saved whenever it is live, unless an
     alternate call-clobbered register can carry the GOT pointer for
     the whole function (see ix86_select_alt_pic_regnum).  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  /* EH return data registers must be saved so the unwinder can
     restore them; the list is terminated by INVALID_REGNUM.  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* Otherwise: save any live callee-saved, non-fixed register —
     except the hard frame pointer when it is handled separately by
     the frame-pointer setup.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
4901
0903fcab
JH
4902/* Return number of registers to be saved on the stack. */
4903
4904static int
b96a374d 4905ix86_nsaved_regs (void)
0903fcab
JH
4906{
4907 int nregs = 0;
0903fcab
JH
4908 int regno;
4909
4dd2ac2c 4910 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4911 if (ix86_save_reg (regno, true))
4dd2ac2c 4912 nregs++;
0903fcab
JH
4913 return nregs;
4914}
4915
4916/* Return the offset between two registers, one to be eliminated, and the other
4917 its replacement, at the start of a routine. */
4918
4919HOST_WIDE_INT
b96a374d 4920ix86_initial_elimination_offset (int from, int to)
0903fcab 4921{
4dd2ac2c
JH
4922 struct ix86_frame frame;
4923 ix86_compute_frame_layout (&frame);
564d80f4
JH
4924
4925 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4926 return frame.hard_frame_pointer_offset;
564d80f4
JH
4927 else if (from == FRAME_POINTER_REGNUM
4928 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4929 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4930 else
4931 {
564d80f4
JH
4932 if (to != STACK_POINTER_REGNUM)
4933 abort ();
4934 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4935 return frame.stack_pointer_offset;
564d80f4
JH
4936 else if (from != FRAME_POINTER_REGNUM)
4937 abort ();
0903fcab 4938 else
4dd2ac2c 4939 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4940 }
4941}
4942
4dd2ac2c 4943/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4944
4dd2ac2c 4945static void
b96a374d 4946ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4947{
65954bd8 4948 HOST_WIDE_INT total_size;
564d80f4 4949 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
b19ee4bd 4950 HOST_WIDE_INT offset;
44affdae 4951 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4952 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4953
4dd2ac2c 4954 frame->nregs = ix86_nsaved_regs ();
564d80f4 4955 total_size = size;
65954bd8 4956
d7394366
JH
4957 /* During reload iteration the amount of registers saved can change.
4958 Recompute the value as needed. Do not recompute when amount of registers
4959 didn't change as reload does mutiple calls to the function and does not
4960 expect the decision to change within single iteration. */
4961 if (!optimize_size
4962 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
4963 {
4964 int count = frame->nregs;
4965
d7394366 4966 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
4967 /* The fast prologue uses move instead of push to save registers. This
4968 is significantly longer, but also executes faster as modern hardware
4969 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 4970
d9b40e8d
JH
4971 Be careful about choosing what prologue to emit: When function takes
4972 many instructions to execute we may use slow version as well as in
4973 case function is known to be outside hot spot (this is known with
4974 feedback only). Weight the size of function by number of registers
4975 to save as it is cheap to use one or two push instructions but very
4976 slow to use many of them. */
4977 if (count)
4978 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4979 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4980 || (flag_branch_probabilities
4981 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4982 cfun->machine->use_fast_prologue_epilogue = false;
4983 else
4984 cfun->machine->use_fast_prologue_epilogue
4985 = !expensive_function_p (count);
4986 }
4987 if (TARGET_PROLOGUE_USING_MOVE
4988 && cfun->machine->use_fast_prologue_epilogue)
4989 frame->save_regs_using_mov = true;
4990 else
4991 frame->save_regs_using_mov = false;
4992
4993
9ba81eaa 4994 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4995 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4996
4997 frame->hard_frame_pointer_offset = offset;
564d80f4 4998
fcbfaa65
RK
4999 /* Do some sanity checking of stack_alignment_needed and
5000 preferred_alignment, since i386 port is the only using those features
f710504c 5001 that may break easily. */
564d80f4 5002
44affdae
JH
5003 if (size && !stack_alignment_needed)
5004 abort ();
44affdae
JH
5005 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5006 abort ();
5007 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5008 abort ();
5009 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5010 abort ();
564d80f4 5011
4dd2ac2c
JH
5012 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5013 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 5014
4dd2ac2c
JH
5015 /* Register save area */
5016 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 5017
8362f420
JH
5018 /* Va-arg area */
5019 if (ix86_save_varrargs_registers)
5020 {
5021 offset += X86_64_VARARGS_SIZE;
5022 frame->va_arg_size = X86_64_VARARGS_SIZE;
5023 }
5024 else
5025 frame->va_arg_size = 0;
5026
4dd2ac2c
JH
5027 /* Align start of frame for local function. */
5028 frame->padding1 = ((offset + stack_alignment_needed - 1)
5029 & -stack_alignment_needed) - offset;
f73ad30e 5030
4dd2ac2c 5031 offset += frame->padding1;
65954bd8 5032
4dd2ac2c
JH
5033 /* Frame pointer points here. */
5034 frame->frame_pointer_offset = offset;
54ff41b7 5035
4dd2ac2c 5036 offset += size;
65954bd8 5037
0b7ae565 5038 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
5039 all the function calls as dead code.
5040 Skipping is however impossible when function calls alloca. Alloca
5041 expander assumes that last current_function_outgoing_args_size
5042 of stack frame are unused. */
5043 if (ACCUMULATE_OUTGOING_ARGS
5044 && (!current_function_is_leaf || current_function_calls_alloca))
4dd2ac2c
JH
5045 {
5046 offset += current_function_outgoing_args_size;
5047 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5048 }
5049 else
5050 frame->outgoing_arguments_size = 0;
564d80f4 5051
002ff5bc
RH
5052 /* Align stack boundary. Only needed if we're calling another function
5053 or using alloca. */
5054 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
5055 frame->padding2 = ((offset + preferred_alignment - 1)
5056 & -preferred_alignment) - offset;
5057 else
5058 frame->padding2 = 0;
4dd2ac2c
JH
5059
5060 offset += frame->padding2;
5061
5062 /* We've reached end of stack frame. */
5063 frame->stack_pointer_offset = offset;
5064
5065 /* Size prologue needs to allocate. */
5066 frame->to_allocate =
5067 (size + frame->padding1 + frame->padding2
8362f420 5068 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 5069
b19ee4bd
JJ
5070 if ((!frame->to_allocate && frame->nregs <= 1)
5071 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
5072 frame->save_regs_using_mov = false;
5073
a5b378d6 5074 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
5075 && current_function_is_leaf)
5076 {
5077 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
5078 if (frame->save_regs_using_mov)
5079 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
5080 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5081 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5082 }
5083 else
5084 frame->red_zone_size = 0;
5085 frame->to_allocate -= frame->red_zone_size;
5086 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
5087#if 0
5088 fprintf (stderr, "nregs: %i\n", frame->nregs);
5089 fprintf (stderr, "size: %i\n", size);
5090 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5091 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 5092 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
5093 fprintf (stderr, "padding2: %i\n", frame->padding2);
5094 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 5095 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
5096 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5097 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5098 frame->hard_frame_pointer_offset);
5099 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5100#endif
65954bd8
JL
5101}
5102
0903fcab
JH
5103/* Emit code to save registers in the prologue. */
5104
5105static void
b96a374d 5106ix86_emit_save_regs (void)
0903fcab 5107{
8d531ab9 5108 int regno;
0903fcab 5109 rtx insn;
0903fcab 5110
4dd2ac2c 5111 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5112 if (ix86_save_reg (regno, true))
0903fcab 5113 {
0d7d98ee 5114 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
5115 RTX_FRAME_RELATED_P (insn) = 1;
5116 }
5117}
5118
c6036a37
JH
5119/* Emit code to save registers using MOV insns. First register
5120 is restored from POINTER + OFFSET. */
5121static void
b96a374d 5122ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
5123{
5124 int regno;
5125 rtx insn;
5126
5127 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5128 if (ix86_save_reg (regno, true))
5129 {
b72f00af
RK
5130 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5131 Pmode, offset),
c6036a37
JH
5132 gen_rtx_REG (Pmode, regno));
5133 RTX_FRAME_RELATED_P (insn) = 1;
5134 offset += UNITS_PER_WORD;
5135 }
5136}
5137
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    /* 64-bit, but the offset fits an immediate operand.  */
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* 64-bit with an offset too large for an immediate: stage it
	 through %r11 first.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  /* Negative STYLE marks the adjustment itself as frame related for
     DWARF CFI generation.  */
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
5172
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  /* Set up the frame pointer: push old %ebp, copy %esp into it.  */
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  /* Either push the registers now, or fold their save area into the
     single stack allocation and store them with moves later.  */
  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    /* Plain subtraction from the stack pointer.  */
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing: go through the
	 allocate_stack_worker, which takes the size in %eax.
	 Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      /* Preserve an incoming-argument %eax across the worker call by
	 pushing it; the push itself accounts for 4 allocated bytes.  */
      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Reload the saved %eax from its slot at the top of the new frame.  */
      if (eax_live)
	{
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  /* Non-red-zone move-based saves happen after the allocation.  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  /* Decide whether the GOT pointer must be materialized, possibly
     redirecting it to a free call-clobbered register.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5284
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN nonzero also
   restores the EH return data registers (see ix86_save_reg).  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    /* Fold the oversized offset into %r11 once; subsequent
	       registers use small offsets from the new base.  */
	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
5314
0f290768 5315/* Restore function stack, frame, and registers. */
e9a25f70 5316
2a2ab3f9 5317void
b96a374d 5318ix86_expand_epilogue (int style)
2a2ab3f9 5319{
1c71e60e 5320 int regno;
fdb8a883 5321 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 5322 struct ix86_frame frame;
65954bd8 5323 HOST_WIDE_INT offset;
4dd2ac2c
JH
5324
5325 ix86_compute_frame_layout (&frame);
2a2ab3f9 5326
a4f31c00 5327 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
5328 must be taken for the normal return case of a function using
5329 eh_return: the eax and edx registers are marked as saved, but not
5330 restored along this path. */
5331 offset = frame.nregs;
5332 if (current_function_calls_eh_return && style != 2)
5333 offset -= 2;
5334 offset *= -UNITS_PER_WORD;
2a2ab3f9 5335
fdb8a883
JW
5336 /* If we're only restoring one register and sp is not valid then
5337 using a move instruction to restore the register since it's
0f290768 5338 less work than reloading sp and popping the register.
da2d1d3a
JH
5339
5340 The default code result in stack adjustment using add/lea instruction,
5341 while this code results in LEAVE instruction (or discrete equivalent),
5342 so it is profitable in some other cases as well. Especially when there
5343 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 5344 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 5345 tuning in future. */
4dd2ac2c 5346 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 5347 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 5348 && cfun->machine->use_fast_prologue_epilogue
c6036a37 5349 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 5350 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 5351 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
5352 && cfun->machine->use_fast_prologue_epilogue
5353 && frame.nregs == 1)
2ab0437e 5354 || current_function_calls_eh_return)
2a2ab3f9 5355 {
da2d1d3a
JH
5356 /* Restore registers. We can use ebp or esp to address the memory
5357 locations. If both are available, default to ebp, since offsets
5358 are known to be small. Only exception is esp pointing directly to the
5359 end of block of saved registers, where we may simplify addressing
5360 mode. */
5361
4dd2ac2c 5362 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
5363 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5364 frame.to_allocate, style == 2);
da2d1d3a 5365 else
1020a5ab
RH
5366 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5367 offset, style == 2);
5368
5369 /* eh_return epilogues need %ecx added to the stack pointer. */
5370 if (style == 2)
5371 {
5372 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 5373
1020a5ab
RH
5374 if (frame_pointer_needed)
5375 {
5376 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5377 tmp = plus_constant (tmp, UNITS_PER_WORD);
5378 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5379
5380 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5381 emit_move_insn (hard_frame_pointer_rtx, tmp);
5382
b19ee4bd
JJ
5383 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5384 const0_rtx, style);
1020a5ab
RH
5385 }
5386 else
5387 {
5388 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5389 tmp = plus_constant (tmp, (frame.to_allocate
5390 + frame.nregs * UNITS_PER_WORD));
5391 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5392 }
5393 }
5394 else if (!frame_pointer_needed)
b19ee4bd
JJ
5395 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5396 GEN_INT (frame.to_allocate
5397 + frame.nregs * UNITS_PER_WORD),
5398 style);
0f290768 5399 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
5400 else if (TARGET_USE_LEAVE || optimize_size
5401 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 5402 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 5403 else
2a2ab3f9 5404 {
b19ee4bd
JJ
5405 pro_epilogue_adjust_stack (stack_pointer_rtx,
5406 hard_frame_pointer_rtx,
5407 const0_rtx, style);
8362f420
JH
5408 if (TARGET_64BIT)
5409 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5410 else
5411 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
5412 }
5413 }
1c71e60e 5414 else
68f654ec 5415 {
1c71e60e
JH
5416 /* First step is to deallocate the stack frame so that we can
5417 pop the registers. */
5418 if (!sp_valid)
5419 {
5420 if (!frame_pointer_needed)
5421 abort ();
b19ee4bd
JJ
5422 pro_epilogue_adjust_stack (stack_pointer_rtx,
5423 hard_frame_pointer_rtx,
5424 GEN_INT (offset), style);
1c71e60e 5425 }
4dd2ac2c 5426 else if (frame.to_allocate)
b19ee4bd
JJ
5427 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5428 GEN_INT (frame.to_allocate), style);
1c71e60e 5429
4dd2ac2c 5430 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 5431 if (ix86_save_reg (regno, false))
8362f420
JH
5432 {
5433 if (TARGET_64BIT)
5434 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5435 else
5436 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5437 }
4dd2ac2c 5438 if (frame_pointer_needed)
8362f420 5439 {
f5143c46 5440 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
5441 able to grok it fast. */
5442 if (TARGET_USE_LEAVE)
5443 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5444 else if (TARGET_64BIT)
8362f420
JH
5445 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5446 else
5447 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5448 }
68f654ec 5449 }
68f654ec 5450
cbbf65e0 5451 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 5452 if (style == 0)
cbbf65e0
RH
5453 return;
5454
2a2ab3f9
JVA
5455 if (current_function_pops_args && current_function_args_size)
5456 {
e075ae69 5457 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 5458
b8c752c8
UD
5459 /* i386 can only pop 64K bytes. If asked to pop more, pop
5460 return address, do explicit add, and jump indirectly to the
0f290768 5461 caller. */
2a2ab3f9 5462
b8c752c8 5463 if (current_function_pops_args >= 65536)
2a2ab3f9 5464 {
e075ae69 5465 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 5466
b19ee4bd 5467 /* There is no "pascal" calling convention in 64bit ABI. */
8362f420 5468 if (TARGET_64BIT)
b531087a 5469 abort ();
8362f420 5470
e075ae69
RH
5471 emit_insn (gen_popsi1 (ecx));
5472 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 5473 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 5474 }
79325812 5475 else
e075ae69
RH
5476 emit_jump_insn (gen_return_pop_internal (popc));
5477 }
5478 else
5479 emit_jump_insn (gen_return_internal ());
5480}
bd09bdeb
RH
5481
5482/* Reset from the function's potential modifications. */
5483
5484static void
b96a374d
AJ
5485ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5486 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
5487{
5488 if (pic_offset_table_rtx)
5489 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5490}
e075ae69
RH
5491\f
5492/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
5493 for an instruction. Return 0 if the structure of the address is
5494 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 5495 strictly valid, but still used for computing length of lea instruction. */
e075ae69
RH
5496
5497static int
8d531ab9 5498ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69
RH
5499{
5500 rtx base = NULL_RTX;
5501 rtx index = NULL_RTX;
5502 rtx disp = NULL_RTX;
5503 HOST_WIDE_INT scale = 1;
5504 rtx scale_rtx = NULL_RTX;
b446e5a2 5505 int retval = 1;
74dc3e94 5506 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 5507
90e4e4c5 5508 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
e075ae69
RH
5509 base = addr;
5510 else if (GET_CODE (addr) == PLUS)
5511 {
74dc3e94
RH
5512 rtx addends[4], op;
5513 int n = 0, i;
e075ae69 5514
74dc3e94
RH
5515 op = addr;
5516 do
e075ae69 5517 {
74dc3e94
RH
5518 if (n >= 4)
5519 return 0;
5520 addends[n++] = XEXP (op, 1);
5521 op = XEXP (op, 0);
2a2ab3f9 5522 }
74dc3e94
RH
5523 while (GET_CODE (op) == PLUS);
5524 if (n >= 4)
5525 return 0;
5526 addends[n] = op;
5527
5528 for (i = n; i >= 0; --i)
e075ae69 5529 {
74dc3e94
RH
5530 op = addends[i];
5531 switch (GET_CODE (op))
5532 {
5533 case MULT:
5534 if (index)
5535 return 0;
5536 index = XEXP (op, 0);
5537 scale_rtx = XEXP (op, 1);
5538 break;
5539
5540 case UNSPEC:
5541 if (XINT (op, 1) == UNSPEC_TP
5542 && TARGET_TLS_DIRECT_SEG_REFS
5543 && seg == SEG_DEFAULT)
5544 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5545 else
5546 return 0;
5547 break;
5548
5549 case REG:
5550 case SUBREG:
5551 if (!base)
5552 base = op;
5553 else if (!index)
5554 index = op;
5555 else
5556 return 0;
5557 break;
5558
5559 case CONST:
5560 case CONST_INT:
5561 case SYMBOL_REF:
5562 case LABEL_REF:
5563 if (disp)
5564 return 0;
5565 disp = op;
5566 break;
5567
5568 default:
5569 return 0;
5570 }
e075ae69 5571 }
e075ae69
RH
5572 }
5573 else if (GET_CODE (addr) == MULT)
5574 {
5575 index = XEXP (addr, 0); /* index*scale */
5576 scale_rtx = XEXP (addr, 1);
5577 }
5578 else if (GET_CODE (addr) == ASHIFT)
5579 {
5580 rtx tmp;
5581
5582 /* We're called for lea too, which implements ashift on occasion. */
5583 index = XEXP (addr, 0);
5584 tmp = XEXP (addr, 1);
5585 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 5586 return 0;
e075ae69
RH
5587 scale = INTVAL (tmp);
5588 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 5589 return 0;
e075ae69 5590 scale = 1 << scale;
b446e5a2 5591 retval = -1;
2a2ab3f9 5592 }
2a2ab3f9 5593 else
e075ae69
RH
5594 disp = addr; /* displacement */
5595
5596 /* Extract the integral value of scale. */
5597 if (scale_rtx)
e9a25f70 5598 {
e075ae69 5599 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 5600 return 0;
e075ae69 5601 scale = INTVAL (scale_rtx);
e9a25f70 5602 }
3b3c6a3f 5603
74dc3e94 5604 /* Allow arg pointer and stack pointer as index if there is not scaling. */
e075ae69 5605 if (base && index && scale == 1
74dc3e94
RH
5606 && (index == arg_pointer_rtx
5607 || index == frame_pointer_rtx
5608 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
e075ae69
RH
5609 {
5610 rtx tmp = base;
5611 base = index;
5612 index = tmp;
5613 }
5614
5615 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
5616 if ((base == hard_frame_pointer_rtx
5617 || base == frame_pointer_rtx
5618 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
5619 disp = const0_rtx;
5620
5621 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5622 Avoid this by transforming to [%esi+0]. */
9e555526 5623 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 5624 && base && !index && !disp
329e1d01 5625 && REG_P (base)
e075ae69
RH
5626 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5627 disp = const0_rtx;
5628
5629 /* Special case: encode reg+reg instead of reg*2. */
5630 if (!base && index && scale && scale == 2)
5631 base = index, scale = 1;
0f290768 5632
e075ae69
RH
5633 /* Special case: scaling cannot be encoded without base or displacement. */
5634 if (!base && !disp && index && scale != 1)
5635 disp = const0_rtx;
5636
5637 out->base = base;
5638 out->index = index;
5639 out->disp = disp;
5640 out->scale = scale;
74dc3e94 5641 out->seg = seg;
3b3c6a3f 5642
b446e5a2 5643 return retval;
e075ae69 5644}
01329426
JH
5645\f
5646/* Return cost of the memory address x.
5647 For i386, it is better to use a complex address than let gcc copy
5648 the address into a reg and make a new pseudo. But not if the address
5649 requires to two regs - that would mean more pseudos with longer
5650 lifetimes. */
dcefdf67 5651static int
b96a374d 5652ix86_address_cost (rtx x)
01329426
JH
5653{
5654 struct ix86_address parts;
5655 int cost = 1;
3b3c6a3f 5656
01329426
JH
5657 if (!ix86_decompose_address (x, &parts))
5658 abort ();
5659
5660 /* More complex memory references are better. */
5661 if (parts.disp && parts.disp != const0_rtx)
5662 cost--;
74dc3e94
RH
5663 if (parts.seg != SEG_DEFAULT)
5664 cost--;
01329426
JH
5665
5666 /* Attempt to minimize number of registers in the address. */
5667 if ((parts.base
5668 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5669 || (parts.index
5670 && (!REG_P (parts.index)
5671 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5672 cost++;
5673
5674 if (parts.base
5675 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5676 && parts.index
5677 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5678 && parts.base != parts.index)
5679 cost++;
5680
5681 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5682 since it's predecode logic can't detect the length of instructions
5683 and it degenerates to vector decoded. Increase cost of such
5684 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 5685 to split such addresses or even refuse such addresses at all.
01329426
JH
5686
5687 Following addressing modes are affected:
5688 [base+scale*index]
5689 [scale*index+disp]
5690 [base+index]
0f290768 5691
01329426
JH
5692 The first and last case may be avoidable by explicitly coding the zero in
5693 memory address, but I don't have AMD-K6 machine handy to check this
5694 theory. */
5695
5696 if (TARGET_K6
5697 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5698 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5699 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5700 cost += 10;
0f290768 5701
01329426
JH
5702 return cost;
5703}
5704\f
b949ea8b
JW
5705/* If X is a machine specific address (i.e. a symbol or label being
5706 referenced as a displacement from the GOT implemented using an
5707 UNSPEC), then return the base term. Otherwise return X. */
5708
5709rtx
b96a374d 5710ix86_find_base_term (rtx x)
b949ea8b
JW
5711{
5712 rtx term;
5713
6eb791fc
JH
5714 if (TARGET_64BIT)
5715 {
5716 if (GET_CODE (x) != CONST)
5717 return x;
5718 term = XEXP (x, 0);
5719 if (GET_CODE (term) == PLUS
5720 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5721 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5722 term = XEXP (term, 0);
5723 if (GET_CODE (term) != UNSPEC
8ee41eaf 5724 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5725 return x;
5726
5727 term = XVECEXP (term, 0, 0);
5728
5729 if (GET_CODE (term) != SYMBOL_REF
5730 && GET_CODE (term) != LABEL_REF)
5731 return x;
5732
5733 return term;
5734 }
5735
69bd9368 5736 term = ix86_delegitimize_address (x);
b949ea8b
JW
5737
5738 if (GET_CODE (term) != SYMBOL_REF
5739 && GET_CODE (term) != LABEL_REF)
5740 return x;
5741
5742 return term;
5743}
5744\f
f996902d
RH
5745/* Determine if a given RTX is a valid constant. We already know this
5746 satisfies CONSTANT_P. */
5747
5748bool
b96a374d 5749legitimate_constant_p (rtx x)
f996902d
RH
5750{
5751 rtx inner;
5752
5753 switch (GET_CODE (x))
5754 {
5755 case SYMBOL_REF:
5756 /* TLS symbols are not constant. */
5757 if (tls_symbolic_operand (x, Pmode))
5758 return false;
5759 break;
5760
5761 case CONST:
5762 inner = XEXP (x, 0);
5763
5764 /* Offsets of TLS symbols are never valid.
5765 Discourage CSE from creating them. */
5766 if (GET_CODE (inner) == PLUS
5767 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5768 return false;
5769
799b33a0
JH
5770 if (GET_CODE (inner) == PLUS)
5771 {
5772 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5773 return false;
5774 inner = XEXP (inner, 0);
5775 }
5776
f996902d
RH
5777 /* Only some unspecs are valid as "constants". */
5778 if (GET_CODE (inner) == UNSPEC)
5779 switch (XINT (inner, 1))
5780 {
5781 case UNSPEC_TPOFF:
cb0e3e3f 5782 case UNSPEC_NTPOFF:
f996902d 5783 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
cb0e3e3f
RH
5784 case UNSPEC_DTPOFF:
5785 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5786 default:
5787 return false;
5788 }
5789 break;
5790
5791 default:
5792 break;
5793 }
5794
5795 /* Otherwise we handle everything else in the move patterns. */
5796 return true;
5797}
5798
3a04ff64
RH
5799/* Determine if it's legal to put X into the constant pool. This
5800 is not possible for the address of thread-local symbols, which
5801 is checked above. */
5802
5803static bool
b96a374d 5804ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5805{
5806 return !legitimate_constant_p (x);
5807}
5808
f996902d
RH
5809/* Determine if a given RTX is a valid constant address. */
5810
5811bool
b96a374d 5812constant_address_p (rtx x)
f996902d 5813{
a94f136b 5814 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5815}
5816
5817/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5818 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5819 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5820
5821bool
b96a374d 5822legitimate_pic_operand_p (rtx x)
f996902d
RH
5823{
5824 rtx inner;
5825
5826 switch (GET_CODE (x))
5827 {
5828 case CONST:
5829 inner = XEXP (x, 0);
5830
5831 /* Only some unspecs are valid as "constants". */
5832 if (GET_CODE (inner) == UNSPEC)
5833 switch (XINT (inner, 1))
5834 {
5835 case UNSPEC_TPOFF:
5836 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5837 default:
5838 return false;
5839 }
5efb1046 5840 /* FALLTHRU */
f996902d
RH
5841
5842 case SYMBOL_REF:
5843 case LABEL_REF:
5844 return legitimate_pic_address_disp_p (x);
5845
5846 default:
5847 return true;
5848 }
5849}
5850
e075ae69
RH
5851/* Determine if a given CONST RTX is a valid memory displacement
5852 in PIC mode. */
0f290768 5853
59be65f6 5854int
8d531ab9 5855legitimate_pic_address_disp_p (rtx disp)
91bb873f 5856{
f996902d
RH
5857 bool saw_plus;
5858
6eb791fc
JH
5859 /* In 64bit mode we can allow direct addresses of symbols and labels
5860 when they are not dynamic symbols. */
c05dbe81
JH
5861 if (TARGET_64BIT)
5862 {
5863 /* TLS references should always be enclosed in UNSPEC. */
5864 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5865 return 0;
5866 if (GET_CODE (disp) == SYMBOL_REF
5867 && ix86_cmodel == CM_SMALL_PIC
2ae5ae57 5868 && SYMBOL_REF_LOCAL_P (disp))
c05dbe81
JH
5869 return 1;
5870 if (GET_CODE (disp) == LABEL_REF)
5871 return 1;
5872 if (GET_CODE (disp) == CONST
a132b6a8
JJ
5873 && GET_CODE (XEXP (disp, 0)) == PLUS)
5874 {
5875 rtx op0 = XEXP (XEXP (disp, 0), 0);
5876 rtx op1 = XEXP (XEXP (disp, 0), 1);
5877
5878 /* TLS references should always be enclosed in UNSPEC. */
5879 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5880 return 0;
5881 if (((GET_CODE (op0) == SYMBOL_REF
5882 && ix86_cmodel == CM_SMALL_PIC
5883 && SYMBOL_REF_LOCAL_P (op0))
5884 || GET_CODE (op0) == LABEL_REF)
5885 && GET_CODE (op1) == CONST_INT
5886 && INTVAL (op1) < 16*1024*1024
5887 && INTVAL (op1) >= -16*1024*1024)
5888 return 1;
5889 }
c05dbe81 5890 }
91bb873f
RH
5891 if (GET_CODE (disp) != CONST)
5892 return 0;
5893 disp = XEXP (disp, 0);
5894
6eb791fc
JH
5895 if (TARGET_64BIT)
5896 {
5897 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5898 of GOT tables. We should not need these anyway. */
5899 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5900 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5901 return 0;
5902
5903 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5904 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5905 return 0;
5906 return 1;
5907 }
5908
f996902d 5909 saw_plus = false;
91bb873f
RH
5910 if (GET_CODE (disp) == PLUS)
5911 {
5912 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5913 return 0;
5914 disp = XEXP (disp, 0);
f996902d 5915 saw_plus = true;
91bb873f
RH
5916 }
5917
b069de3b
SS
5918 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5919 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5920 {
5921 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5922 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5923 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5924 {
5925 const char *sym_name = XSTR (XEXP (disp, 1), 0);
86ecdfb6 5926 if (! strcmp (sym_name, "<pic base>"))
b069de3b
SS
5927 return 1;
5928 }
5929 }
5930
8ee41eaf 5931 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5932 return 0;
5933
623fe810
RH
5934 switch (XINT (disp, 1))
5935 {
8ee41eaf 5936 case UNSPEC_GOT:
f996902d
RH
5937 if (saw_plus)
5938 return false;
623fe810 5939 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5940 case UNSPEC_GOTOFF:
799b33a0
JH
5941 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5942 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5943 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5944 return false;
f996902d 5945 case UNSPEC_GOTTPOFF:
dea73790
JJ
5946 case UNSPEC_GOTNTPOFF:
5947 case UNSPEC_INDNTPOFF:
f996902d
RH
5948 if (saw_plus)
5949 return false;
5950 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5951 case UNSPEC_NTPOFF:
f996902d
RH
5952 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5953 case UNSPEC_DTPOFF:
f996902d 5954 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5955 }
fce5a9f2 5956
623fe810 5957 return 0;
91bb873f
RH
5958}
5959
e075ae69
RH
5960/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5961 memory address for an instruction. The MODE argument is the machine mode
5962 for the MEM expression that wants to use this address.
5963
5964 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5965 convert common non-canonical forms to canonical form so that they will
5966 be recognized. */
5967
3b3c6a3f 5968int
8d531ab9 5969legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
3b3c6a3f 5970{
e075ae69
RH
5971 struct ix86_address parts;
5972 rtx base, index, disp;
5973 HOST_WIDE_INT scale;
5974 const char *reason = NULL;
5975 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5976
5977 if (TARGET_DEBUG_ADDR)
5978 {
5979 fprintf (stderr,
e9a25f70 5980 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5981 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5982 debug_rtx (addr);
5983 }
5984
b446e5a2 5985 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5986 {
e075ae69 5987 reason = "decomposition failed";
50e60bc3 5988 goto report_error;
3b3c6a3f
MM
5989 }
5990
e075ae69
RH
5991 base = parts.base;
5992 index = parts.index;
5993 disp = parts.disp;
5994 scale = parts.scale;
91f0226f 5995
e075ae69 5996 /* Validate base register.
e9a25f70
JL
5997
5998 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5999 is one word out of a two word structure, which is represented internally
6000 as a DImode int. */
e9a25f70 6001
3b3c6a3f
MM
6002 if (base)
6003 {
e075ae69
RH
6004 reason_rtx = base;
6005
90e4e4c5 6006 if (GET_CODE (base) != REG)
3b3c6a3f 6007 {
e075ae69 6008 reason = "base is not a register";
50e60bc3 6009 goto report_error;
3b3c6a3f
MM
6010 }
6011
c954bd01
RH
6012 if (GET_MODE (base) != Pmode)
6013 {
e075ae69 6014 reason = "base is not in Pmode";
50e60bc3 6015 goto report_error;
c954bd01
RH
6016 }
6017
90e4e4c5
RH
6018 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6019 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 6020 {
e075ae69 6021 reason = "base is not valid";
50e60bc3 6022 goto report_error;
3b3c6a3f
MM
6023 }
6024 }
6025
e075ae69 6026 /* Validate index register.
e9a25f70
JL
6027
6028 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
6029 is one word out of a two word structure, which is represented internally
6030 as a DImode int. */
e075ae69
RH
6031
6032 if (index)
3b3c6a3f 6033 {
e075ae69
RH
6034 reason_rtx = index;
6035
90e4e4c5 6036 if (GET_CODE (index) != REG)
3b3c6a3f 6037 {
e075ae69 6038 reason = "index is not a register";
50e60bc3 6039 goto report_error;
3b3c6a3f
MM
6040 }
6041
e075ae69 6042 if (GET_MODE (index) != Pmode)
c954bd01 6043 {
e075ae69 6044 reason = "index is not in Pmode";
50e60bc3 6045 goto report_error;
c954bd01
RH
6046 }
6047
90e4e4c5
RH
6048 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6049 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 6050 {
e075ae69 6051 reason = "index is not valid";
50e60bc3 6052 goto report_error;
3b3c6a3f
MM
6053 }
6054 }
3b3c6a3f 6055
e075ae69
RH
6056 /* Validate scale factor. */
6057 if (scale != 1)
3b3c6a3f 6058 {
e075ae69
RH
6059 reason_rtx = GEN_INT (scale);
6060 if (!index)
3b3c6a3f 6061 {
e075ae69 6062 reason = "scale without index";
50e60bc3 6063 goto report_error;
3b3c6a3f
MM
6064 }
6065
e075ae69 6066 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 6067 {
e075ae69 6068 reason = "scale is not a valid multiplier";
50e60bc3 6069 goto report_error;
3b3c6a3f
MM
6070 }
6071 }
6072
91bb873f 6073 /* Validate displacement. */
3b3c6a3f
MM
6074 if (disp)
6075 {
e075ae69
RH
6076 reason_rtx = disp;
6077
f996902d
RH
6078 if (GET_CODE (disp) == CONST
6079 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6080 switch (XINT (XEXP (disp, 0), 1))
6081 {
6082 case UNSPEC_GOT:
6083 case UNSPEC_GOTOFF:
6084 case UNSPEC_GOTPCREL:
6085 if (!flag_pic)
6086 abort ();
6087 goto is_legitimate_pic;
6088
6089 case UNSPEC_GOTTPOFF:
dea73790
JJ
6090 case UNSPEC_GOTNTPOFF:
6091 case UNSPEC_INDNTPOFF:
f996902d
RH
6092 case UNSPEC_NTPOFF:
6093 case UNSPEC_DTPOFF:
6094 break;
6095
6096 default:
6097 reason = "invalid address unspec";
6098 goto report_error;
6099 }
6100
b069de3b
SS
6101 else if (flag_pic && (SYMBOLIC_CONST (disp)
6102#if TARGET_MACHO
6103 && !machopic_operand_p (disp)
6104#endif
6105 ))
3b3c6a3f 6106 {
f996902d 6107 is_legitimate_pic:
0d7d98ee
JH
6108 if (TARGET_64BIT && (index || base))
6109 {
75d38379
JJ
6110 /* foo@dtpoff(%rX) is ok. */
6111 if (GET_CODE (disp) != CONST
6112 || GET_CODE (XEXP (disp, 0)) != PLUS
6113 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6114 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6115 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6116 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6117 {
6118 reason = "non-constant pic memory reference";
6119 goto report_error;
6120 }
0d7d98ee 6121 }
75d38379 6122 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 6123 {
e075ae69 6124 reason = "displacement is an invalid pic construct";
50e60bc3 6125 goto report_error;
91bb873f
RH
6126 }
6127
4e9efe54 6128 /* This code used to verify that a symbolic pic displacement
0f290768
KH
6129 includes the pic_offset_table_rtx register.
6130
4e9efe54
JH
6131 While this is good idea, unfortunately these constructs may
6132 be created by "adds using lea" optimization for incorrect
6133 code like:
6134
6135 int a;
6136 int foo(int i)
6137 {
6138 return *(&a+i);
6139 }
6140
50e60bc3 6141 This code is nonsensical, but results in addressing
4e9efe54 6142 GOT table with pic_offset_table_rtx base. We can't
f710504c 6143 just refuse it easily, since it gets matched by
4e9efe54
JH
6144 "addsi3" pattern, that later gets split to lea in the
6145 case output register differs from input. While this
6146 can be handled by separate addsi pattern for this case
6147 that never results in lea, this seems to be easier and
6148 correct fix for crash to disable this test. */
3b3c6a3f 6149 }
a94f136b
JH
6150 else if (GET_CODE (disp) != LABEL_REF
6151 && GET_CODE (disp) != CONST_INT
6152 && (GET_CODE (disp) != CONST
6153 || !legitimate_constant_p (disp))
6154 && (GET_CODE (disp) != SYMBOL_REF
6155 || !legitimate_constant_p (disp)))
f996902d
RH
6156 {
6157 reason = "displacement is not constant";
6158 goto report_error;
6159 }
c05dbe81
JH
6160 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6161 {
6162 reason = "displacement is out of range";
6163 goto report_error;
6164 }
3b3c6a3f
MM
6165 }
6166
e075ae69 6167 /* Everything looks valid. */
3b3c6a3f 6168 if (TARGET_DEBUG_ADDR)
e075ae69 6169 fprintf (stderr, "Success.\n");
3b3c6a3f 6170 return TRUE;
e075ae69 6171
5bf0ebab 6172 report_error:
e075ae69
RH
6173 if (TARGET_DEBUG_ADDR)
6174 {
6175 fprintf (stderr, "Error: %s\n", reason);
6176 debug_rtx (reason_rtx);
6177 }
6178 return FALSE;
3b3c6a3f 6179}
3b3c6a3f 6180\f
55efb413
JW
6181/* Return an unique alias set for the GOT. */
6182
0f290768 6183static HOST_WIDE_INT
b96a374d 6184ix86_GOT_alias_set (void)
55efb413 6185{
5bf0ebab
RH
6186 static HOST_WIDE_INT set = -1;
6187 if (set == -1)
6188 set = new_alias_set ();
6189 return set;
0f290768 6190}
55efb413 6191
3b3c6a3f
MM
6192/* Return a legitimate reference for ORIG (an address) using the
6193 register REG. If REG is 0, a new pseudo is generated.
6194
91bb873f 6195 There are two types of references that must be handled:
3b3c6a3f
MM
6196
6197 1. Global data references must load the address from the GOT, via
6198 the PIC reg. An insn is emitted to do this load, and the reg is
6199 returned.
6200
91bb873f
RH
6201 2. Static data references, constant pool addresses, and code labels
6202 compute the address as an offset from the GOT, whose base is in
2ae5ae57 6203 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
6204 differentiate them from global data objects. The returned
6205 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
6206
6207 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 6208 reg also appears in the address. */
3b3c6a3f
MM
6209
6210rtx
b96a374d 6211legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
6212{
6213 rtx addr = orig;
6214 rtx new = orig;
91bb873f 6215 rtx base;
3b3c6a3f 6216
b069de3b
SS
6217#if TARGET_MACHO
6218 if (reg == 0)
6219 reg = gen_reg_rtx (Pmode);
6220 /* Use the generic Mach-O PIC machinery. */
6221 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6222#endif
6223
c05dbe81
JH
6224 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6225 new = addr;
6226 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 6227 {
c05dbe81
JH
6228 /* This symbol may be referenced via a displacement from the PIC
6229 base address (@GOTOFF). */
3b3c6a3f 6230
c05dbe81
JH
6231 if (reload_in_progress)
6232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
799b33a0
JH
6233 if (GET_CODE (addr) == CONST)
6234 addr = XEXP (addr, 0);
6235 if (GET_CODE (addr) == PLUS)
6236 {
6237 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6238 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6239 }
6240 else
6241 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
c05dbe81
JH
6242 new = gen_rtx_CONST (Pmode, new);
6243 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 6244
c05dbe81
JH
6245 if (reg != 0)
6246 {
6247 emit_move_insn (reg, new);
6248 new = reg;
6249 }
3b3c6a3f 6250 }
91bb873f 6251 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 6252 {
14f73b5a
JH
6253 if (TARGET_64BIT)
6254 {
8ee41eaf 6255 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
6256 new = gen_rtx_CONST (Pmode, new);
6257 new = gen_rtx_MEM (Pmode, new);
6258 RTX_UNCHANGING_P (new) = 1;
6259 set_mem_alias_set (new, ix86_GOT_alias_set ());
6260
6261 if (reg == 0)
6262 reg = gen_reg_rtx (Pmode);
6263 /* Use directly gen_movsi, otherwise the address is loaded
6264 into register for CSE. We don't want to CSE this addresses,
6265 instead we CSE addresses from the GOT table, so skip this. */
6266 emit_insn (gen_movsi (reg, new));
6267 new = reg;
6268 }
6269 else
6270 {
6271 /* This symbol must be referenced via a load from the
6272 Global Offset Table (@GOT). */
3b3c6a3f 6273
66edd3b4
RH
6274 if (reload_in_progress)
6275 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 6276 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
6277 new = gen_rtx_CONST (Pmode, new);
6278 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6279 new = gen_rtx_MEM (Pmode, new);
6280 RTX_UNCHANGING_P (new) = 1;
6281 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 6282
14f73b5a
JH
6283 if (reg == 0)
6284 reg = gen_reg_rtx (Pmode);
6285 emit_move_insn (reg, new);
6286 new = reg;
6287 }
0f290768 6288 }
91bb873f
RH
6289 else
6290 {
6291 if (GET_CODE (addr) == CONST)
3b3c6a3f 6292 {
91bb873f 6293 addr = XEXP (addr, 0);
e3c8ea67
RH
6294
6295 /* We must match stuff we generate before. Assume the only
6296 unspecs that can get here are ours. Not that we could do
43f3a59d 6297 anything with them anyway.... */
e3c8ea67
RH
6298 if (GET_CODE (addr) == UNSPEC
6299 || (GET_CODE (addr) == PLUS
6300 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6301 return orig;
6302 if (GET_CODE (addr) != PLUS)
564d80f4 6303 abort ();
3b3c6a3f 6304 }
91bb873f
RH
6305 if (GET_CODE (addr) == PLUS)
6306 {
6307 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 6308
91bb873f
RH
6309 /* Check first to see if this is a constant offset from a @GOTOFF
6310 symbol reference. */
623fe810 6311 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
6312 && GET_CODE (op1) == CONST_INT)
6313 {
6eb791fc
JH
6314 if (!TARGET_64BIT)
6315 {
66edd3b4
RH
6316 if (reload_in_progress)
6317 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
6318 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6319 UNSPEC_GOTOFF);
6eb791fc
JH
6320 new = gen_rtx_PLUS (Pmode, new, op1);
6321 new = gen_rtx_CONST (Pmode, new);
6322 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 6323
6eb791fc
JH
6324 if (reg != 0)
6325 {
6326 emit_move_insn (reg, new);
6327 new = reg;
6328 }
6329 }
6330 else
91bb873f 6331 {
75d38379
JJ
6332 if (INTVAL (op1) < -16*1024*1024
6333 || INTVAL (op1) >= 16*1024*1024)
6334 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
91bb873f
RH
6335 }
6336 }
6337 else
6338 {
6339 base = legitimize_pic_address (XEXP (addr, 0), reg);
6340 new = legitimize_pic_address (XEXP (addr, 1),
6341 base == reg ? NULL_RTX : reg);
6342
6343 if (GET_CODE (new) == CONST_INT)
6344 new = plus_constant (base, INTVAL (new));
6345 else
6346 {
6347 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6348 {
6349 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6350 new = XEXP (new, 1);
6351 }
6352 new = gen_rtx_PLUS (Pmode, base, new);
6353 }
6354 }
6355 }
3b3c6a3f
MM
6356 }
6357 return new;
6358}
6359\f
74dc3e94 6360/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6361
6362static rtx
b96a374d 6363get_thread_pointer (int to_reg)
f996902d 6364{
74dc3e94 6365 rtx tp, reg, insn;
f996902d
RH
6366
6367 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6368 if (!to_reg)
6369 return tp;
f996902d 6370
74dc3e94
RH
6371 reg = gen_reg_rtx (Pmode);
6372 insn = gen_rtx_SET (VOIDmode, reg, tp);
6373 insn = emit_insn (insn);
6374
6375 return reg;
6376}
6377
6378/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6379 false if we expect this to be used for a memory address and true if
6380 we expect to load the address into a register. */
6381
6382static rtx
b96a374d 6383legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94
RH
6384{
6385 rtx dest, base, off, pic;
6386 int type;
6387
6388 switch (model)
6389 {
6390 case TLS_MODEL_GLOBAL_DYNAMIC:
6391 dest = gen_reg_rtx (Pmode);
6392 if (TARGET_64BIT)
6393 {
6394 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6395
6396 start_sequence ();
6397 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6398 insns = get_insns ();
6399 end_sequence ();
6400
6401 emit_libcall_block (insns, dest, rax, x);
6402 }
6403 else
6404 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6405 break;
6406
6407 case TLS_MODEL_LOCAL_DYNAMIC:
6408 base = gen_reg_rtx (Pmode);
6409 if (TARGET_64BIT)
6410 {
6411 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6412
6413 start_sequence ();
6414 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6415 insns = get_insns ();
6416 end_sequence ();
6417
6418 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6419 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6420 emit_libcall_block (insns, base, rax, note);
6421 }
6422 else
6423 emit_insn (gen_tls_local_dynamic_base_32 (base));
6424
6425 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6426 off = gen_rtx_CONST (Pmode, off);
6427
6428 return gen_rtx_PLUS (Pmode, base, off);
6429
6430 case TLS_MODEL_INITIAL_EXEC:
6431 if (TARGET_64BIT)
6432 {
6433 pic = NULL;
6434 type = UNSPEC_GOTNTPOFF;
6435 }
6436 else if (flag_pic)
6437 {
6438 if (reload_in_progress)
6439 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6440 pic = pic_offset_table_rtx;
6441 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6442 }
6443 else if (!TARGET_GNU_TLS)
6444 {
6445 pic = gen_reg_rtx (Pmode);
6446 emit_insn (gen_set_got (pic));
6447 type = UNSPEC_GOTTPOFF;
6448 }
6449 else
6450 {
6451 pic = NULL;
6452 type = UNSPEC_INDNTPOFF;
6453 }
6454
6455 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6456 off = gen_rtx_CONST (Pmode, off);
6457 if (pic)
6458 off = gen_rtx_PLUS (Pmode, pic, off);
6459 off = gen_rtx_MEM (Pmode, off);
6460 RTX_UNCHANGING_P (off) = 1;
6461 set_mem_alias_set (off, ix86_GOT_alias_set ());
6462
6463 if (TARGET_64BIT || TARGET_GNU_TLS)
6464 {
6465 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6466 off = force_reg (Pmode, off);
6467 return gen_rtx_PLUS (Pmode, base, off);
6468 }
6469 else
6470 {
6471 base = get_thread_pointer (true);
6472 dest = gen_reg_rtx (Pmode);
6473 emit_insn (gen_subsi3 (dest, base, off));
6474 }
6475 break;
6476
6477 case TLS_MODEL_LOCAL_EXEC:
6478 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6479 (TARGET_64BIT || TARGET_GNU_TLS)
6480 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6481 off = gen_rtx_CONST (Pmode, off);
6482
6483 if (TARGET_64BIT || TARGET_GNU_TLS)
6484 {
6485 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6486 return gen_rtx_PLUS (Pmode, base, off);
6487 }
6488 else
6489 {
6490 base = get_thread_pointer (true);
6491 dest = gen_reg_rtx (Pmode);
6492 emit_insn (gen_subsi3 (dest, base, off));
6493 }
6494 break;
6495
6496 default:
6497 abort ();
6498 }
6499
6500 return dest;
f996902d 6501}
fce5a9f2 6502
3b3c6a3f
MM
6503/* Try machine-dependent ways of modifying an illegitimate address
6504 to be legitimate. If we find one, return the new, valid address.
6505 This macro is used in only one place: `memory_address' in explow.c.
6506
6507 OLDX is the address as it was before break_out_memory_refs was called.
6508 In some cases it is useful to look at this to decide what needs to be done.
6509
6510 MODE and WIN are passed so that this macro can use
6511 GO_IF_LEGITIMATE_ADDRESS.
6512
6513 It is always safe for this macro to do nothing. It exists to recognize
6514 opportunities to optimize the output.
6515
6516 For the 80386, we handle X+REG by loading X into a register R and
6517 using R+REG. R will go in a general reg and indexing will be used.
6518 However, if REG is a broken-out memory address or multiplication,
6519 nothing needs to be done because REG can certainly go in a general reg.
6520
6521 When -fpic is used, special handling is needed for symbolic references.
6522 See comments by legitimize_pic_address in i386.c for details. */
6523
6524rtx
8d531ab9 6525legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
6526{
6527 int changed = 0;
6528 unsigned log;
6529
6530 if (TARGET_DEBUG_ADDR)
6531 {
e9a25f70
JL
6532 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6533 GET_MODE_NAME (mode));
3b3c6a3f
MM
6534 debug_rtx (x);
6535 }
6536
f996902d
RH
6537 log = tls_symbolic_operand (x, mode);
6538 if (log)
74dc3e94 6539 return legitimize_tls_address (x, log, false);
f996902d 6540
3b3c6a3f
MM
6541 if (flag_pic && SYMBOLIC_CONST (x))
6542 return legitimize_pic_address (x, 0);
6543
6544 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6545 if (GET_CODE (x) == ASHIFT
6546 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 6547 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
6548 {
6549 changed = 1;
a269a03c
JC
6550 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6551 GEN_INT (1 << log));
3b3c6a3f
MM
6552 }
6553
6554 if (GET_CODE (x) == PLUS)
6555 {
0f290768 6556 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 6557
3b3c6a3f
MM
6558 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6559 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 6560 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
6561 {
6562 changed = 1;
c5c76735
JL
6563 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6564 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6565 GEN_INT (1 << log));
3b3c6a3f
MM
6566 }
6567
6568 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6569 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 6570 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
6571 {
6572 changed = 1;
c5c76735
JL
6573 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6574 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6575 GEN_INT (1 << log));
3b3c6a3f
MM
6576 }
6577
0f290768 6578 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
6579 if (GET_CODE (XEXP (x, 1)) == MULT)
6580 {
6581 rtx tmp = XEXP (x, 0);
6582 XEXP (x, 0) = XEXP (x, 1);
6583 XEXP (x, 1) = tmp;
6584 changed = 1;
6585 }
6586
6587 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6588 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6589 created by virtual register instantiation, register elimination, and
6590 similar optimizations. */
6591 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6592 {
6593 changed = 1;
c5c76735
JL
6594 x = gen_rtx_PLUS (Pmode,
6595 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6596 XEXP (XEXP (x, 1), 0)),
6597 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
6598 }
6599
e9a25f70
JL
6600 /* Canonicalize
6601 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
6602 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6603 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6604 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6605 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6606 && CONSTANT_P (XEXP (x, 1)))
6607 {
00c79232
ML
6608 rtx constant;
6609 rtx other = NULL_RTX;
3b3c6a3f
MM
6610
6611 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6612 {
6613 constant = XEXP (x, 1);
6614 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6615 }
6616 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6617 {
6618 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6619 other = XEXP (x, 1);
6620 }
6621 else
6622 constant = 0;
6623
6624 if (constant)
6625 {
6626 changed = 1;
c5c76735
JL
6627 x = gen_rtx_PLUS (Pmode,
6628 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6629 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6630 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
6631 }
6632 }
6633
6634 if (changed && legitimate_address_p (mode, x, FALSE))
6635 return x;
6636
6637 if (GET_CODE (XEXP (x, 0)) == MULT)
6638 {
6639 changed = 1;
6640 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6641 }
6642
6643 if (GET_CODE (XEXP (x, 1)) == MULT)
6644 {
6645 changed = 1;
6646 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6647 }
6648
6649 if (changed
6650 && GET_CODE (XEXP (x, 1)) == REG
6651 && GET_CODE (XEXP (x, 0)) == REG)
6652 return x;
6653
6654 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6655 {
6656 changed = 1;
6657 x = legitimize_pic_address (x, 0);
6658 }
6659
6660 if (changed && legitimate_address_p (mode, x, FALSE))
6661 return x;
6662
6663 if (GET_CODE (XEXP (x, 0)) == REG)
6664 {
8d531ab9
KH
6665 rtx temp = gen_reg_rtx (Pmode);
6666 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
6667 if (val != temp)
6668 emit_move_insn (temp, val);
6669
6670 XEXP (x, 1) = temp;
6671 return x;
6672 }
6673
6674 else if (GET_CODE (XEXP (x, 1)) == REG)
6675 {
8d531ab9
KH
6676 rtx temp = gen_reg_rtx (Pmode);
6677 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
6678 if (val != temp)
6679 emit_move_insn (temp, val);
6680
6681 XEXP (x, 0) = temp;
6682 return x;
6683 }
6684 }
6685
6686 return x;
6687}
2a2ab3f9
JVA
6688\f
6689/* Print an integer constant expression in assembler syntax. Addition
6690 and subtraction are the only arithmetic that may appear in these
6691 expressions. FILE is the stdio stream to write to, X is the rtx, and
6692 CODE is the operand print code from the output string. */
6693
6694static void
b96a374d 6695output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6696{
6697 char buf[256];
6698
6699 switch (GET_CODE (x))
6700 {
6701 case PC:
6702 if (flag_pic)
6703 putc ('.', file);
6704 else
6705 abort ();
6706 break;
6707
6708 case SYMBOL_REF:
91bb873f 6709 assemble_name (file, XSTR (x, 0));
12969f45 6710 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 6711 fputs ("@PLT", file);
2a2ab3f9
JVA
6712 break;
6713
91bb873f
RH
6714 case LABEL_REF:
6715 x = XEXP (x, 0);
5efb1046 6716 /* FALLTHRU */
2a2ab3f9
JVA
6717 case CODE_LABEL:
6718 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6719 assemble_name (asm_out_file, buf);
6720 break;
6721
6722 case CONST_INT:
f64cecad 6723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
6724 break;
6725
6726 case CONST:
6727 /* This used to output parentheses around the expression,
6728 but that does not work on the 386 (either ATT or BSD assembler). */
6729 output_pic_addr_const (file, XEXP (x, 0), code);
6730 break;
6731
6732 case CONST_DOUBLE:
6733 if (GET_MODE (x) == VOIDmode)
6734 {
6735 /* We can use %d if the number is <32 bits and positive. */
6736 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
6737 fprintf (file, "0x%lx%08lx",
6738 (unsigned long) CONST_DOUBLE_HIGH (x),
6739 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6740 else
f64cecad 6741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
6742 }
6743 else
6744 /* We can't handle floating point constants;
6745 PRINT_OPERAND must handle them. */
6746 output_operand_lossage ("floating constant misused");
6747 break;
6748
6749 case PLUS:
e9a25f70 6750 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6751 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6752 {
2a2ab3f9 6753 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6754 putc ('+', file);
e9a25f70 6755 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6756 }
91bb873f 6757 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6758 {
2a2ab3f9 6759 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6760 putc ('+', file);
e9a25f70 6761 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6762 }
91bb873f
RH
6763 else
6764 abort ();
2a2ab3f9
JVA
6765 break;
6766
6767 case MINUS:
b069de3b
SS
6768 if (!TARGET_MACHO)
6769 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6770 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6771 putc ('-', file);
2a2ab3f9 6772 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6773 if (!TARGET_MACHO)
6774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6775 break;
6776
91bb873f
RH
6777 case UNSPEC:
6778 if (XVECLEN (x, 0) != 1)
5bf0ebab 6779 abort ();
91bb873f
RH
6780 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6781 switch (XINT (x, 1))
77ebd435 6782 {
8ee41eaf 6783 case UNSPEC_GOT:
77ebd435
AJ
6784 fputs ("@GOT", file);
6785 break;
8ee41eaf 6786 case UNSPEC_GOTOFF:
77ebd435
AJ
6787 fputs ("@GOTOFF", file);
6788 break;
8ee41eaf 6789 case UNSPEC_GOTPCREL:
edfe8595 6790 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6791 break;
f996902d 6792 case UNSPEC_GOTTPOFF:
dea73790 6793 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6794 fputs ("@GOTTPOFF", file);
6795 break;
6796 case UNSPEC_TPOFF:
6797 fputs ("@TPOFF", file);
6798 break;
6799 case UNSPEC_NTPOFF:
75d38379
JJ
6800 if (TARGET_64BIT)
6801 fputs ("@TPOFF", file);
6802 else
6803 fputs ("@NTPOFF", file);
f996902d
RH
6804 break;
6805 case UNSPEC_DTPOFF:
6806 fputs ("@DTPOFF", file);
6807 break;
dea73790 6808 case UNSPEC_GOTNTPOFF:
75d38379
JJ
6809 if (TARGET_64BIT)
6810 fputs ("@GOTTPOFF(%rip)", file);
6811 else
6812 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6813 break;
6814 case UNSPEC_INDNTPOFF:
6815 fputs ("@INDNTPOFF", file);
6816 break;
77ebd435
AJ
6817 default:
6818 output_operand_lossage ("invalid UNSPEC as operand");
6819 break;
6820 }
91bb873f
RH
6821 break;
6822
2a2ab3f9
JVA
6823 default:
6824 output_operand_lossage ("invalid expression as operand");
6825 }
6826}
1865dbb5 6827
0f290768 6828/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6829 We need to handle our special PIC relocations. */
6830
0f290768 6831void
b96a374d 6832i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6833{
14f73b5a 6834#ifdef ASM_QUAD
18b5b8d6 6835 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6836#else
6837 if (TARGET_64BIT)
6838 abort ();
18b5b8d6 6839 fprintf (file, "%s", ASM_LONG);
14f73b5a 6840#endif
1865dbb5
JM
6841 if (flag_pic)
6842 output_pic_addr_const (file, x, '\0');
6843 else
6844 output_addr_const (file, x);
6845 fputc ('\n', file);
6846}
6847
b9203463
RH
6848/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6849 We need to emit DTP-relative relocations. */
6850
6851void
b96a374d 6852i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6853{
75d38379
JJ
6854 fputs (ASM_LONG, file);
6855 output_addr_const (file, x);
6856 fputs ("@DTPOFF", file);
b9203463
RH
6857 switch (size)
6858 {
6859 case 4:
b9203463
RH
6860 break;
6861 case 8:
75d38379 6862 fputs (", 0", file);
b9203463 6863 break;
b9203463
RH
6864 default:
6865 abort ();
6866 }
b9203463
RH
6867}
6868
1865dbb5
JM
6869/* In the name of slightly smaller debug output, and to cater to
6870 general assembler losage, recognize PIC+GOTOFF and turn it back
6871 into a direct symbol reference. */
6872
69bd9368 6873static rtx
b96a374d 6874ix86_delegitimize_address (rtx orig_x)
1865dbb5 6875{
ec65b2e3 6876 rtx x = orig_x, y;
1865dbb5 6877
4c8c0dec
JJ
6878 if (GET_CODE (x) == MEM)
6879 x = XEXP (x, 0);
6880
6eb791fc
JH
6881 if (TARGET_64BIT)
6882 {
6883 if (GET_CODE (x) != CONST
6884 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6885 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6886 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6887 return orig_x;
6888 return XVECEXP (XEXP (x, 0), 0, 0);
6889 }
6890
1865dbb5 6891 if (GET_CODE (x) != PLUS
1865dbb5
JM
6892 || GET_CODE (XEXP (x, 1)) != CONST)
6893 return orig_x;
6894
ec65b2e3
JJ
6895 if (GET_CODE (XEXP (x, 0)) == REG
6896 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6897 /* %ebx + GOT/GOTOFF */
6898 y = NULL;
6899 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6900 {
6901 /* %ebx + %reg * scale + GOT/GOTOFF */
6902 y = XEXP (x, 0);
6903 if (GET_CODE (XEXP (y, 0)) == REG
6904 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6905 y = XEXP (y, 1);
6906 else if (GET_CODE (XEXP (y, 1)) == REG
6907 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6908 y = XEXP (y, 0);
6909 else
6910 return orig_x;
6911 if (GET_CODE (y) != REG
6912 && GET_CODE (y) != MULT
6913 && GET_CODE (y) != ASHIFT)
6914 return orig_x;
6915 }
6916 else
6917 return orig_x;
6918
1865dbb5
JM
6919 x = XEXP (XEXP (x, 1), 0);
6920 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6921 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6922 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6923 {
6924 if (y)
6925 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6926 return XVECEXP (x, 0, 0);
6927 }
1865dbb5
JM
6928
6929 if (GET_CODE (x) == PLUS
6930 && GET_CODE (XEXP (x, 0)) == UNSPEC
6931 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6932 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6933 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6934 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6935 {
6936 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6937 if (y)
6938 return gen_rtx_PLUS (Pmode, y, x);
6939 return x;
6940 }
1865dbb5
JM
6941
6942 return orig_x;
6943}
2a2ab3f9 6944\f
a269a03c 6945static void
b96a374d
AJ
6946put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6947 int fp, FILE *file)
a269a03c 6948{
a269a03c
JC
6949 const char *suffix;
6950
9a915772
JH
6951 if (mode == CCFPmode || mode == CCFPUmode)
6952 {
6953 enum rtx_code second_code, bypass_code;
6954 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6955 if (bypass_code != NIL || second_code != NIL)
b531087a 6956 abort ();
9a915772
JH
6957 code = ix86_fp_compare_code_to_integer (code);
6958 mode = CCmode;
6959 }
a269a03c
JC
6960 if (reverse)
6961 code = reverse_condition (code);
e075ae69 6962
a269a03c
JC
6963 switch (code)
6964 {
6965 case EQ:
6966 suffix = "e";
6967 break;
a269a03c
JC
6968 case NE:
6969 suffix = "ne";
6970 break;
a269a03c 6971 case GT:
7e08e190 6972 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6973 abort ();
6974 suffix = "g";
a269a03c 6975 break;
a269a03c 6976 case GTU:
e075ae69
RH
6977 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6978 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6979 if (mode != CCmode)
0f290768 6980 abort ();
e075ae69 6981 suffix = fp ? "nbe" : "a";
a269a03c 6982 break;
a269a03c 6983 case LT:
9076b9c1 6984 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6985 suffix = "s";
7e08e190 6986 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6987 suffix = "l";
9076b9c1 6988 else
0f290768 6989 abort ();
a269a03c 6990 break;
a269a03c 6991 case LTU:
9076b9c1 6992 if (mode != CCmode)
0f290768 6993 abort ();
a269a03c
JC
6994 suffix = "b";
6995 break;
a269a03c 6996 case GE:
9076b9c1 6997 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6998 suffix = "ns";
7e08e190 6999 else if (mode == CCmode || mode == CCGCmode)
e075ae69 7000 suffix = "ge";
9076b9c1 7001 else
0f290768 7002 abort ();
a269a03c 7003 break;
a269a03c 7004 case GEU:
e075ae69 7005 /* ??? As above. */
7e08e190 7006 if (mode != CCmode)
0f290768 7007 abort ();
7e08e190 7008 suffix = fp ? "nb" : "ae";
a269a03c 7009 break;
a269a03c 7010 case LE:
7e08e190 7011 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
7012 abort ();
7013 suffix = "le";
a269a03c 7014 break;
a269a03c 7015 case LEU:
9076b9c1
JH
7016 if (mode != CCmode)
7017 abort ();
7e08e190 7018 suffix = "be";
a269a03c 7019 break;
3a3677ff 7020 case UNORDERED:
9e7adcb3 7021 suffix = fp ? "u" : "p";
3a3677ff
RH
7022 break;
7023 case ORDERED:
9e7adcb3 7024 suffix = fp ? "nu" : "np";
3a3677ff 7025 break;
a269a03c
JC
7026 default:
7027 abort ();
7028 }
7029 fputs (suffix, file);
7030}
7031
a55f4481
RK
7032/* Print the name of register X to FILE based on its machine mode and number.
7033 If CODE is 'w', pretend the mode is HImode.
7034 If CODE is 'b', pretend the mode is QImode.
7035 If CODE is 'k', pretend the mode is SImode.
7036 If CODE is 'q', pretend the mode is DImode.
7037 If CODE is 'h', pretend the reg is the `high' byte register.
7038 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7039
e075ae69 7040void
b96a374d 7041print_reg (rtx x, int code, FILE *file)
e5cb57e8 7042{
a55f4481
RK
7043 if (REGNO (x) == ARG_POINTER_REGNUM
7044 || REGNO (x) == FRAME_POINTER_REGNUM
7045 || REGNO (x) == FLAGS_REG
7046 || REGNO (x) == FPSR_REG)
480feac0
ZW
7047 abort ();
7048
5bf0ebab 7049 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
7050 putc ('%', file);
7051
ef6257cd 7052 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
7053 code = 2;
7054 else if (code == 'b')
7055 code = 1;
7056 else if (code == 'k')
7057 code = 4;
3f3f2124
JH
7058 else if (code == 'q')
7059 code = 8;
e075ae69
RH
7060 else if (code == 'y')
7061 code = 3;
7062 else if (code == 'h')
7063 code = 0;
7064 else
7065 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 7066
3f3f2124
JH
7067 /* Irritatingly, AMD extended registers use different naming convention
7068 from the normal registers. */
7069 if (REX_INT_REG_P (x))
7070 {
885a70fd
JH
7071 if (!TARGET_64BIT)
7072 abort ();
3f3f2124
JH
7073 switch (code)
7074 {
ef6257cd 7075 case 0:
c725bd79 7076 error ("extended registers have no high halves");
3f3f2124
JH
7077 break;
7078 case 1:
7079 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7080 break;
7081 case 2:
7082 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7083 break;
7084 case 4:
7085 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7086 break;
7087 case 8:
7088 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7089 break;
7090 default:
c725bd79 7091 error ("unsupported operand size for extended register");
3f3f2124
JH
7092 break;
7093 }
7094 return;
7095 }
e075ae69
RH
7096 switch (code)
7097 {
7098 case 3:
7099 if (STACK_TOP_P (x))
7100 {
7101 fputs ("st(0)", file);
7102 break;
7103 }
5efb1046 7104 /* FALLTHRU */
e075ae69 7105 case 8:
3f3f2124 7106 case 4:
e075ae69 7107 case 12:
446988df 7108 if (! ANY_FP_REG_P (x))
885a70fd 7109 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 7110 /* FALLTHRU */
a7180f70 7111 case 16:
e075ae69 7112 case 2:
d4c32b6f 7113 normal:
e075ae69
RH
7114 fputs (hi_reg_name[REGNO (x)], file);
7115 break;
7116 case 1:
d4c32b6f
RH
7117 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7118 goto normal;
e075ae69
RH
7119 fputs (qi_reg_name[REGNO (x)], file);
7120 break;
7121 case 0:
d4c32b6f
RH
7122 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7123 goto normal;
e075ae69
RH
7124 fputs (qi_high_reg_name[REGNO (x)], file);
7125 break;
7126 default:
7127 abort ();
fe25fea3 7128 }
e5cb57e8
SC
7129}
7130
f996902d
RH
7131/* Locate some local-dynamic symbol still in use by this function
7132 so that we can print its name in some tls_local_dynamic_base
7133 pattern. */
7134
7135static const char *
b96a374d 7136get_some_local_dynamic_name (void)
f996902d
RH
7137{
7138 rtx insn;
7139
7140 if (cfun->machine->some_ld_name)
7141 return cfun->machine->some_ld_name;
7142
7143 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7144 if (INSN_P (insn)
7145 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7146 return cfun->machine->some_ld_name;
7147
7148 abort ();
7149}
7150
7151static int
b96a374d 7152get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7153{
7154 rtx x = *px;
7155
7156 if (GET_CODE (x) == SYMBOL_REF
7157 && local_dynamic_symbolic_operand (x, Pmode))
7158 {
7159 cfun->machine->some_ld_name = XSTR (x, 0);
7160 return 1;
7161 }
7162
7163 return 0;
7164}
7165
2a2ab3f9 7166/* Meaning of CODE:
fe25fea3 7167 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 7168 C -- print opcode suffix for set/cmov insn.
fe25fea3 7169 c -- like C, but print reversed condition
ef6257cd 7170 F,f -- likewise, but for floating-point.
f6f5dff2
RO
7171 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7172 otherwise nothing
2a2ab3f9
JVA
7173 R -- print the prefix for register names.
7174 z -- print the opcode suffix for the size of the current operand.
7175 * -- print a star (in certain assembler syntax)
fb204271 7176 A -- print an absolute memory reference.
2a2ab3f9 7177 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
7178 s -- print a shift double count, followed by the assemblers argument
7179 delimiter.
fe25fea3
SC
7180 b -- print the QImode name of the register for the indicated operand.
7181 %b0 would print %al if operands[0] is reg 0.
7182 w -- likewise, print the HImode name of the register.
7183 k -- likewise, print the SImode name of the register.
3f3f2124 7184 q -- likewise, print the DImode name of the register.
ef6257cd
JH
7185 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7186 y -- print "st(0)" instead of "st" as a register.
a46d1d38 7187 D -- print condition for SSE cmp instruction.
ef6257cd
JH
7188 P -- if PIC, print an @PLT suffix.
7189 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 7190 & -- print some in-use local-dynamic symbol name.
a46d1d38 7191 */
2a2ab3f9
JVA
7192
7193void
b96a374d 7194print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7195{
7196 if (code)
7197 {
7198 switch (code)
7199 {
7200 case '*':
80f33d06 7201 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
7202 putc ('*', file);
7203 return;
7204
f996902d
RH
7205 case '&':
7206 assemble_name (file, get_some_local_dynamic_name ());
7207 return;
7208
fb204271 7209 case 'A':
80f33d06 7210 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7211 putc ('*', file);
80f33d06 7212 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7213 {
7214 /* Intel syntax. For absolute addresses, registers should not
7215 be surrounded by braces. */
7216 if (GET_CODE (x) != REG)
7217 {
7218 putc ('[', file);
7219 PRINT_OPERAND (file, x, 0);
7220 putc (']', file);
7221 return;
7222 }
7223 }
80f33d06
GS
7224 else
7225 abort ();
fb204271
DN
7226
7227 PRINT_OPERAND (file, x, 0);
7228 return;
7229
7230
2a2ab3f9 7231 case 'L':
80f33d06 7232 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7233 putc ('l', file);
2a2ab3f9
JVA
7234 return;
7235
7236 case 'W':
80f33d06 7237 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7238 putc ('w', file);
2a2ab3f9
JVA
7239 return;
7240
7241 case 'B':
80f33d06 7242 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7243 putc ('b', file);
2a2ab3f9
JVA
7244 return;
7245
7246 case 'Q':
80f33d06 7247 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7248 putc ('l', file);
2a2ab3f9
JVA
7249 return;
7250
7251 case 'S':
80f33d06 7252 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7253 putc ('s', file);
2a2ab3f9
JVA
7254 return;
7255
5f1ec3e6 7256 case 'T':
80f33d06 7257 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7258 putc ('t', file);
5f1ec3e6
JVA
7259 return;
7260
2a2ab3f9
JVA
7261 case 'z':
7262 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7263 registers. */
2a2ab3f9
JVA
7264 if (STACK_REG_P (x))
7265 return;
7266
831c4e87
KC
7267 /* Likewise if using Intel opcodes. */
7268 if (ASSEMBLER_DIALECT == ASM_INTEL)
7269 return;
7270
7271 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
7272 switch (GET_MODE_SIZE (GET_MODE (x)))
7273 {
2a2ab3f9 7274 case 2:
155d8a47
JW
7275#ifdef HAVE_GAS_FILDS_FISTS
7276 putc ('s', file);
7277#endif
2a2ab3f9
JVA
7278 return;
7279
7280 case 4:
7281 if (GET_MODE (x) == SFmode)
7282 {
e075ae69 7283 putc ('s', file);
2a2ab3f9
JVA
7284 return;
7285 }
7286 else
e075ae69 7287 putc ('l', file);
2a2ab3f9
JVA
7288 return;
7289
5f1ec3e6 7290 case 12:
2b589241 7291 case 16:
e075ae69
RH
7292 putc ('t', file);
7293 return;
5f1ec3e6 7294
2a2ab3f9
JVA
7295 case 8:
7296 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
7297 {
7298#ifdef GAS_MNEMONICS
e075ae69 7299 putc ('q', file);
56c0e8fa 7300#else
e075ae69
RH
7301 putc ('l', file);
7302 putc ('l', file);
56c0e8fa
JVA
7303#endif
7304 }
e075ae69
RH
7305 else
7306 putc ('l', file);
2a2ab3f9 7307 return;
155d8a47
JW
7308
7309 default:
7310 abort ();
2a2ab3f9 7311 }
4af3895e
JVA
7312
7313 case 'b':
7314 case 'w':
7315 case 'k':
3f3f2124 7316 case 'q':
4af3895e
JVA
7317 case 'h':
7318 case 'y':
5cb6195d 7319 case 'X':
e075ae69 7320 case 'P':
4af3895e
JVA
7321 break;
7322
2d49677f
SC
7323 case 's':
7324 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7325 {
7326 PRINT_OPERAND (file, x, 0);
e075ae69 7327 putc (',', file);
2d49677f 7328 }
a269a03c
JC
7329 return;
7330
a46d1d38
JH
7331 case 'D':
7332 /* Little bit of braindamage here. The SSE compare instructions
7333 does use completely different names for the comparisons that the
7334 fp conditional moves. */
7335 switch (GET_CODE (x))
7336 {
7337 case EQ:
7338 case UNEQ:
7339 fputs ("eq", file);
7340 break;
7341 case LT:
7342 case UNLT:
7343 fputs ("lt", file);
7344 break;
7345 case LE:
7346 case UNLE:
7347 fputs ("le", file);
7348 break;
7349 case UNORDERED:
7350 fputs ("unord", file);
7351 break;
7352 case NE:
7353 case LTGT:
7354 fputs ("neq", file);
7355 break;
7356 case UNGE:
7357 case GE:
7358 fputs ("nlt", file);
7359 break;
7360 case UNGT:
7361 case GT:
7362 fputs ("nle", file);
7363 break;
7364 case ORDERED:
7365 fputs ("ord", file);
7366 break;
7367 default:
7368 abort ();
7369 break;
7370 }
7371 return;
048b1c95 7372 case 'O':
f6f5dff2 7373#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7374 if (ASSEMBLER_DIALECT == ASM_ATT)
7375 {
7376 switch (GET_MODE (x))
7377 {
7378 case HImode: putc ('w', file); break;
7379 case SImode:
7380 case SFmode: putc ('l', file); break;
7381 case DImode:
7382 case DFmode: putc ('q', file); break;
7383 default: abort ();
7384 }
7385 putc ('.', file);
7386 }
7387#endif
7388 return;
1853aadd 7389 case 'C':
e075ae69 7390 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7391 return;
fe25fea3 7392 case 'F':
f6f5dff2 7393#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7394 if (ASSEMBLER_DIALECT == ASM_ATT)
7395 putc ('.', file);
7396#endif
e075ae69 7397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7398 return;
7399
e9a25f70 7400 /* Like above, but reverse condition */
e075ae69 7401 case 'c':
fce5a9f2 7402 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
7403 and not a condition code which needs to be reversed. */
7404 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7405 {
7406 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7407 return;
7408 }
e075ae69
RH
7409 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7410 return;
fe25fea3 7411 case 'f':
f6f5dff2 7412#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7413 if (ASSEMBLER_DIALECT == ASM_ATT)
7414 putc ('.', file);
7415#endif
e075ae69 7416 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7417 return;
ef6257cd
JH
7418 case '+':
7419 {
7420 rtx x;
e5cb57e8 7421
ef6257cd
JH
7422 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7423 return;
a4f31c00 7424
ef6257cd
JH
7425 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7426 if (x)
7427 {
7428 int pred_val = INTVAL (XEXP (x, 0));
7429
7430 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7431 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7432 {
7433 int taken = pred_val > REG_BR_PROB_BASE / 2;
7434 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7435
7436 /* Emit hints only in the case default branch prediction
d1f87653 7437 heuristics would fail. */
ef6257cd
JH
7438 if (taken != cputaken)
7439 {
7440 /* We use 3e (DS) prefix for taken branches and
7441 2e (CS) prefix for not taken branches. */
7442 if (taken)
7443 fputs ("ds ; ", file);
7444 else
7445 fputs ("cs ; ", file);
7446 }
7447 }
7448 }
7449 return;
7450 }
4af3895e 7451 default:
a52453cc 7452 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7453 }
7454 }
e9a25f70 7455
2a2ab3f9 7456 if (GET_CODE (x) == REG)
a55f4481 7457 print_reg (x, code, file);
e9a25f70 7458
2a2ab3f9
JVA
7459 else if (GET_CODE (x) == MEM)
7460 {
e075ae69 7461 /* No `byte ptr' prefix for call instructions. */
80f33d06 7462 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7463 {
69ddee61 7464 const char * size;
e075ae69
RH
7465 switch (GET_MODE_SIZE (GET_MODE (x)))
7466 {
7467 case 1: size = "BYTE"; break;
7468 case 2: size = "WORD"; break;
7469 case 4: size = "DWORD"; break;
7470 case 8: size = "QWORD"; break;
7471 case 12: size = "XWORD"; break;
a7180f70 7472 case 16: size = "XMMWORD"; break;
e075ae69 7473 default:
564d80f4 7474 abort ();
e075ae69 7475 }
fb204271
DN
7476
7477 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7478 if (code == 'b')
7479 size = "BYTE";
7480 else if (code == 'w')
7481 size = "WORD";
7482 else if (code == 'k')
7483 size = "DWORD";
7484
e075ae69
RH
7485 fputs (size, file);
7486 fputs (" PTR ", file);
2a2ab3f9 7487 }
e075ae69
RH
7488
7489 x = XEXP (x, 0);
0d7d98ee 7490 /* Avoid (%rip) for call operands. */
d10f5ecf 7491 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7492 && GET_CODE (x) != CONST_INT)
7493 output_addr_const (file, x);
c8b94768
RH
7494 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7495 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7496 else
e075ae69 7497 output_address (x);
2a2ab3f9 7498 }
e9a25f70 7499
2a2ab3f9
JVA
7500 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7501 {
e9a25f70
JL
7502 REAL_VALUE_TYPE r;
7503 long l;
7504
5f1ec3e6
JVA
7505 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7506 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7507
80f33d06 7508 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7509 putc ('$', file);
781f4ec1 7510 fprintf (file, "0x%08lx", l);
5f1ec3e6 7511 }
e9a25f70 7512
74dc3e94
RH
7513 /* These float cases don't actually occur as immediate operands. */
7514 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7515 {
e9a25f70
JL
7516 char dstr[30];
7517
da6eec72 7518 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7519 fprintf (file, "%s", dstr);
2a2ab3f9 7520 }
e9a25f70 7521
2b589241 7522 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 7523 && GET_MODE (x) == XFmode)
2a2ab3f9 7524 {
e9a25f70
JL
7525 char dstr[30];
7526
da6eec72 7527 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7528 fprintf (file, "%s", dstr);
2a2ab3f9 7529 }
f996902d 7530
79325812 7531 else
2a2ab3f9 7532 {
4af3895e 7533 if (code != 'P')
2a2ab3f9 7534 {
695dac07 7535 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7536 {
80f33d06 7537 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7538 putc ('$', file);
7539 }
2a2ab3f9
JVA
7540 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7541 || GET_CODE (x) == LABEL_REF)
e075ae69 7542 {
80f33d06 7543 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7544 putc ('$', file);
7545 else
7546 fputs ("OFFSET FLAT:", file);
7547 }
2a2ab3f9 7548 }
e075ae69
RH
7549 if (GET_CODE (x) == CONST_INT)
7550 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7551 else if (flag_pic)
2a2ab3f9
JVA
7552 output_pic_addr_const (file, x, code);
7553 else
7554 output_addr_const (file, x);
7555 }
7556}
7557\f
/* Print a memory operand whose address is ADDR.

   The address is first decomposed into base/index/displacement/scale
   parts by ix86_decompose_address, then printed either in AT&T form
   "seg:disp(base,index,scale)" or in Intel form
   "seg:[base+disp+index*scale]" depending on ASSEMBLER_DIALECT.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit an explicit segment override for %fs/%gs based addresses.
     The '%' register prefix is suppressed when user labels already
     carry a prefix character.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          /* In Intel syntax an absolute numeric address needs an
             explicit "ds:" so the assembler does not read it as an
             immediate.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.
         Applies to plain symbols (but not TLS symbols, which need
         their own relocations), labels, and symbol/label plus a
         constant offset.  */
      if (TARGET_64BIT
          && ((GET_CODE (disp) == SYMBOL_REF
               && ! tls_symbolic_operand (disp, GET_MODE (disp)))
              || GET_CODE (disp) == LABEL_REF
              || (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
                      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
        fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          /* AT&T syntax: disp(base,index,scale).  */
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, 0, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          /* Intel syntax: [base+offset+index*scale].  */
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, 0, file);
              if (offset)
                {
                  /* Negative offsets supply their own '-' sign.  */
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            /* No base and no numeric offset: emit an explicit 0 so the
               bracket expression is never empty.  */
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
f996902d
RH
7699
7700bool
b96a374d 7701output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
7702{
7703 rtx op;
7704
7705 if (GET_CODE (x) != UNSPEC)
7706 return false;
7707
7708 op = XVECEXP (x, 0, 0);
7709 switch (XINT (x, 1))
7710 {
7711 case UNSPEC_GOTTPOFF:
7712 output_addr_const (file, op);
dea73790 7713 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7714 fputs ("@GOTTPOFF", file);
7715 break;
7716 case UNSPEC_TPOFF:
7717 output_addr_const (file, op);
7718 fputs ("@TPOFF", file);
7719 break;
7720 case UNSPEC_NTPOFF:
7721 output_addr_const (file, op);
75d38379
JJ
7722 if (TARGET_64BIT)
7723 fputs ("@TPOFF", file);
7724 else
7725 fputs ("@NTPOFF", file);
f996902d
RH
7726 break;
7727 case UNSPEC_DTPOFF:
7728 output_addr_const (file, op);
7729 fputs ("@DTPOFF", file);
7730 break;
dea73790
JJ
7731 case UNSPEC_GOTNTPOFF:
7732 output_addr_const (file, op);
75d38379
JJ
7733 if (TARGET_64BIT)
7734 fputs ("@GOTTPOFF(%rip)", file);
7735 else
7736 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7737 break;
7738 case UNSPEC_INDNTPOFF:
7739 output_addr_const (file, op);
7740 fputs ("@INDNTPOFF", file);
7741 break;
f996902d
RH
7742
7743 default:
7744 return false;
7745 }
7746
7747 return true;
7748}
2a2ab3f9
JVA
7749\f
7750/* Split one or more DImode RTL references into pairs of SImode
7751 references. The RTL can be REG, offsettable MEM, integer constant, or
7752 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7753 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7754 that parallel "operands". */
2a2ab3f9
JVA
7755
7756void
b96a374d 7757split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7758{
7759 while (num--)
7760 {
57dbca5e 7761 rtx op = operands[num];
b932f770
JH
7762
7763 /* simplify_subreg refuse to split volatile memory addresses,
7764 but we still have to handle it. */
7765 if (GET_CODE (op) == MEM)
2a2ab3f9 7766 {
f4ef873c 7767 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7768 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7769 }
7770 else
b932f770 7771 {
38ca929b
JH
7772 lo_half[num] = simplify_gen_subreg (SImode, op,
7773 GET_MODE (op) == VOIDmode
7774 ? DImode : GET_MODE (op), 0);
7775 hi_half[num] = simplify_gen_subreg (SImode, op,
7776 GET_MODE (op) == VOIDmode
7777 ? DImode : GET_MODE (op), 4);
b932f770 7778 }
2a2ab3f9
JVA
7779 }
7780}
44cf5b6a
JH
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
2a2ab3f9 7808\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[30];
  const char *p;
  const char *ssep;
  /* Nonzero when any operand lives in an SSE register; the whole
     operation is then emitted as an SSE scalar instruction.  */
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the x87 mnemonic root (integer-operand "fi" form when one
     source is integral) and the SSE mnemonic root.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      /* SSE form: addss/addsd etc., with a single two-operand template.  */
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so that a source matching the
	 destination register is operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory operands[1] needs the reversed
	 ("r") form of the instruction.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 8030
a4f31c00 8031/* Output code to initialize control word copies used by
7a2e09f4
JH
8032 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8033 is set to control word rounding downwards. */
8034void
b96a374d 8035emit_i387_cw_initialization (rtx normal, rtx round_down)
7a2e09f4
JH
8036{
8037 rtx reg = gen_reg_rtx (HImode);
8038
8039 emit_insn (gen_x86_fnstcw_1 (normal));
8040 emit_move_insn (reg, normal);
8041 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8042 && !TARGET_64BIT)
8043 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8044 else
8045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8046 emit_move_insn (round_down, reg);
8047}
8048
2a2ab3f9 8049/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 8050 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 8051 operand may be [SDX]Fmode. */
2a2ab3f9 8052
69ddee61 8053const char *
b96a374d 8054output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
8055{
8056 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 8057 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 8058
e075ae69
RH
8059 /* Jump through a hoop or two for DImode, since the hardware has no
8060 non-popping instruction. We used to do this a different way, but
8061 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
8062 if (dimode_p && !stack_top_dies)
8063 output_asm_insn ("fld\t%y1", operands);
e075ae69 8064
7a2e09f4 8065 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
8066 abort ();
8067
e075ae69 8068 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 8069 abort ();
e9a25f70 8070
7a2e09f4 8071 output_asm_insn ("fldcw\t%3", operands);
e075ae69 8072 if (stack_top_dies || dimode_p)
7a2e09f4 8073 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 8074 else
7a2e09f4 8075 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 8076 output_asm_insn ("fldcw\t%2", operands);
10195bd8 8077
e075ae69 8078 return "";
2a2ab3f9 8079}
cda749b1 8080
e075ae69
RH
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  /* Nonzero when either operand is an SSE register; use [u]comis[sd].  */
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For the fnstsw form the compared operands are shifted by one:
     operands[0] receives the status word.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the table index from the four predicate bits; NULL
	 entries represent impossible combinations.  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 8208
f88c65f7 8209void
b96a374d 8210ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8211{
8212 const char *directive = ASM_LONG;
8213
8214 if (TARGET_64BIT)
8215 {
8216#ifdef ASM_QUAD
8217 directive = ASM_QUAD;
8218#else
8219 abort ();
8220#endif
8221 }
8222
8223 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8224}
8225
/* Emit one element of an address-difference jump table: label VALUE
   expressed relative to some anchor.  On 64-bit targets the anchor is
   local label REL; otherwise REL is unused and the entry is relative
   to the GOT (or, on Darwin, the function base).  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Darwin has no @GOTOFF; subtract the picbase symbol instead.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback for assemblers without @GOTOFF in data: spell out the
       equivalent expression against the GOT symbol.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 8246\f
a8bac9ab
RH
8247/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8248 for the target. */
8249
8250void
b96a374d 8251ix86_expand_clear (rtx dest)
a8bac9ab
RH
8252{
8253 rtx tmp;
8254
8255 /* We play register width games, which are only valid after reload. */
8256 if (!reload_completed)
8257 abort ();
8258
8259 /* Avoid HImode and its attendant prefix byte. */
8260 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8261 dest = gen_rtx_REG (SImode, REGNO (dest));
8262
8263 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8264
8265 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8266 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8267 {
8268 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8269 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8270 }
8271
8272 emit_insn (tmp);
8273}
8274
f996902d
RH
8275/* X is an unchanging MEM. If it is a constant pool reference, return
8276 the constant pool rtx, else NULL. */
8277
8278static rtx
b96a374d 8279maybe_get_pool_constant (rtx x)
f996902d 8280{
69bd9368 8281 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8282
8283 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8284 return get_pool_constant (x);
8285
8286 return NULL_RTX;
8287}
8288
/* Expand a move of MODE from operands[1] to operands[0], legitimizing
   TLS and PIC symbol references and forcing awkward operands into
   registers or memory as needed, then emit the final SET.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need their address computed by a model-specific
     sequence before the move proper.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      /* force_operand may already have stored into op0.  */
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  /* legitimize_pic_address may already have stored into op0.  */
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem-to-mem moves are not directly encodable, except pushes of
	 exactly push-rounded size; go through a register.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8385
e37af218 8386void
b96a374d 8387ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218
RH
8388{
8389 /* Force constants other than zero into memory. We do not know how
8390 the instructions used to build constants modify the upper 64 bits
8391 of the register, once we have that information we may be able
8392 to handle some of them more efficiently. */
8393 if ((reload_in_progress | reload_completed) == 0
8394 && register_operand (operands[0], mode)
fdc4b40b 8395 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
2b28d405 8396 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
e37af218
RH
8397
8398 /* Make operand1 a register if it isn't already. */
f8ca7923 8399 if (!no_new_pseudos
e37af218 8400 && !register_operand (operands[0], mode)
b105d6da 8401 && !register_operand (operands[1], mode))
e37af218 8402 {
59bef189 8403 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
8404 emit_move_insn (operands[0], temp);
8405 return;
8406 }
8407
8408 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8409}
e37af218 8410
e075ae69
RH
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) equals the destination, or 0 for neither.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination in memory; force the other into a reg.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8498
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  Mirrors the fixups performed by
   ix86_expand_binary_operator: two-address x86 binary insns need one
   source to match the destination, and at most one memory operand.
   Note: GET_RTX_CLASS (code) == 'c' identifies commutative operators.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant
     (a constant cannot occupy the first-operand slot of the insn).  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.
     For commutative operators either source may match.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
8527
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE is the unary rtx code (e.g. NEG, NOT), MODE the operand mode, and
   OPERANDS[0]/OPERANDS[1] the destination and source.  Emits the insn
   (with a FLAGS_REG clobber except for NOT, which does not touch flags)
   and a fixup move when the destination had to be copied to a register.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  NOT is the only unary op here that
         leaves the flags alone, so it needs no clobber either.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      /* All other unary ops modify EFLAGS; record that with a clobber so
	 the optimizers do not assume flags survive across this insn.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8587
8588/* Return TRUE or FALSE depending on whether the unary operator meets the
8589 appropriate constraints. */
8590
8591int
b96a374d
AJ
8592ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8593 enum machine_mode mode ATTRIBUTE_UNUSED,
8594 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8595{
06a964de
JH
8596 /* If one of operands is memory, source and destination must match. */
8597 if ((GET_CODE (operands[0]) == MEM
8598 || GET_CODE (operands[1]) == MEM)
8599 && ! rtx_equal_p (operands[0], operands[1]))
8600 return FALSE;
e075ae69
RH
8601 return TRUE;
8602}
8603
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.

   The cases below form a lattice: CCNOmode is handled specially, while
   CCmode -> CCGCmode -> CCGOCmode -> CCZmode fall through so that each
   mode also rejects the requirements of every stricter mode above it.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode satisfies a CCmode request only for compares against
	 zero, where carry cannot matter.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Source and destination of the SET must agree on CC mode.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8652
e075ae69
RH
8653/* Generate insn patterns to do an integer compare of OPERANDS. */
8654
8655static rtx
b96a374d 8656ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
8657{
8658 enum machine_mode cmpmode;
8659 rtx tmp, flags;
8660
8661 cmpmode = SELECT_CC_MODE (code, op0, op1);
8662 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8663
8664 /* This is very simple, but making the interface the same as in the
8665 FP case makes the rest of the code easier. */
8666 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8667 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8668
8669 /* Return the test that should be put into the flags user, i.e.
8670 the bcc, scc, or cmov instruction. */
8671 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8672}
8673
3a3677ff
RH
8674/* Figure out whether to use ordered or unordered fp comparisons.
8675 Return the appropriate mode to use. */
e075ae69 8676
b1cdafbb 8677enum machine_mode
b96a374d 8678ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8679{
9e7adcb3
JH
8680 /* ??? In order to make all comparisons reversible, we do all comparisons
8681 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8682 all forms trapping and nontrapping comparisons, we can make inequality
8683 comparisons trapping again, since it results in better code when using
8684 FCOM based compares. */
8685 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8686}
8687
/* Return the CC mode needed to represent a comparison CODE of OP0 and
   OP1.  Picks the least constrained mode whose flags suffice, so that
   more compare insns can be combined or shared.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  /* Floating-point compares have their own mode selection.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8732
e129d93a
ILT
8733/* Return the fixed registers used for condition codes. */
8734
8735static bool
8736ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8737{
8738 *p1 = FLAGS_REG;
8739 *p2 = FPSR_REG;
8740 return true;
8741}
8742
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.

   The integer CC modes (CCmode, CCGCmode, CCGOCmode, CCNOmode, CCZmode)
   merge to CCmode, except that the GC/GOC pair merges to the stricter
   CCGCmode.  FP modes are only compatible with themselves.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
8790
3a3677ff
RH
8791/* Return true if we should use an FCOMI instruction for this fp comparison. */
8792
a940d8bd 8793int
b96a374d 8794ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8795{
9e7adcb3
JH
8796 enum rtx_code swapped_code = swap_condition (code);
8797 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8798 || (ix86_fp_comparison_cost (swapped_code)
8799 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8800}
8801
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants loadable by the 387 (fldz/fld1/...) go to a reg;
	     others must be spilled to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8870
c0c102a9
JH
8871/* Convert comparison codes we use to represent FP comparison to integer
8872 code that will result in proper branch. Return UNKNOWN if no such code
8873 is available. */
8874static enum rtx_code
b96a374d 8875ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8876{
8877 switch (code)
8878 {
8879 case GT:
8880 return GTU;
8881 case GE:
8882 return GEU;
8883 case ORDERED:
8884 case UNORDERED:
8885 return code;
8886 break;
8887 case UNEQ:
8888 return EQ;
8889 break;
8890 case UNLT:
8891 return LTU;
8892 break;
8893 case UNLE:
8894 return LEU;
8895 break;
8896 case LTGT:
8897 return NE;
8898 break;
8899 default:
8900 return UNKNOWN;
8901 }
8902}
8903
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */

static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      /* These map directly onto one flag test; no extra branch needed.  */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance the unordered outcome need not be honored,
     so the single primary branch suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
8970
9e7adcb3 8971/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 8972 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
8973 In future this should be tweaked to compute bytes for optimize_size and
8974 take into account performance of various instructions on various CPUs. */
8975static int
b96a374d 8976ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
8977{
8978 if (!TARGET_IEEE_FP)
8979 return 4;
8980 /* The cost of code output by ix86_expand_fp_compare. */
8981 switch (code)
8982 {
8983 case UNLE:
8984 case UNLT:
8985 case LTGT:
8986 case GT:
8987 case GE:
8988 case UNORDERED:
8989 case ORDERED:
8990 case UNEQ:
8991 return 4;
8992 break;
8993 case LT:
8994 case NE:
8995 case EQ:
8996 case UNGE:
8997 return 5;
8998 break;
8999 case LE:
9000 case UNGT:
9001 return 6;
9002 break;
9003 default:
9004 abort ();
9005 }
9006}
9007
9008/* Return cost of comparison done using fcomi operation.
9009 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9010static int
b96a374d 9011ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
9012{
9013 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9014 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
9015 prevents gcc from using it. */
9016 if (!TARGET_CMOVE)
9017 return 1024;
9018 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9019 return (bypass_code != NIL || second_code != NIL) + 2;
9020}
9021
9022/* Return cost of comparison done using sahf operation.
9023 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9024static int
b96a374d 9025ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
9026{
9027 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9028 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
9029 avoids gcc from using it. */
9030 if (!TARGET_USE_SAHF && !optimize_size)
9031 return 1024;
9032 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9033 return (bypass_code != NIL || second_code != NIL) + 3;
9034}
9035
9036/* Compute cost of the comparison done using any method.
9037 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9038static int
b96a374d 9039ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9040{
9041 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9042 int min;
9043
9044 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9045 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9046
9047 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9048 if (min > sahf_cost)
9049 min = sahf_cost;
9050 if (min > fcomi_cost)
9051 min = fcomi_cost;
9052 return min;
9053}
c0c102a9 9054
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE is the comparison, OP0/OP1 the operands, and SCRATCH an optional
   HImode register for the fnstsw result (allocated here if NULL).  When
   the chosen sequence needs extra branches, *SECOND_TEST and *BYPASS_TEST
   (if non-NULL) receive the auxiliary comparisons; otherwise they are
   cleared.  Returns the comparison rtx for the primary flags user.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only possible when the
     caller can accept whatever extra tests the code split requires.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fcom + fnstsw + sahf: copy the FP status word into AH and
	     transfer it to the flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select FPSW condition bits as copied into AH:
	 0x45 = C3|C2|C0, 0x44 = C3|C2, 0x40 = C3, 0x05 = C2|C0,
	 0x04 = C2, 0x01 = C0.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
9241
9e3e266c 9242rtx
b96a374d 9243ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
9244{
9245 rtx op0, op1, ret;
9246 op0 = ix86_compare_op0;
9247 op1 = ix86_compare_op1;
9248
a1b8572c
JH
9249 if (second_test)
9250 *second_test = NULL_RTX;
9251 if (bypass_test)
9252 *bypass_test = NULL_RTX;
9253
e075ae69 9254 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9255 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9256 second_test, bypass_test);
32b5b1aa 9257 else
e075ae69
RH
9258 ret = ix86_expand_int_compare (code, op0, op1);
9259
9260 return ret;
9261}
9262
03598dea
JH
9263/* Return true if the CODE will result in nontrivial jump sequence. */
9264bool
b96a374d 9265ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9266{
9267 enum rtx_code bypass_code, first_code, second_code;
9268 if (!TARGET_CMOVE)
9269 return true;
9270 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9271 return bypass_code != NIL || second_code != NIL;
9272}
9273
/* Expand a conditional branch on comparison CODE of the operands stored
   in ix86_compare_op0/op1, jumping to LABEL when the comparison holds.
   Integer modes emit a single compare+branch; FP modes may need a
   compound sequence; 32-bit DImode is split into word compares.  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* One compare, one conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    /* Clobber flags (regs 17/18) and, for the fnstsw/sahf path,
	       an HImode scratch for the FP status word.  */
	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant into the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse with the SImode OR against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	/* Low words use the unsigned form of the comparison.  */
	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 9457
/* Split branch based on floating point condition.

   Emits up to three conditional jumps implementing comparison CODE of
   OP1 and OP2: an optional bypass jump around the others (for the
   unordered case), the primary jump to TARGET1/TARGET2, and an optional
   second jump.  TMP, if non-NULL, is an HImode scratch for fnstsw.
   Branch probability notes are distributed when the splitter provides
   split_branch_probability.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so the taken target is TARGET1 by reversing the
     condition when the fall-through slot is the first target.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* Bypass jump skips the main and second jumps entirely.  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The primary conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      /* Second jump handles the component the primary test missed.  */
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
9535
32b5b1aa 9536int
b96a374d 9537ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9538{
3a627503 9539 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9540 rtx second_test, bypass_test;
e075ae69 9541
885a70fd
JH
9542 if (GET_MODE (ix86_compare_op0) == DImode
9543 && !TARGET_64BIT)
e075ae69
RH
9544 return 0; /* FAIL */
9545
b932f770
JH
9546 if (GET_MODE (dest) != QImode)
9547 abort ();
e075ae69 9548
a1b8572c 9549 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9550 PUT_MODE (ret, QImode);
9551
9552 tmp = dest;
a1b8572c 9553 tmpreg = dest;
32b5b1aa 9554
e075ae69 9555 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9556 if (bypass_test || second_test)
9557 {
9558 rtx test = second_test;
9559 int bypass = 0;
9560 rtx tmp2 = gen_reg_rtx (QImode);
9561 if (bypass_test)
9562 {
9563 if (second_test)
b531087a 9564 abort ();
a1b8572c
JH
9565 test = bypass_test;
9566 bypass = 1;
9567 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9568 }
9569 PUT_MODE (test, QImode);
9570 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9571
9572 if (bypass)
9573 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9574 else
9575 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9576 }
e075ae69 9577
3a627503
RS
9578 /* Attach a REG_EQUAL note describing the comparison result. */
9579 equiv = simplify_gen_relational (code, QImode,
9580 GET_MODE (ix86_compare_op0),
9581 ix86_compare_op0, ix86_compare_op1);
9582 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9583
e075ae69 9584 return 1; /* DONE */
32b5b1aa 9585}
e075ae69 9586
c35d187f
RH
9587/* Expand comparison setting or clearing carry flag. Return true when
9588 successful and set pop for the operation. */
9589static bool
b96a374d 9590ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9591{
9592 enum machine_mode mode =
9593 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9594
9595 /* Do not handle DImode compares that go trought special path. Also we can't
43f3a59d 9596 deal with FP compares yet. This is possible to add. */
e6e81735
JH
9597 if ((mode == DImode && !TARGET_64BIT))
9598 return false;
9599 if (FLOAT_MODE_P (mode))
9600 {
9601 rtx second_test = NULL, bypass_test = NULL;
9602 rtx compare_op, compare_seq;
9603
9604 /* Shortcut: following common codes never translate into carry flag compares. */
9605 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9606 || code == ORDERED || code == UNORDERED)
9607 return false;
9608
9609 /* These comparisons require zero flag; swap operands so they won't. */
9610 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9611 && !TARGET_IEEE_FP)
9612 {
9613 rtx tmp = op0;
9614 op0 = op1;
9615 op1 = tmp;
9616 code = swap_condition (code);
9617 }
9618
c51e6d85
KH
9619 /* Try to expand the comparison and verify that we end up with carry flag
9620 based comparison. This is fails to be true only when we decide to expand
9621 comparison using arithmetic that is not too common scenario. */
e6e81735
JH
9622 start_sequence ();
9623 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9624 &second_test, &bypass_test);
9625 compare_seq = get_insns ();
9626 end_sequence ();
9627
9628 if (second_test || bypass_test)
9629 return false;
9630 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9631 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9632 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9633 else
9634 code = GET_CODE (compare_op);
9635 if (code != LTU && code != GEU)
9636 return false;
9637 emit_insn (compare_seq);
9638 *pop = compare_op;
9639 return true;
9640 }
9641 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9642 return false;
9643 switch (code)
9644 {
9645 case LTU:
9646 case GEU:
9647 break;
9648
9649 /* Convert a==0 into (unsigned)a<1. */
9650 case EQ:
9651 case NE:
9652 if (op1 != const0_rtx)
9653 return false;
9654 op1 = const1_rtx;
9655 code = (code == EQ ? LTU : GEU);
9656 break;
9657
9658 /* Convert a>b into b<a or a>=b-1. */
9659 case GTU:
9660 case LEU:
9661 if (GET_CODE (op1) == CONST_INT)
9662 {
9663 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9664 /* Bail out on overflow. We still can swap operands but that
43f3a59d 9665 would force loading of the constant into register. */
4977bab6
ZW
9666 if (op1 == const0_rtx
9667 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9668 return false;
9669 code = (code == GTU ? GEU : LTU);
9670 }
9671 else
9672 {
9673 rtx tmp = op1;
9674 op1 = op0;
9675 op0 = tmp;
9676 code = (code == GTU ? LTU : GEU);
9677 }
9678 break;
9679
ccea753c 9680 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9681 case LT:
9682 case GE:
9683 if (mode == DImode || op1 != const0_rtx)
9684 return false;
ccea753c 9685 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9686 code = (code == LT ? GEU : LTU);
9687 break;
9688 case LE:
9689 case GT:
9690 if (mode == DImode || op1 != constm1_rtx)
9691 return false;
ccea753c 9692 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9693 code = (code == LE ? GEU : LTU);
9694 break;
9695
9696 default:
9697 return false;
9698 }
ebe75517
JH
9699 /* Swapping operands may cause constant to appear as first operand. */
9700 if (!nonimmediate_operand (op0, VOIDmode))
9701 {
9702 if (no_new_pseudos)
9703 return false;
9704 op0 = force_reg (mode, op0);
9705 }
4977bab6
ZW
9706 ix86_compare_op0 = op0;
9707 ix86_compare_op1 = op1;
9708 *pop = ix86_expand_compare (code, NULL, NULL);
9709 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9710 abort ();
9711 return true;
9712}
9713
32b5b1aa 9714int
b96a374d 9715ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9716{
e075ae69
RH
9717 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9718 rtx compare_seq, compare_op;
a1b8572c 9719 rtx second_test, bypass_test;
635559ab 9720 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9721 bool sign_bit_compare_p = false;;
3a3677ff 9722
e075ae69 9723 start_sequence ();
a1b8572c 9724 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9725 compare_seq = get_insns ();
e075ae69
RH
9726 end_sequence ();
9727
9728 compare_code = GET_CODE (compare_op);
9729
4977bab6
ZW
9730 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9731 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9732 sign_bit_compare_p = true;
9733
e075ae69
RH
9734 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9735 HImode insns, we'd be swallowed in word prefix ops. */
9736
4977bab6 9737 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9738 && (mode != DImode || TARGET_64BIT)
0f290768 9739 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9740 && GET_CODE (operands[3]) == CONST_INT)
9741 {
9742 rtx out = operands[0];
9743 HOST_WIDE_INT ct = INTVAL (operands[2]);
9744 HOST_WIDE_INT cf = INTVAL (operands[3]);
9745 HOST_WIDE_INT diff;
9746
4977bab6
ZW
9747 diff = ct - cf;
9748 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9749 sbb. */
4977bab6
ZW
9750 if (sign_bit_compare_p
9751 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9752 ix86_compare_op1, &compare_op))
e075ae69 9753 {
e075ae69
RH
9754 /* Detect overlap between destination and compare sources. */
9755 rtx tmp = out;
9756
4977bab6 9757 if (!sign_bit_compare_p)
36583fea 9758 {
e6e81735
JH
9759 bool fpcmp = false;
9760
4977bab6
ZW
9761 compare_code = GET_CODE (compare_op);
9762
e6e81735
JH
9763 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9764 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9765 {
9766 fpcmp = true;
9767 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9768 }
9769
4977bab6
ZW
9770 /* To simplify rest of code, restrict to the GEU case. */
9771 if (compare_code == LTU)
9772 {
9773 HOST_WIDE_INT tmp = ct;
9774 ct = cf;
9775 cf = tmp;
9776 compare_code = reverse_condition (compare_code);
9777 code = reverse_condition (code);
9778 }
e6e81735
JH
9779 else
9780 {
9781 if (fpcmp)
9782 PUT_CODE (compare_op,
9783 reverse_condition_maybe_unordered
9784 (GET_CODE (compare_op)));
9785 else
9786 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9787 }
4977bab6 9788 diff = ct - cf;
36583fea 9789
4977bab6
ZW
9790 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9791 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9792 tmp = gen_reg_rtx (mode);
e075ae69 9793
4977bab6 9794 if (mode == DImode)
e6e81735 9795 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9796 else
e6e81735 9797 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9798 }
14f73b5a 9799 else
4977bab6
ZW
9800 {
9801 if (code == GT || code == GE)
9802 code = reverse_condition (code);
9803 else
9804 {
9805 HOST_WIDE_INT tmp = ct;
9806 ct = cf;
9807 cf = tmp;
5fb48685 9808 diff = ct - cf;
4977bab6
ZW
9809 }
9810 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9811 ix86_compare_op1, VOIDmode, 0, -1);
9812 }
e075ae69 9813
36583fea
JH
9814 if (diff == 1)
9815 {
9816 /*
9817 * cmpl op0,op1
9818 * sbbl dest,dest
9819 * [addl dest, ct]
9820 *
9821 * Size 5 - 8.
9822 */
9823 if (ct)
b96a374d 9824 tmp = expand_simple_binop (mode, PLUS,
635559ab 9825 tmp, GEN_INT (ct),
4977bab6 9826 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9827 }
9828 else if (cf == -1)
9829 {
9830 /*
9831 * cmpl op0,op1
9832 * sbbl dest,dest
9833 * orl $ct, dest
9834 *
9835 * Size 8.
9836 */
635559ab
JH
9837 tmp = expand_simple_binop (mode, IOR,
9838 tmp, GEN_INT (ct),
4977bab6 9839 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9840 }
9841 else if (diff == -1 && ct)
9842 {
9843 /*
9844 * cmpl op0,op1
9845 * sbbl dest,dest
06ec023f 9846 * notl dest
36583fea
JH
9847 * [addl dest, cf]
9848 *
9849 * Size 8 - 11.
9850 */
4977bab6 9851 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9852 if (cf)
b96a374d 9853 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9854 copy_rtx (tmp), GEN_INT (cf),
9855 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9856 }
9857 else
9858 {
9859 /*
9860 * cmpl op0,op1
9861 * sbbl dest,dest
06ec023f 9862 * [notl dest]
36583fea
JH
9863 * andl cf - ct, dest
9864 * [addl dest, ct]
9865 *
9866 * Size 8 - 11.
9867 */
06ec023f
RB
9868
9869 if (cf == 0)
9870 {
9871 cf = ct;
9872 ct = 0;
4977bab6 9873 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9874 }
9875
635559ab 9876 tmp = expand_simple_binop (mode, AND,
4977bab6 9877 copy_rtx (tmp),
d8bf17f9 9878 gen_int_mode (cf - ct, mode),
4977bab6 9879 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9880 if (ct)
b96a374d 9881 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9882 copy_rtx (tmp), GEN_INT (ct),
9883 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9884 }
e075ae69 9885
4977bab6
ZW
9886 if (!rtx_equal_p (tmp, out))
9887 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9888
9889 return 1; /* DONE */
9890 }
9891
e075ae69
RH
9892 if (diff < 0)
9893 {
9894 HOST_WIDE_INT tmp;
9895 tmp = ct, ct = cf, cf = tmp;
9896 diff = -diff;
734dba19
JH
9897 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9898 {
9899 /* We may be reversing unordered compare to normal compare, that
9900 is not valid in general (we may convert non-trapping condition
9901 to trapping one), however on i386 we currently emit all
9902 comparisons unordered. */
9903 compare_code = reverse_condition_maybe_unordered (compare_code);
9904 code = reverse_condition_maybe_unordered (code);
9905 }
9906 else
9907 {
9908 compare_code = reverse_condition (compare_code);
9909 code = reverse_condition (code);
9910 }
e075ae69 9911 }
0f2a3457
JJ
9912
9913 compare_code = NIL;
9914 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9915 && GET_CODE (ix86_compare_op1) == CONST_INT)
9916 {
9917 if (ix86_compare_op1 == const0_rtx
9918 && (code == LT || code == GE))
9919 compare_code = code;
9920 else if (ix86_compare_op1 == constm1_rtx)
9921 {
9922 if (code == LE)
9923 compare_code = LT;
9924 else if (code == GT)
9925 compare_code = GE;
9926 }
9927 }
9928
9929 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9930 if (compare_code != NIL
9931 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9932 && (cf == -1 || ct == -1))
9933 {
9934 /* If lea code below could be used, only optimize
9935 if it results in a 2 insn sequence. */
9936
9937 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9938 || diff == 3 || diff == 5 || diff == 9)
9939 || (compare_code == LT && ct == -1)
9940 || (compare_code == GE && cf == -1))
9941 {
9942 /*
9943 * notl op1 (if necessary)
9944 * sarl $31, op1
9945 * orl cf, op1
9946 */
9947 if (ct != -1)
9948 {
9949 cf = ct;
b96a374d 9950 ct = -1;
0f2a3457
JJ
9951 code = reverse_condition (code);
9952 }
9953
9954 out = emit_store_flag (out, code, ix86_compare_op0,
9955 ix86_compare_op1, VOIDmode, 0, -1);
9956
9957 out = expand_simple_binop (mode, IOR,
9958 out, GEN_INT (cf),
9959 out, 1, OPTAB_DIRECT);
9960 if (out != operands[0])
9961 emit_move_insn (operands[0], out);
9962
9963 return 1; /* DONE */
9964 }
9965 }
9966
4977bab6 9967
635559ab
JH
9968 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9969 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9970 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 9971 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9972 {
9973 /*
9974 * xorl dest,dest
9975 * cmpl op1,op2
9976 * setcc dest
9977 * lea cf(dest*(ct-cf)),dest
9978 *
9979 * Size 14.
9980 *
9981 * This also catches the degenerate setcc-only case.
9982 */
9983
9984 rtx tmp;
9985 int nops;
9986
9987 out = emit_store_flag (out, code, ix86_compare_op0,
9988 ix86_compare_op1, VOIDmode, 0, 1);
9989
9990 nops = 0;
97f51ac4
RB
9991 /* On x86_64 the lea instruction operates on Pmode, so we need
9992 to get arithmetics done in proper mode to match. */
e075ae69 9993 if (diff == 1)
068f5dea 9994 tmp = copy_rtx (out);
e075ae69
RH
9995 else
9996 {
885a70fd 9997 rtx out1;
068f5dea 9998 out1 = copy_rtx (out);
635559ab 9999 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10000 nops++;
10001 if (diff & 1)
10002 {
635559ab 10003 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10004 nops++;
10005 }
10006 }
10007 if (cf != 0)
10008 {
635559ab 10009 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10010 nops++;
10011 }
4977bab6 10012 if (!rtx_equal_p (tmp, out))
e075ae69 10013 {
14f73b5a 10014 if (nops == 1)
a5cf80f0 10015 out = force_operand (tmp, copy_rtx (out));
e075ae69 10016 else
4977bab6 10017 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10018 }
4977bab6 10019 if (!rtx_equal_p (out, operands[0]))
1985ef90 10020 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10021
10022 return 1; /* DONE */
10023 }
10024
10025 /*
10026 * General case: Jumpful:
10027 * xorl dest,dest cmpl op1, op2
10028 * cmpl op1, op2 movl ct, dest
10029 * setcc dest jcc 1f
10030 * decl dest movl cf, dest
10031 * andl (cf-ct),dest 1:
10032 * addl ct,dest
0f290768 10033 *
e075ae69
RH
10034 * Size 20. Size 14.
10035 *
10036 * This is reasonably steep, but branch mispredict costs are
10037 * high on modern cpus, so consider failing only if optimizing
10038 * for space.
e075ae69
RH
10039 */
10040
4977bab6
ZW
10041 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10042 && BRANCH_COST >= 2)
e075ae69 10043 {
97f51ac4 10044 if (cf == 0)
e075ae69 10045 {
97f51ac4
RB
10046 cf = ct;
10047 ct = 0;
734dba19 10048 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10049 /* We may be reversing unordered compare to normal compare,
10050 that is not valid in general (we may convert non-trapping
10051 condition to trapping one), however on i386 we currently
10052 emit all comparisons unordered. */
10053 code = reverse_condition_maybe_unordered (code);
10054 else
10055 {
10056 code = reverse_condition (code);
10057 if (compare_code != NIL)
10058 compare_code = reverse_condition (compare_code);
10059 }
10060 }
10061
10062 if (compare_code != NIL)
10063 {
10064 /* notl op1 (if needed)
10065 sarl $31, op1
10066 andl (cf-ct), op1
b96a374d 10067 addl ct, op1
0f2a3457
JJ
10068
10069 For x < 0 (resp. x <= -1) there will be no notl,
10070 so if possible swap the constants to get rid of the
10071 complement.
10072 True/false will be -1/0 while code below (store flag
10073 followed by decrement) is 0/-1, so the constants need
10074 to be exchanged once more. */
10075
10076 if (compare_code == GE || !cf)
734dba19 10077 {
b96a374d 10078 code = reverse_condition (code);
0f2a3457 10079 compare_code = LT;
734dba19
JH
10080 }
10081 else
10082 {
0f2a3457 10083 HOST_WIDE_INT tmp = cf;
b96a374d 10084 cf = ct;
0f2a3457 10085 ct = tmp;
734dba19 10086 }
0f2a3457
JJ
10087
10088 out = emit_store_flag (out, code, ix86_compare_op0,
10089 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10090 }
0f2a3457
JJ
10091 else
10092 {
10093 out = emit_store_flag (out, code, ix86_compare_op0,
10094 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10095
4977bab6
ZW
10096 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10097 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10098 }
e075ae69 10099
4977bab6 10100 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10101 gen_int_mode (cf - ct, mode),
4977bab6 10102 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10103 if (ct)
4977bab6
ZW
10104 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10105 copy_rtx (out), 1, OPTAB_DIRECT);
10106 if (!rtx_equal_p (out, operands[0]))
10107 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10108
10109 return 1; /* DONE */
10110 }
10111 }
10112
4977bab6 10113 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10114 {
10115 /* Try a few things more with specific constants and a variable. */
10116
78a0d70c 10117 optab op;
e075ae69
RH
10118 rtx var, orig_out, out, tmp;
10119
4977bab6 10120 if (BRANCH_COST <= 2)
e075ae69
RH
10121 return 0; /* FAIL */
10122
0f290768 10123 /* If one of the two operands is an interesting constant, load a
e075ae69 10124 constant with the above and mask it in with a logical operation. */
0f290768 10125
e075ae69
RH
10126 if (GET_CODE (operands[2]) == CONST_INT)
10127 {
10128 var = operands[3];
4977bab6 10129 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10130 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10131 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10132 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10133 else
10134 return 0; /* FAIL */
e075ae69
RH
10135 }
10136 else if (GET_CODE (operands[3]) == CONST_INT)
10137 {
10138 var = operands[2];
4977bab6 10139 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10140 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10141 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10142 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10143 else
10144 return 0; /* FAIL */
e075ae69 10145 }
78a0d70c 10146 else
e075ae69
RH
10147 return 0; /* FAIL */
10148
10149 orig_out = operands[0];
635559ab 10150 tmp = gen_reg_rtx (mode);
e075ae69
RH
10151 operands[0] = tmp;
10152
10153 /* Recurse to get the constant loaded. */
10154 if (ix86_expand_int_movcc (operands) == 0)
10155 return 0; /* FAIL */
10156
10157 /* Mask in the interesting variable. */
635559ab 10158 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10159 OPTAB_WIDEN);
4977bab6
ZW
10160 if (!rtx_equal_p (out, orig_out))
10161 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10162
10163 return 1; /* DONE */
10164 }
10165
10166 /*
10167 * For comparison with above,
10168 *
10169 * movl cf,dest
10170 * movl ct,tmp
10171 * cmpl op1,op2
10172 * cmovcc tmp,dest
10173 *
10174 * Size 15.
10175 */
10176
635559ab
JH
10177 if (! nonimmediate_operand (operands[2], mode))
10178 operands[2] = force_reg (mode, operands[2]);
10179 if (! nonimmediate_operand (operands[3], mode))
10180 operands[3] = force_reg (mode, operands[3]);
e075ae69 10181
a1b8572c
JH
10182 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10183 {
635559ab 10184 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10185 emit_move_insn (tmp, operands[3]);
10186 operands[3] = tmp;
10187 }
10188 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10189 {
635559ab 10190 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10191 emit_move_insn (tmp, operands[2]);
10192 operands[2] = tmp;
10193 }
4977bab6 10194
c9682caf 10195 if (! register_operand (operands[2], VOIDmode)
b96a374d 10196 && (mode == QImode
4977bab6 10197 || ! register_operand (operands[3], VOIDmode)))
635559ab 10198 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10199
4977bab6
ZW
10200 if (mode == QImode
10201 && ! register_operand (operands[3], VOIDmode))
10202 operands[3] = force_reg (mode, operands[3]);
10203
e075ae69
RH
10204 emit_insn (compare_seq);
10205 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10206 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10207 compare_op, operands[2],
10208 operands[3])));
a1b8572c 10209 if (bypass_test)
4977bab6 10210 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10211 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10212 bypass_test,
4977bab6
ZW
10213 copy_rtx (operands[3]),
10214 copy_rtx (operands[0]))));
a1b8572c 10215 if (second_test)
4977bab6 10216 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10217 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10218 second_test,
4977bab6
ZW
10219 copy_rtx (operands[2]),
10220 copy_rtx (operands[0]))));
e075ae69
RH
10221
10222 return 1; /* DONE */
e9a25f70 10223}
e075ae69 10224
32b5b1aa 10225int
b96a374d 10226ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 10227{
e075ae69 10228 enum rtx_code code;
e075ae69 10229 rtx tmp;
a1b8572c 10230 rtx compare_op, second_test, bypass_test;
32b5b1aa 10231
0073023d
JH
10232 /* For SF/DFmode conditional moves based on comparisons
10233 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
10234 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10235 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10236 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
10237 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10238 && (!TARGET_IEEE_FP
10239 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
10240 /* We may be called from the post-reload splitter. */
10241 && (!REG_P (operands[0])
10242 || SSE_REG_P (operands[0])
52a661a6 10243 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
10244 {
10245 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10246 code = GET_CODE (operands[1]);
10247
10248 /* See if we have (cross) match between comparison operands and
10249 conditional move operands. */
10250 if (rtx_equal_p (operands[2], op1))
10251 {
10252 rtx tmp = op0;
10253 op0 = op1;
10254 op1 = tmp;
10255 code = reverse_condition_maybe_unordered (code);
10256 }
10257 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10258 {
10259 /* Check for min operation. */
4977bab6 10260 if (code == LT || code == UNLE)
0073023d 10261 {
4977bab6
ZW
10262 if (code == UNLE)
10263 {
10264 rtx tmp = op0;
10265 op0 = op1;
10266 op1 = tmp;
10267 }
0073023d
JH
10268 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10269 if (memory_operand (op0, VOIDmode))
10270 op0 = force_reg (GET_MODE (operands[0]), op0);
10271 if (GET_MODE (operands[0]) == SFmode)
10272 emit_insn (gen_minsf3 (operands[0], op0, op1));
10273 else
10274 emit_insn (gen_mindf3 (operands[0], op0, op1));
10275 return 1;
10276 }
10277 /* Check for max operation. */
4977bab6 10278 if (code == GT || code == UNGE)
0073023d 10279 {
4977bab6
ZW
10280 if (code == UNGE)
10281 {
10282 rtx tmp = op0;
10283 op0 = op1;
10284 op1 = tmp;
10285 }
0073023d
JH
10286 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10287 if (memory_operand (op0, VOIDmode))
10288 op0 = force_reg (GET_MODE (operands[0]), op0);
10289 if (GET_MODE (operands[0]) == SFmode)
10290 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10291 else
10292 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10293 return 1;
10294 }
10295 }
10296 /* Manage condition to be sse_comparison_operator. In case we are
10297 in non-ieee mode, try to canonicalize the destination operand
10298 to be first in the comparison - this helps reload to avoid extra
10299 moves. */
10300 if (!sse_comparison_operator (operands[1], VOIDmode)
10301 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10302 {
10303 rtx tmp = ix86_compare_op0;
10304 ix86_compare_op0 = ix86_compare_op1;
10305 ix86_compare_op1 = tmp;
10306 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10307 VOIDmode, ix86_compare_op0,
10308 ix86_compare_op1);
10309 }
d1f87653 10310 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
10311 move. We also don't support the NE comparison on SSE, so try to
10312 avoid it. */
037f20f1
JH
10313 if ((rtx_equal_p (operands[0], operands[3])
10314 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10315 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
10316 {
10317 rtx tmp = operands[2];
10318 operands[2] = operands[3];
92d0fb09 10319 operands[3] = tmp;
0073023d
JH
10320 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10321 (GET_CODE (operands[1])),
10322 VOIDmode, ix86_compare_op0,
10323 ix86_compare_op1);
10324 }
10325 if (GET_MODE (operands[0]) == SFmode)
10326 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10327 operands[2], operands[3],
10328 ix86_compare_op0, ix86_compare_op1));
10329 else
10330 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10331 operands[2], operands[3],
10332 ix86_compare_op0, ix86_compare_op1));
10333 return 1;
10334 }
10335
e075ae69 10336 /* The floating point conditional move instructions don't directly
0f290768 10337 support conditions resulting from a signed integer comparison. */
32b5b1aa 10338
e075ae69 10339 code = GET_CODE (operands[1]);
a1b8572c 10340 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
10341
10342 /* The floating point conditional move instructions don't directly
10343 support signed integer comparisons. */
10344
a1b8572c 10345 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 10346 {
a1b8572c 10347 if (second_test != NULL || bypass_test != NULL)
b531087a 10348 abort ();
e075ae69 10349 tmp = gen_reg_rtx (QImode);
3a3677ff 10350 ix86_expand_setcc (code, tmp);
e075ae69
RH
10351 code = NE;
10352 ix86_compare_op0 = tmp;
10353 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
10354 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10355 }
10356 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10357 {
10358 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10359 emit_move_insn (tmp, operands[3]);
10360 operands[3] = tmp;
10361 }
10362 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10363 {
10364 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10365 emit_move_insn (tmp, operands[2]);
10366 operands[2] = tmp;
e075ae69 10367 }
e9a25f70 10368
e075ae69
RH
10369 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10370 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 10371 compare_op,
e075ae69
RH
10372 operands[2],
10373 operands[3])));
a1b8572c
JH
10374 if (bypass_test)
10375 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10376 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10377 bypass_test,
10378 operands[3],
10379 operands[0])));
10380 if (second_test)
10381 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10382 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10383 second_test,
10384 operands[2],
10385 operands[0])));
32b5b1aa 10386
e075ae69 10387 return 1;
32b5b1aa
SC
10388}
10389
7b52eede
JH
10390/* Expand conditional increment or decrement using adb/sbb instructions.
10391 The default case using setcc followed by the conditional move can be
10392 done by generic code. */
10393int
b96a374d 10394ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
10395{
10396 enum rtx_code code = GET_CODE (operands[1]);
10397 rtx compare_op;
10398 rtx val = const0_rtx;
e6e81735 10399 bool fpcmp = false;
e6e81735 10400 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
10401
10402 if (operands[3] != const1_rtx
10403 && operands[3] != constm1_rtx)
10404 return 0;
10405 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10406 ix86_compare_op1, &compare_op))
10407 return 0;
e6e81735
JH
10408 code = GET_CODE (compare_op);
10409
10410 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10411 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10412 {
10413 fpcmp = true;
10414 code = ix86_fp_compare_code_to_integer (code);
10415 }
10416
10417 if (code != LTU)
10418 {
10419 val = constm1_rtx;
10420 if (fpcmp)
10421 PUT_CODE (compare_op,
10422 reverse_condition_maybe_unordered
10423 (GET_CODE (compare_op)));
10424 else
10425 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10426 }
10427 PUT_MODE (compare_op, mode);
10428
10429 /* Construct either adc or sbb insn. */
10430 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
10431 {
10432 switch (GET_MODE (operands[0]))
10433 {
10434 case QImode:
e6e81735 10435 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10436 break;
10437 case HImode:
e6e81735 10438 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10439 break;
10440 case SImode:
e6e81735 10441 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10442 break;
10443 case DImode:
e6e81735 10444 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10445 break;
10446 default:
10447 abort ();
10448 }
10449 }
10450 else
10451 {
10452 switch (GET_MODE (operands[0]))
10453 {
10454 case QImode:
e6e81735 10455 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10456 break;
10457 case HImode:
e6e81735 10458 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10459 break;
10460 case SImode:
e6e81735 10461 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10462 break;
10463 case DImode:
e6e81735 10464 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10465 break;
10466 default:
10467 abort ();
10468 }
10469 }
10470 return 1; /* DONE */
10471}
10472
10473
2450a057
JH
10474/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10475 works for floating pointer parameters and nonoffsetable memories.
10476 For pushes, it returns just stack offsets; the values will be saved
10477 in the right order. Maximally three parts are generated. */
10478
2b589241 10479static int
b96a374d 10480ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 10481{
26e5b205
JH
10482 int size;
10483
10484 if (!TARGET_64BIT)
f8a1ebc6 10485 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
26e5b205
JH
10486 else
10487 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 10488
a7180f70
BS
10489 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10490 abort ();
2450a057
JH
10491 if (size < 2 || size > 3)
10492 abort ();
10493
f996902d
RH
10494 /* Optimize constant pool reference to immediates. This is used by fp
10495 moves, that force all constants to memory to allow combining. */
10496 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10497 {
10498 rtx tmp = maybe_get_pool_constant (operand);
10499 if (tmp)
10500 operand = tmp;
10501 }
d7a29404 10502
2450a057 10503 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 10504 {
2450a057
JH
10505 /* The only non-offsetable memories we handle are pushes. */
10506 if (! push_operand (operand, VOIDmode))
10507 abort ();
10508
26e5b205
JH
10509 operand = copy_rtx (operand);
10510 PUT_MODE (operand, Pmode);
2450a057
JH
10511 parts[0] = parts[1] = parts[2] = operand;
10512 }
26e5b205 10513 else if (!TARGET_64BIT)
2450a057
JH
10514 {
10515 if (mode == DImode)
10516 split_di (&operand, 1, &parts[0], &parts[1]);
10517 else
e075ae69 10518 {
2450a057
JH
10519 if (REG_P (operand))
10520 {
10521 if (!reload_completed)
10522 abort ();
10523 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10524 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10525 if (size == 3)
10526 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10527 }
10528 else if (offsettable_memref_p (operand))
10529 {
f4ef873c 10530 operand = adjust_address (operand, SImode, 0);
2450a057 10531 parts[0] = operand;
b72f00af 10532 parts[1] = adjust_address (operand, SImode, 4);
2450a057 10533 if (size == 3)
b72f00af 10534 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
10535 }
10536 else if (GET_CODE (operand) == CONST_DOUBLE)
10537 {
10538 REAL_VALUE_TYPE r;
2b589241 10539 long l[4];
2450a057
JH
10540
10541 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10542 switch (mode)
10543 {
10544 case XFmode:
10545 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 10546 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
10547 break;
10548 case DFmode:
10549 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10550 break;
10551 default:
10552 abort ();
10553 }
d8bf17f9
LB
10554 parts[1] = gen_int_mode (l[1], SImode);
10555 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
10556 }
10557 else
10558 abort ();
e075ae69 10559 }
2450a057 10560 }
26e5b205
JH
10561 else
10562 {
44cf5b6a
JH
10563 if (mode == TImode)
10564 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
10565 if (mode == XFmode || mode == TFmode)
10566 {
f8a1ebc6 10567 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
26e5b205
JH
10568 if (REG_P (operand))
10569 {
10570 if (!reload_completed)
10571 abort ();
10572 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
f8a1ebc6 10573 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
26e5b205
JH
10574 }
10575 else if (offsettable_memref_p (operand))
10576 {
b72f00af 10577 operand = adjust_address (operand, DImode, 0);
26e5b205 10578 parts[0] = operand;
f8a1ebc6 10579 parts[1] = adjust_address (operand, upper_mode, 8);
26e5b205
JH
10580 }
10581 else if (GET_CODE (operand) == CONST_DOUBLE)
10582 {
10583 REAL_VALUE_TYPE r;
10584 long l[3];
10585
10586 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10587 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10588 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10589 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 10590 parts[0]
d8bf17f9 10591 = gen_int_mode
44cf5b6a 10592 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 10593 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 10594 DImode);
26e5b205
JH
10595 else
10596 parts[0] = immed_double_const (l[0], l[1], DImode);
f8a1ebc6
JH
10597 if (upper_mode == SImode)
10598 parts[1] = gen_int_mode (l[2], SImode);
10599 else if (HOST_BITS_PER_WIDE_INT >= 64)
10600 parts[1]
10601 = gen_int_mode
10602 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10603 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10604 DImode);
10605 else
10606 parts[1] = immed_double_const (l[2], l[3], DImode);
26e5b205
JH
10607 }
10608 else
10609 abort ();
10610 }
10611 }
2450a057 10612
2b589241 10613 return size;
2450a057
JH
10614}
10615
10616/* Emit insns to perform a move or push of DI, DF, and XF values.
10617 Return false when normal moves are needed; true when all required
10618 insns have been emitted. Operands 2-4 contain the input values
10619 int the correct order; operands 5-7 contain the output values. */
10620
26e5b205 10621void
b96a374d 10622ix86_split_long_move (rtx operands[])
2450a057
JH
10623{
10624 rtx part[2][3];
26e5b205 10625 int nparts;
2450a057
JH
10626 int push = 0;
10627 int collisions = 0;
26e5b205
JH
10628 enum machine_mode mode = GET_MODE (operands[0]);
10629
10630 /* The DFmode expanders may ask us to move double.
10631 For 64bit target this is single move. By hiding the fact
10632 here we simplify i386.md splitters. */
10633 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10634 {
8cdfa312
RH
10635 /* Optimize constant pool reference to immediates. This is used by
10636 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
10637
10638 if (GET_CODE (operands[1]) == MEM
10639 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10640 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10641 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10642 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
10643 {
10644 operands[0] = copy_rtx (operands[0]);
10645 PUT_MODE (operands[0], Pmode);
10646 }
26e5b205
JH
10647 else
10648 operands[0] = gen_lowpart (DImode, operands[0]);
10649 operands[1] = gen_lowpart (DImode, operands[1]);
10650 emit_move_insn (operands[0], operands[1]);
10651 return;
10652 }
2450a057 10653
2450a057
JH
10654 /* The only non-offsettable memory we handle is push. */
10655 if (push_operand (operands[0], VOIDmode))
10656 push = 1;
10657 else if (GET_CODE (operands[0]) == MEM
10658 && ! offsettable_memref_p (operands[0]))
10659 abort ();
10660
26e5b205
JH
10661 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10662 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
10663
10664 /* When emitting push, take care for source operands on the stack. */
10665 if (push && GET_CODE (operands[1]) == MEM
10666 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10667 {
26e5b205 10668 if (nparts == 3)
886cbb88
JH
10669 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10670 XEXP (part[1][2], 0));
10671 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10672 XEXP (part[1][1], 0));
2450a057
JH
10673 }
10674
0f290768 10675 /* We need to do copy in the right order in case an address register
2450a057
JH
10676 of the source overlaps the destination. */
10677 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10678 {
10679 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10680 collisions++;
10681 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10682 collisions++;
26e5b205 10683 if (nparts == 3
2450a057
JH
10684 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10685 collisions++;
10686
10687 /* Collision in the middle part can be handled by reordering. */
26e5b205 10688 if (collisions == 1 && nparts == 3
2450a057 10689 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 10690 {
2450a057
JH
10691 rtx tmp;
10692 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10693 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10694 }
e075ae69 10695
2450a057
JH
10696 /* If there are more collisions, we can't handle it by reordering.
10697 Do an lea to the last part and use only one colliding move. */
10698 else if (collisions > 1)
10699 {
8231b3f9
RH
10700 rtx base;
10701
2450a057 10702 collisions = 1;
8231b3f9
RH
10703
10704 base = part[0][nparts - 1];
10705
10706 /* Handle the case when the last part isn't valid for lea.
10707 Happens in 64-bit mode storing the 12-byte XFmode. */
10708 if (GET_MODE (base) != Pmode)
10709 base = gen_rtx_REG (Pmode, REGNO (base));
10710
10711 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10712 part[1][0] = replace_equiv_address (part[1][0], base);
10713 part[1][1] = replace_equiv_address (part[1][1],
10714 plus_constant (base, UNITS_PER_WORD));
26e5b205 10715 if (nparts == 3)
8231b3f9
RH
10716 part[1][2] = replace_equiv_address (part[1][2],
10717 plus_constant (base, 8));
2450a057
JH
10718 }
10719 }
10720
10721 if (push)
10722 {
26e5b205 10723 if (!TARGET_64BIT)
2b589241 10724 {
26e5b205
JH
10725 if (nparts == 3)
10726 {
f8a1ebc6
JH
10727 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
26e5b205
JH
10729 emit_move_insn (part[0][2], part[1][2]);
10730 }
2b589241 10731 }
26e5b205
JH
10732 else
10733 {
10734 /* In 64bit mode we don't have 32bit push available. In case this is
10735 register, it is OK - we will just use larger counterpart. We also
10736 retype memory - these comes from attempt to avoid REX prefix on
10737 moving of second half of TFmode value. */
10738 if (GET_MODE (part[1][1]) == SImode)
10739 {
10740 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 10741 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
10742 else if (REG_P (part[1][1]))
10743 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10744 else
b531087a 10745 abort ();
886cbb88
JH
10746 if (GET_MODE (part[1][0]) == SImode)
10747 part[1][0] = part[1][1];
26e5b205
JH
10748 }
10749 }
10750 emit_move_insn (part[0][1], part[1][1]);
10751 emit_move_insn (part[0][0], part[1][0]);
10752 return;
2450a057
JH
10753 }
10754
10755 /* Choose correct order to not overwrite the source before it is copied. */
10756 if ((REG_P (part[0][0])
10757 && REG_P (part[1][1])
10758 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 10759 || (nparts == 3
2450a057
JH
10760 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10761 || (collisions > 0
10762 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10763 {
26e5b205 10764 if (nparts == 3)
2450a057 10765 {
26e5b205
JH
10766 operands[2] = part[0][2];
10767 operands[3] = part[0][1];
10768 operands[4] = part[0][0];
10769 operands[5] = part[1][2];
10770 operands[6] = part[1][1];
10771 operands[7] = part[1][0];
2450a057
JH
10772 }
10773 else
10774 {
26e5b205
JH
10775 operands[2] = part[0][1];
10776 operands[3] = part[0][0];
10777 operands[5] = part[1][1];
10778 operands[6] = part[1][0];
2450a057
JH
10779 }
10780 }
10781 else
10782 {
26e5b205 10783 if (nparts == 3)
2450a057 10784 {
26e5b205
JH
10785 operands[2] = part[0][0];
10786 operands[3] = part[0][1];
10787 operands[4] = part[0][2];
10788 operands[5] = part[1][0];
10789 operands[6] = part[1][1];
10790 operands[7] = part[1][2];
2450a057
JH
10791 }
10792 else
10793 {
26e5b205
JH
10794 operands[2] = part[0][0];
10795 operands[3] = part[0][1];
10796 operands[5] = part[1][0];
10797 operands[6] = part[1][1];
e075ae69
RH
10798 }
10799 }
26e5b205
JH
10800 emit_move_insn (operands[2], operands[5]);
10801 emit_move_insn (operands[3], operands[6]);
10802 if (nparts == 3)
10803 emit_move_insn (operands[4], operands[7]);
32b5b1aa 10804
26e5b205 10805 return;
32b5b1aa 10806}
32b5b1aa 10807
e075ae69 10808void
b96a374d 10809ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10810{
e075ae69
RH
10811 rtx low[2], high[2];
10812 int count;
b985a30f 10813
e075ae69
RH
10814 if (GET_CODE (operands[2]) == CONST_INT)
10815 {
10816 split_di (operands, 2, low, high);
10817 count = INTVAL (operands[2]) & 63;
32b5b1aa 10818
e075ae69
RH
10819 if (count >= 32)
10820 {
10821 emit_move_insn (high[0], low[1]);
10822 emit_move_insn (low[0], const0_rtx);
b985a30f 10823
e075ae69
RH
10824 if (count > 32)
10825 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10826 }
10827 else
10828 {
10829 if (!rtx_equal_p (operands[0], operands[1]))
10830 emit_move_insn (operands[0], operands[1]);
10831 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10832 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10833 }
10834 }
10835 else
10836 {
10837 if (!rtx_equal_p (operands[0], operands[1]))
10838 emit_move_insn (operands[0], operands[1]);
b985a30f 10839
e075ae69 10840 split_di (operands, 1, low, high);
b985a30f 10841
e075ae69
RH
10842 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10843 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10844
fe577e58 10845 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10846 {
fe577e58 10847 if (! no_new_pseudos)
e075ae69
RH
10848 scratch = force_reg (SImode, const0_rtx);
10849 else
10850 emit_move_insn (scratch, const0_rtx);
10851
10852 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10853 scratch));
10854 }
10855 else
10856 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10857 }
e9a25f70 10858}
32b5b1aa 10859
e075ae69 10860void
b96a374d 10861ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10862{
e075ae69
RH
10863 rtx low[2], high[2];
10864 int count;
32b5b1aa 10865
e075ae69
RH
10866 if (GET_CODE (operands[2]) == CONST_INT)
10867 {
10868 split_di (operands, 2, low, high);
10869 count = INTVAL (operands[2]) & 63;
32b5b1aa 10870
e075ae69
RH
10871 if (count >= 32)
10872 {
10873 emit_move_insn (low[0], high[1]);
32b5b1aa 10874
e075ae69
RH
10875 if (! reload_completed)
10876 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10877 else
10878 {
10879 emit_move_insn (high[0], low[0]);
10880 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10881 }
10882
10883 if (count > 32)
10884 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10885 }
10886 else
10887 {
10888 if (!rtx_equal_p (operands[0], operands[1]))
10889 emit_move_insn (operands[0], operands[1]);
10890 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10891 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10892 }
10893 }
10894 else
32b5b1aa 10895 {
e075ae69
RH
10896 if (!rtx_equal_p (operands[0], operands[1]))
10897 emit_move_insn (operands[0], operands[1]);
10898
10899 split_di (operands, 1, low, high);
10900
10901 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10902 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10903
fe577e58 10904 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10905 {
fe577e58 10906 if (! no_new_pseudos)
e075ae69
RH
10907 scratch = gen_reg_rtx (SImode);
10908 emit_move_insn (scratch, high[0]);
10909 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10910 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10911 scratch));
10912 }
10913 else
10914 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10915 }
e075ae69 10916}
32b5b1aa 10917
e075ae69 10918void
b96a374d 10919ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
10920{
10921 rtx low[2], high[2];
10922 int count;
32b5b1aa 10923
e075ae69 10924 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10925 {
e075ae69
RH
10926 split_di (operands, 2, low, high);
10927 count = INTVAL (operands[2]) & 63;
10928
10929 if (count >= 32)
c7271385 10930 {
e075ae69
RH
10931 emit_move_insn (low[0], high[1]);
10932 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10933
e075ae69
RH
10934 if (count > 32)
10935 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10936 }
10937 else
10938 {
10939 if (!rtx_equal_p (operands[0], operands[1]))
10940 emit_move_insn (operands[0], operands[1]);
10941 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10942 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10943 }
32b5b1aa 10944 }
e075ae69
RH
10945 else
10946 {
10947 if (!rtx_equal_p (operands[0], operands[1]))
10948 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10949
e075ae69
RH
10950 split_di (operands, 1, low, high);
10951
10952 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10953 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10954
10955 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 10956 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10957 {
fe577e58 10958 if (! no_new_pseudos)
e075ae69
RH
10959 scratch = force_reg (SImode, const0_rtx);
10960 else
10961 emit_move_insn (scratch, const0_rtx);
10962
10963 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10964 scratch));
10965 }
10966 else
10967 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10968 }
32b5b1aa 10969}
3f803cd9 10970
0407c02b 10971/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
10972 it is aligned to VALUE bytes. If true, jump to the label. */
10973static rtx
b96a374d 10974ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
10975{
10976 rtx label = gen_label_rtx ();
10977 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10978 if (GET_MODE (variable) == DImode)
10979 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10980 else
10981 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10982 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10983 1, label);
0945b39d
JH
10984 return label;
10985}
10986
10987/* Adjust COUNTER by the VALUE. */
10988static void
b96a374d 10989ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
10990{
10991 if (GET_MODE (countreg) == DImode)
10992 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10993 else
10994 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10995}
10996
10997/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10998rtx
b96a374d 10999ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
11000{
11001 rtx r;
11002 if (GET_MODE (exp) == VOIDmode)
11003 return force_reg (Pmode, exp);
11004 if (GET_MODE (exp) == Pmode)
11005 return copy_to_mode_reg (Pmode, exp);
11006 r = gen_reg_rtx (Pmode);
11007 emit_insn (gen_zero_extendsidi2 (r, exp));
11008 return r;
11009}
11010
11011/* Expand string move (memcpy) operation. Use i386 string operations when
11012 profitable. expand_clrstr contains similar code. */
11013int
b96a374d 11014ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
0945b39d 11015{
4e44c1ef 11016 rtx srcreg, destreg, countreg, srcexp, destexp;
0945b39d
JH
11017 enum machine_mode counter_mode;
11018 HOST_WIDE_INT align = 0;
11019 unsigned HOST_WIDE_INT count = 0;
0945b39d 11020
0945b39d
JH
11021 if (GET_CODE (align_exp) == CONST_INT)
11022 align = INTVAL (align_exp);
11023
d0a5295a
RH
11024 /* Can't use any of this if the user has appropriated esi or edi. */
11025 if (global_regs[4] || global_regs[5])
11026 return 0;
11027
5519a4f9 11028 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11029 if (!TARGET_ALIGN_STRINGOPS)
11030 align = 64;
11031
11032 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11033 {
11034 count = INTVAL (count_exp);
11035 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11036 return 0;
11037 }
0945b39d
JH
11038
11039 /* Figure out proper mode for counter. For 32bits it is always SImode,
11040 for 64bits use SImode when possible, otherwise DImode.
11041 Set count to number of bytes copied when known at compile time. */
11042 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11043 || x86_64_zero_extended_value (count_exp))
11044 counter_mode = SImode;
11045 else
11046 counter_mode = DImode;
11047
11048 if (counter_mode != SImode && counter_mode != DImode)
11049 abort ();
11050
11051 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
4e44c1ef
JJ
11052 if (destreg != XEXP (dst, 0))
11053 dst = replace_equiv_address_nv (dst, destreg);
0945b39d 11054 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
4e44c1ef
JJ
11055 if (srcreg != XEXP (src, 0))
11056 src = replace_equiv_address_nv (src, srcreg);
0945b39d
JH
11057
11058 /* When optimizing for size emit simple rep ; movsb instruction for
11059 counts not divisible by 4. */
11060
11061 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11062 {
4e44c1ef 11063 emit_insn (gen_cld ());
0945b39d 11064 countreg = ix86_zero_extend_to_Pmode (count_exp);
4e44c1ef
JJ
11065 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11066 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11067 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11068 destexp, srcexp));
0945b39d
JH
11069 }
11070
11071 /* For constant aligned (or small unaligned) copies use rep movsl
11072 followed by code copying the rest. For PentiumPro ensure 8 byte
11073 alignment to allow rep movsl acceleration. */
11074
11075 else if (count != 0
11076 && (align >= 8
11077 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11078 || optimize_size || count < (unsigned int) 64))
0945b39d 11079 {
4e44c1ef 11080 unsigned HOST_WIDE_INT offset = 0;
0945b39d 11081 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11082 rtx srcmem, dstmem;
11083
11084 emit_insn (gen_cld ());
0945b39d
JH
11085 if (count & ~(size - 1))
11086 {
11087 countreg = copy_to_mode_reg (counter_mode,
11088 GEN_INT ((count >> (size == 4 ? 2 : 3))
11089 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11090 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11091
11092 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11093 GEN_INT (size == 4 ? 2 : 3));
11094 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11095 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11096
11097 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11098 countreg, destexp, srcexp));
11099 offset = count & ~(size - 1);
0945b39d
JH
11100 }
11101 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11102 {
11103 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11104 offset);
11105 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11106 offset);
11107 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11108 offset += 4;
11109 }
0945b39d 11110 if (count & 0x02)
4e44c1ef
JJ
11111 {
11112 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11113 offset);
11114 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11115 offset);
11116 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11117 offset += 2;
11118 }
0945b39d 11119 if (count & 0x01)
4e44c1ef
JJ
11120 {
11121 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11122 offset);
11123 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11124 offset);
11125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11126 }
0945b39d
JH
11127 }
11128 /* The generic code based on the glibc implementation:
11129 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11130 allowing accelerated copying there)
11131 - copy the data using rep movsl
11132 - copy the rest. */
11133 else
11134 {
11135 rtx countreg2;
11136 rtx label = NULL;
4e44c1ef 11137 rtx srcmem, dstmem;
37ad04a5
JH
11138 int desired_alignment = (TARGET_PENTIUMPRO
11139 && (count == 0 || count >= (unsigned int) 260)
11140 ? 8 : UNITS_PER_WORD);
4e44c1ef
JJ
11141 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11142 dst = change_address (dst, BLKmode, destreg);
11143 src = change_address (src, BLKmode, srcreg);
0945b39d
JH
11144
11145 /* In case we don't know anything about the alignment, default to
11146 library version, since it is usually equally fast and result in
b96a374d 11147 shorter code.
4977bab6
ZW
11148
11149 Also emit call when we know that the count is large and call overhead
11150 will not be important. */
11151 if (!TARGET_INLINE_ALL_STRINGOPS
11152 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
4e44c1ef 11153 return 0;
0945b39d
JH
11154
11155 if (TARGET_SINGLE_STRINGOP)
11156 emit_insn (gen_cld ());
11157
11158 countreg2 = gen_reg_rtx (Pmode);
11159 countreg = copy_to_mode_reg (counter_mode, count_exp);
11160
11161 /* We don't use loops to align destination and to copy parts smaller
11162 than 4 bytes, because gcc is able to optimize such code better (in
11163 the case the destination or the count really is aligned, gcc is often
11164 able to predict the branches) and also it is friendlier to the
a4f31c00 11165 hardware branch prediction.
0945b39d 11166
d1f87653 11167 Using loops is beneficial for generic case, because we can
0945b39d
JH
11168 handle small counts using the loops. Many CPUs (such as Athlon)
11169 have large REP prefix setup costs.
11170
4aae8a9a 11171 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
11172 add some customizability to this code. */
11173
37ad04a5 11174 if (count == 0 && align < desired_alignment)
0945b39d
JH
11175 {
11176 label = gen_label_rtx ();
aaae0bb9 11177 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11178 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11179 }
11180 if (align <= 1)
11181 {
11182 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11183 srcmem = change_address (src, QImode, srcreg);
11184 dstmem = change_address (dst, QImode, destreg);
11185 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11186 ix86_adjust_counter (countreg, 1);
11187 emit_label (label);
11188 LABEL_NUSES (label) = 1;
11189 }
11190 if (align <= 2)
11191 {
11192 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11193 srcmem = change_address (src, HImode, srcreg);
11194 dstmem = change_address (dst, HImode, destreg);
11195 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11196 ix86_adjust_counter (countreg, 2);
11197 emit_label (label);
11198 LABEL_NUSES (label) = 1;
11199 }
37ad04a5 11200 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11201 {
11202 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11203 srcmem = change_address (src, SImode, srcreg);
11204 dstmem = change_address (dst, SImode, destreg);
11205 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11206 ix86_adjust_counter (countreg, 4);
11207 emit_label (label);
11208 LABEL_NUSES (label) = 1;
11209 }
11210
37ad04a5
JH
11211 if (label && desired_alignment > 4 && !TARGET_64BIT)
11212 {
11213 emit_label (label);
11214 LABEL_NUSES (label) = 1;
11215 label = NULL_RTX;
11216 }
0945b39d
JH
11217 if (!TARGET_SINGLE_STRINGOP)
11218 emit_insn (gen_cld ());
11219 if (TARGET_64BIT)
11220 {
11221 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11222 GEN_INT (3)));
4e44c1ef 11223 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11224 }
11225 else
11226 {
4e44c1ef
JJ
11227 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11228 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11229 }
4e44c1ef
JJ
11230 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11231 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11232 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11233 countreg2, destexp, srcexp));
0945b39d
JH
11234
11235 if (label)
11236 {
11237 emit_label (label);
11238 LABEL_NUSES (label) = 1;
11239 }
11240 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11241 {
11242 srcmem = change_address (src, SImode, srcreg);
11243 dstmem = change_address (dst, SImode, destreg);
11244 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11245 }
0945b39d
JH
11246 if ((align <= 4 || count == 0) && TARGET_64BIT)
11247 {
11248 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11249 srcmem = change_address (src, SImode, srcreg);
11250 dstmem = change_address (dst, SImode, destreg);
11251 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11252 emit_label (label);
11253 LABEL_NUSES (label) = 1;
11254 }
11255 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11256 {
11257 srcmem = change_address (src, HImode, srcreg);
11258 dstmem = change_address (dst, HImode, destreg);
11259 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11260 }
0945b39d
JH
11261 if (align <= 2 || count == 0)
11262 {
11263 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11264 srcmem = change_address (src, HImode, srcreg);
11265 dstmem = change_address (dst, HImode, destreg);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11267 emit_label (label);
11268 LABEL_NUSES (label) = 1;
11269 }
11270 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11271 {
11272 srcmem = change_address (src, QImode, srcreg);
11273 dstmem = change_address (dst, QImode, destreg);
11274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11275 }
0945b39d
JH
11276 if (align <= 1 || count == 0)
11277 {
11278 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11279 srcmem = change_address (src, QImode, srcreg);
11280 dstmem = change_address (dst, QImode, destreg);
11281 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11282 emit_label (label);
11283 LABEL_NUSES (label) = 1;
11284 }
11285 }
11286
0945b39d
JH
11287 return 1;
11288}
11289
11290/* Expand string clear operation (bzero). Use i386 string operations when
11291 profitable. expand_movstr contains similar code. */
11292int
4e44c1ef 11293ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
0945b39d 11294{
4e44c1ef 11295 rtx destreg, zeroreg, countreg, destexp;
0945b39d
JH
11296 enum machine_mode counter_mode;
11297 HOST_WIDE_INT align = 0;
11298 unsigned HOST_WIDE_INT count = 0;
11299
11300 if (GET_CODE (align_exp) == CONST_INT)
11301 align = INTVAL (align_exp);
11302
d0a5295a
RH
11303 /* Can't use any of this if the user has appropriated esi. */
11304 if (global_regs[4])
11305 return 0;
11306
5519a4f9 11307 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11308 if (!TARGET_ALIGN_STRINGOPS)
11309 align = 32;
11310
11311 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11312 {
11313 count = INTVAL (count_exp);
11314 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11315 return 0;
11316 }
0945b39d
JH
11317 /* Figure out proper mode for counter. For 32bits it is always SImode,
11318 for 64bits use SImode when possible, otherwise DImode.
11319 Set count to number of bytes copied when known at compile time. */
11320 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11321 || x86_64_zero_extended_value (count_exp))
11322 counter_mode = SImode;
11323 else
11324 counter_mode = DImode;
11325
4e44c1ef
JJ
11326 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11327 if (destreg != XEXP (dst, 0))
11328 dst = replace_equiv_address_nv (dst, destreg);
0945b39d
JH
11329
11330 emit_insn (gen_cld ());
11331
11332 /* When optimizing for size emit simple rep ; movsb instruction for
11333 counts not divisible by 4. */
11334
11335 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11336 {
11337 countreg = ix86_zero_extend_to_Pmode (count_exp);
11338 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
4e44c1ef
JJ
11339 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11340 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
0945b39d
JH
11341 }
11342 else if (count != 0
11343 && (align >= 8
11344 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11345 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
11346 {
11347 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11348 unsigned HOST_WIDE_INT offset = 0;
11349
0945b39d
JH
11350 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11351 if (count & ~(size - 1))
11352 {
11353 countreg = copy_to_mode_reg (counter_mode,
11354 GEN_INT ((count >> (size == 4 ? 2 : 3))
11355 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11356 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11357 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11358 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11359 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11360 offset = count & ~(size - 1);
0945b39d
JH
11361 }
11362 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11363 {
11364 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11365 offset);
11366 emit_insn (gen_strset (destreg, mem,
0945b39d 11367 gen_rtx_SUBREG (SImode, zeroreg, 0)));
4e44c1ef
JJ
11368 offset += 4;
11369 }
0945b39d 11370 if (count & 0x02)
4e44c1ef
JJ
11371 {
11372 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11373 offset);
11374 emit_insn (gen_strset (destreg, mem,
0945b39d 11375 gen_rtx_SUBREG (HImode, zeroreg, 0)));
4e44c1ef
JJ
11376 offset += 2;
11377 }
0945b39d 11378 if (count & 0x01)
4e44c1ef
JJ
11379 {
11380 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11381 offset);
11382 emit_insn (gen_strset (destreg, mem,
0945b39d 11383 gen_rtx_SUBREG (QImode, zeroreg, 0)));
4e44c1ef 11384 }
0945b39d
JH
11385 }
11386 else
11387 {
11388 rtx countreg2;
11389 rtx label = NULL;
37ad04a5
JH
11390 /* Compute desired alignment of the string operation. */
11391 int desired_alignment = (TARGET_PENTIUMPRO
11392 && (count == 0 || count >= (unsigned int) 260)
11393 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11394
11395 /* In case we don't know anything about the alignment, default to
11396 library version, since it is usually equally fast and result in
4977bab6
ZW
11397 shorter code.
11398
11399 Also emit call when we know that the count is large and call overhead
11400 will not be important. */
11401 if (!TARGET_INLINE_ALL_STRINGOPS
11402 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11403 return 0;
11404
11405 if (TARGET_SINGLE_STRINGOP)
11406 emit_insn (gen_cld ());
11407
11408 countreg2 = gen_reg_rtx (Pmode);
11409 countreg = copy_to_mode_reg (counter_mode, count_exp);
11410 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
4e44c1ef
JJ
11411 /* Get rid of MEM_OFFSET, it won't be accurate. */
11412 dst = change_address (dst, BLKmode, destreg);
0945b39d 11413
37ad04a5 11414 if (count == 0 && align < desired_alignment)
0945b39d
JH
11415 {
11416 label = gen_label_rtx ();
37ad04a5 11417 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11418 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11419 }
11420 if (align <= 1)
11421 {
11422 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11423 emit_insn (gen_strset (destreg, dst,
11424 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11425 ix86_adjust_counter (countreg, 1);
11426 emit_label (label);
11427 LABEL_NUSES (label) = 1;
11428 }
11429 if (align <= 2)
11430 {
11431 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11432 emit_insn (gen_strset (destreg, dst,
11433 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11434 ix86_adjust_counter (countreg, 2);
11435 emit_label (label);
11436 LABEL_NUSES (label) = 1;
11437 }
37ad04a5 11438 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11439 {
11440 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11441 emit_insn (gen_strset (destreg, dst,
11442 (TARGET_64BIT
11443 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11444 : zeroreg)));
0945b39d
JH
11445 ix86_adjust_counter (countreg, 4);
11446 emit_label (label);
11447 LABEL_NUSES (label) = 1;
11448 }
11449
37ad04a5
JH
11450 if (label && desired_alignment > 4 && !TARGET_64BIT)
11451 {
11452 emit_label (label);
11453 LABEL_NUSES (label) = 1;
11454 label = NULL_RTX;
11455 }
11456
0945b39d
JH
11457 if (!TARGET_SINGLE_STRINGOP)
11458 emit_insn (gen_cld ());
11459 if (TARGET_64BIT)
11460 {
11461 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11462 GEN_INT (3)));
4e44c1ef 11463 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11464 }
11465 else
11466 {
4e44c1ef
JJ
11467 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11468 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11469 }
4e44c1ef
JJ
11470 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11471 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11472
0945b39d
JH
11473 if (label)
11474 {
11475 emit_label (label);
11476 LABEL_NUSES (label) = 1;
11477 }
37ad04a5 11478
0945b39d 11479 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11480 emit_insn (gen_strset (destreg, dst,
11481 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11482 if (TARGET_64BIT && (align <= 4 || count == 0))
11483 {
79258dce 11484 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11485 emit_insn (gen_strset (destreg, dst,
11486 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11487 emit_label (label);
11488 LABEL_NUSES (label) = 1;
11489 }
11490 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11493 if (align <= 2 || count == 0)
11494 {
74411039 11495 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11496 emit_insn (gen_strset (destreg, dst,
11497 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11498 emit_label (label);
11499 LABEL_NUSES (label) = 1;
11500 }
11501 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11502 emit_insn (gen_strset (destreg, dst,
11503 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11504 if (align <= 1 || count == 0)
11505 {
74411039 11506 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11507 emit_insn (gen_strset (destreg, dst,
11508 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11509 emit_label (label);
11510 LABEL_NUSES (label) = 1;
11511 }
11512 }
11513 return 1;
11514}
4e44c1ef 11515
0945b39d
JH
11516/* Expand strlen. */
11517int
b96a374d 11518ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
0945b39d
JH
11519{
11520 rtx addr, scratch1, scratch2, scratch3, scratch4;
11521
11522 /* The generic case of strlen expander is long. Avoid it's
11523 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11524
11525 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11526 && !TARGET_INLINE_ALL_STRINGOPS
11527 && !optimize_size
11528 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11529 return 0;
11530
11531 addr = force_reg (Pmode, XEXP (src, 0));
11532 scratch1 = gen_reg_rtx (Pmode);
11533
11534 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11535 && !optimize_size)
11536 {
11537 /* Well it seems that some optimizer does not combine a call like
11538 foo(strlen(bar), strlen(bar));
11539 when the move and the subtraction is done here. It does calculate
11540 the length just once when these instructions are done inside of
11541 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11542 often used and I use one fewer register for the lifetime of
11543 output_strlen_unroll() this is better. */
11544
11545 emit_move_insn (out, addr);
11546
4e44c1ef 11547 ix86_expand_strlensi_unroll_1 (out, src, align);
0945b39d
JH
11548
11549 /* strlensi_unroll_1 returns the address of the zero at the end of
11550 the string, like memchr(), so compute the length by subtracting
11551 the start address. */
11552 if (TARGET_64BIT)
11553 emit_insn (gen_subdi3 (out, out, addr));
11554 else
11555 emit_insn (gen_subsi3 (out, out, addr));
11556 }
11557 else
11558 {
4e44c1ef 11559 rtx unspec;
0945b39d
JH
11560 scratch2 = gen_reg_rtx (Pmode);
11561 scratch3 = gen_reg_rtx (Pmode);
11562 scratch4 = force_reg (Pmode, constm1_rtx);
11563
11564 emit_move_insn (scratch3, addr);
11565 eoschar = force_reg (QImode, eoschar);
11566
11567 emit_insn (gen_cld ());
4e44c1ef
JJ
11568 src = replace_equiv_address_nv (src, scratch3);
11569
11570 /* If .md starts supporting :P, this can be done in .md. */
11571 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11572 scratch4), UNSPEC_SCAS);
11573 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
0945b39d
JH
11574 if (TARGET_64BIT)
11575 {
0945b39d
JH
11576 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11577 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11578 }
11579 else
11580 {
0945b39d
JH
11581 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11582 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11583 }
11584 }
11585 return 1;
11586}
11587
e075ae69
RH
11588/* Expand the appropriate insns for doing strlen if not just doing
11589 repnz; scasb
11590
11591 out = result, initialized with the start address
11592 align_rtx = alignment of the address.
11593 scratch = scratch register, initialized with the startaddress when
77ebd435 11594 not aligned, otherwise undefined
3f803cd9 11595
39e3f58c 11596 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
11597 some address computing at the end. These things are done in i386.md. */
11598
0945b39d 11599static void
4e44c1ef 11600ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 11601{
e075ae69
RH
11602 int align;
11603 rtx tmp;
11604 rtx align_2_label = NULL_RTX;
11605 rtx align_3_label = NULL_RTX;
11606 rtx align_4_label = gen_label_rtx ();
11607 rtx end_0_label = gen_label_rtx ();
e075ae69 11608 rtx mem;
e2e52e1b 11609 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11610 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11611 rtx cmp;
e075ae69
RH
11612
11613 align = 0;
11614 if (GET_CODE (align_rtx) == CONST_INT)
11615 align = INTVAL (align_rtx);
3f803cd9 11616
e9a25f70 11617 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11618
e9a25f70 11619 /* Is there a known alignment and is it less than 4? */
e075ae69 11620 if (align < 4)
3f803cd9 11621 {
0945b39d
JH
11622 rtx scratch1 = gen_reg_rtx (Pmode);
11623 emit_move_insn (scratch1, out);
e9a25f70 11624 /* Is there a known alignment and is it not 2? */
e075ae69 11625 if (align != 2)
3f803cd9 11626 {
e075ae69
RH
11627 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11628 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11629
11630 /* Leave just the 3 lower bits. */
0945b39d 11631 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11632 NULL_RTX, 0, OPTAB_WIDEN);
11633
9076b9c1 11634 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11635 Pmode, 1, align_4_label);
60c81c89 11636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 11637 Pmode, 1, align_2_label);
60c81c89 11638 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 11639 Pmode, 1, align_3_label);
3f803cd9
SC
11640 }
11641 else
11642 {
e9a25f70
JL
11643 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11644 check if is aligned to 4 - byte. */
e9a25f70 11645
60c81c89 11646 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
11647 NULL_RTX, 0, OPTAB_WIDEN);
11648
9076b9c1 11649 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11650 Pmode, 1, align_4_label);
3f803cd9
SC
11651 }
11652
4e44c1ef 11653 mem = change_address (src, QImode, out);
e9a25f70 11654
e075ae69 11655 /* Now compare the bytes. */
e9a25f70 11656
0f290768 11657 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 11658 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11659 QImode, 1, end_0_label);
3f803cd9 11660
0f290768 11661 /* Increment the address. */
0945b39d
JH
11662 if (TARGET_64BIT)
11663 emit_insn (gen_adddi3 (out, out, const1_rtx));
11664 else
11665 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11666
e075ae69
RH
11667 /* Not needed with an alignment of 2 */
11668 if (align != 2)
11669 {
11670 emit_label (align_2_label);
3f803cd9 11671
d43e0b7d
RK
11672 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11673 end_0_label);
e075ae69 11674
0945b39d
JH
11675 if (TARGET_64BIT)
11676 emit_insn (gen_adddi3 (out, out, const1_rtx));
11677 else
11678 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11679
11680 emit_label (align_3_label);
11681 }
11682
d43e0b7d
RK
11683 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11684 end_0_label);
e075ae69 11685
0945b39d
JH
11686 if (TARGET_64BIT)
11687 emit_insn (gen_adddi3 (out, out, const1_rtx));
11688 else
11689 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11690 }
11691
e075ae69
RH
11692 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11693 align this loop. It gives only huge programs, but does not help to
11694 speed up. */
11695 emit_label (align_4_label);
3f803cd9 11696
4e44c1ef 11697 mem = change_address (src, SImode, out);
e075ae69 11698 emit_move_insn (scratch, mem);
0945b39d
JH
11699 if (TARGET_64BIT)
11700 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11701 else
11702 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11703
e2e52e1b
JH
11704 /* This formula yields a nonzero result iff one of the bytes is zero.
11705 This saves three branches inside loop and many cycles. */
11706
11707 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11708 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11709 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11710 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11711 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11712 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11713 align_4_label);
e2e52e1b
JH
11714
11715 if (TARGET_CMOVE)
11716 {
11717 rtx reg = gen_reg_rtx (SImode);
0945b39d 11718 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11719 emit_move_insn (reg, tmpreg);
11720 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11721
0f290768 11722 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11723 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11724 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11725 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11726 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11727 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11728 reg,
11729 tmpreg)));
e2e52e1b 11730 /* Emit lea manually to avoid clobbering of flags. */
0945b39d 11731 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 11732 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
11733
11734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11735 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11736 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11737 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11738 reg2,
11739 out)));
e2e52e1b
JH
11740
11741 }
11742 else
11743 {
11744 rtx end_2_label = gen_label_rtx ();
11745 /* Is zero in the first two bytes? */
11746
16189740 11747 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11748 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11749 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11750 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11751 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11752 pc_rtx);
11753 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11754 JUMP_LABEL (tmp) = end_2_label;
11755
0f290768 11756 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11757 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 11758 if (TARGET_64BIT)
60c81c89 11759 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 11760 else
60c81c89 11761 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
11762
11763 emit_label (end_2_label);
11764
11765 }
11766
0f290768 11767 /* Avoid branch in fixing the byte. */
e2e52e1b 11768 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11769 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11770 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11771 if (TARGET_64BIT)
e6e81735 11772 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11773 else
e6e81735 11774 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
11775
11776 emit_label (end_0_label);
11777}
0e07aff3
RH
11778
11779void
0f901c4c
SH
11780ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11781 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 11782 rtx pop, int sibcall)
0e07aff3
RH
11783{
11784 rtx use = NULL, call;
11785
11786 if (pop == const0_rtx)
11787 pop = NULL;
11788 if (TARGET_64BIT && pop)
11789 abort ();
11790
b069de3b
SS
11791#if TARGET_MACHO
11792 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11793 fnaddr = machopic_indirect_call_target (fnaddr);
11794#else
0e07aff3
RH
11795 /* Static functions and indirect calls don't need the pic register. */
11796 if (! TARGET_64BIT && flag_pic
11797 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11798 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11799 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11800
11801 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11802 {
11803 rtx al = gen_rtx_REG (QImode, 0);
11804 emit_move_insn (al, callarg2);
11805 use_reg (&use, al);
11806 }
b069de3b 11807#endif /* TARGET_MACHO */
0e07aff3
RH
11808
11809 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11810 {
11811 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11812 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11813 }
4977bab6
ZW
11814 if (sibcall && TARGET_64BIT
11815 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11816 {
11817 rtx addr;
11818 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 11819 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
11820 emit_move_insn (fnaddr, addr);
11821 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11822 }
0e07aff3
RH
11823
11824 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11825 if (retval)
11826 call = gen_rtx_SET (VOIDmode, retval, call);
11827 if (pop)
11828 {
11829 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11830 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11831 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11832 }
11833
11834 call = emit_call_insn (call);
11835 if (use)
11836 CALL_INSN_FUNCTION_USAGE (call) = use;
11837}
fce5a9f2 11838
e075ae69 11839\f
e075ae69
RH
11840/* Clear stack slot assignments remembered from previous functions.
11841 This is called from INIT_EXPANDERS once before RTL is emitted for each
11842 function. */
11843
e2500fed 11844static struct machine_function *
b96a374d 11845ix86_init_machine_status (void)
37b15744 11846{
d7394366
JH
11847 struct machine_function *f;
11848
11849 f = ggc_alloc_cleared (sizeof (struct machine_function));
11850 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11851
11852 return f;
1526a060
BS
11853}
11854
e075ae69
RH
11855/* Return a MEM corresponding to a stack slot with mode MODE.
11856 Allocate a new slot if necessary.
11857
11858 The RTL for a function can have several slots available: N is
11859 which slot to use. */
11860
11861rtx
b96a374d 11862assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11863{
ddb0ae00
ZW
11864 struct stack_local_entry *s;
11865
e075ae69
RH
11866 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11867 abort ();
11868
ddb0ae00
ZW
11869 for (s = ix86_stack_locals; s; s = s->next)
11870 if (s->mode == mode && s->n == n)
11871 return s->rtl;
11872
11873 s = (struct stack_local_entry *)
11874 ggc_alloc (sizeof (struct stack_local_entry));
11875 s->n = n;
11876 s->mode = mode;
11877 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11878
ddb0ae00
ZW
11879 s->next = ix86_stack_locals;
11880 ix86_stack_locals = s;
11881 return s->rtl;
e075ae69 11882}
f996902d
RH
11883
11884/* Construct the SYMBOL_REF for the tls_get_addr function. */
11885
e2500fed 11886static GTY(()) rtx ix86_tls_symbol;
f996902d 11887rtx
b96a374d 11888ix86_tls_get_addr (void)
f996902d 11889{
f996902d 11890
e2500fed 11891 if (!ix86_tls_symbol)
f996902d 11892 {
75d38379
JJ
11893 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11894 (TARGET_GNU_TLS && !TARGET_64BIT)
11895 ? "___tls_get_addr"
11896 : "__tls_get_addr");
f996902d
RH
11897 }
11898
e2500fed 11899 return ix86_tls_symbol;
f996902d 11900}
e075ae69
RH
11901\f
11902/* Calculate the length of the memory address in the instruction
11903 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11904
11905static int
b96a374d 11906memory_address_length (rtx addr)
e075ae69
RH
11907{
11908 struct ix86_address parts;
11909 rtx base, index, disp;
11910 int len;
11911
11912 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11913 || GET_CODE (addr) == POST_INC
11914 || GET_CODE (addr) == PRE_MODIFY
11915 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11916 return 0;
3f803cd9 11917
e075ae69
RH
11918 if (! ix86_decompose_address (addr, &parts))
11919 abort ();
3f803cd9 11920
e075ae69
RH
11921 base = parts.base;
11922 index = parts.index;
11923 disp = parts.disp;
11924 len = 0;
3f803cd9 11925
7b65ed54
EB
11926 /* Rule of thumb:
11927 - esp as the base always wants an index,
11928 - ebp as the base always wants a displacement. */
11929
e075ae69
RH
11930 /* Register Indirect. */
11931 if (base && !index && !disp)
11932 {
7b65ed54
EB
11933 /* esp (for its index) and ebp (for its displacement) need
11934 the two-byte modrm form. */
e075ae69
RH
11935 if (addr == stack_pointer_rtx
11936 || addr == arg_pointer_rtx
564d80f4
JH
11937 || addr == frame_pointer_rtx
11938 || addr == hard_frame_pointer_rtx)
e075ae69 11939 len = 1;
3f803cd9 11940 }
e9a25f70 11941
e075ae69
RH
11942 /* Direct Addressing. */
11943 else if (disp && !base && !index)
11944 len = 4;
11945
3f803cd9
SC
11946 else
11947 {
e075ae69
RH
11948 /* Find the length of the displacement constant. */
11949 if (disp)
11950 {
11951 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
11952 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11953 && base)
e075ae69
RH
11954 len = 1;
11955 else
11956 len = 4;
11957 }
7b65ed54
EB
11958 /* ebp always wants a displacement. */
11959 else if (base == hard_frame_pointer_rtx)
11960 len = 1;
3f803cd9 11961
43f3a59d 11962 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
11963 if (index
11964 /* ...like esp, which always wants an index. */
11965 || base == stack_pointer_rtx
11966 || base == arg_pointer_rtx
11967 || base == frame_pointer_rtx)
e075ae69 11968 len += 1;
3f803cd9
SC
11969 }
11970
e075ae69
RH
11971 return len;
11972}
79325812 11973
5bf0ebab
RH
11974/* Compute default value for "length_immediate" attribute. When SHORTFORM
11975 is set, expect that insn have 8bit immediate alternative. */
e075ae69 11976int
b96a374d 11977ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 11978{
6ef67412
JH
11979 int len = 0;
11980 int i;
6c698a6d 11981 extract_insn_cached (insn);
6ef67412
JH
11982 for (i = recog_data.n_operands - 1; i >= 0; --i)
11983 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11984 {
6ef67412 11985 if (len)
3071fab5 11986 abort ();
6ef67412
JH
11987 if (shortform
11988 && GET_CODE (recog_data.operand[i]) == CONST_INT
11989 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11990 len = 1;
11991 else
11992 {
11993 switch (get_attr_mode (insn))
11994 {
11995 case MODE_QI:
11996 len+=1;
11997 break;
11998 case MODE_HI:
11999 len+=2;
12000 break;
12001 case MODE_SI:
12002 len+=4;
12003 break;
14f73b5a
JH
12004 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12005 case MODE_DI:
12006 len+=4;
12007 break;
6ef67412 12008 default:
c725bd79 12009 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
12010 }
12011 }
3071fab5 12012 }
6ef67412
JH
12013 return len;
12014}
12015/* Compute default value for "length_address" attribute. */
12016int
b96a374d 12017ix86_attr_length_address_default (rtx insn)
6ef67412
JH
12018{
12019 int i;
9b73c90a
EB
12020
12021 if (get_attr_type (insn) == TYPE_LEA)
12022 {
12023 rtx set = PATTERN (insn);
12024 if (GET_CODE (set) == SET)
12025 ;
12026 else if (GET_CODE (set) == PARALLEL
12027 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12028 set = XVECEXP (set, 0, 0);
12029 else
12030 {
12031#ifdef ENABLE_CHECKING
12032 abort ();
12033#endif
12034 return 0;
12035 }
12036
12037 return memory_address_length (SET_SRC (set));
12038 }
12039
6c698a6d 12040 extract_insn_cached (insn);
1ccbefce
RH
12041 for (i = recog_data.n_operands - 1; i >= 0; --i)
12042 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12043 {
6ef67412 12044 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
12045 break;
12046 }
6ef67412 12047 return 0;
3f803cd9 12048}
e075ae69
RH
12049\f
12050/* Return the maximum number of instructions a cpu can issue. */
b657fc39 12051
c237e94a 12052static int
b96a374d 12053ix86_issue_rate (void)
b657fc39 12054{
9e555526 12055 switch (ix86_tune)
b657fc39 12056 {
e075ae69
RH
12057 case PROCESSOR_PENTIUM:
12058 case PROCESSOR_K6:
12059 return 2;
79325812 12060
e075ae69 12061 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
12062 case PROCESSOR_PENTIUM4:
12063 case PROCESSOR_ATHLON:
4977bab6 12064 case PROCESSOR_K8:
e075ae69 12065 return 3;
b657fc39 12066
b657fc39 12067 default:
e075ae69 12068 return 1;
b657fc39 12069 }
b657fc39
L
12070}
12071
e075ae69
RH
12072/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12073 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 12074
e075ae69 12075static int
b96a374d 12076ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12077{
12078 rtx set, set2;
b657fc39 12079
e075ae69
RH
12080 /* Simplify the test for uninteresting insns. */
12081 if (insn_type != TYPE_SETCC
12082 && insn_type != TYPE_ICMOV
12083 && insn_type != TYPE_FCMOV
12084 && insn_type != TYPE_IBR)
12085 return 0;
b657fc39 12086
e075ae69
RH
12087 if ((set = single_set (dep_insn)) != 0)
12088 {
12089 set = SET_DEST (set);
12090 set2 = NULL_RTX;
12091 }
12092 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12093 && XVECLEN (PATTERN (dep_insn), 0) == 2
12094 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12095 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12096 {
12097 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12098 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12099 }
78a0d70c
ZW
12100 else
12101 return 0;
b657fc39 12102
78a0d70c
ZW
12103 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12104 return 0;
b657fc39 12105
f5143c46 12106 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
12107 not any other potentially set register. */
12108 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12109 return 0;
12110
12111 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12112 return 0;
12113
12114 return 1;
e075ae69 12115}
b657fc39 12116
e075ae69
RH
12117/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12118 address with operands set by DEP_INSN. */
12119
12120static int
b96a374d 12121ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12122{
12123 rtx addr;
12124
6ad48e84
JH
12125 if (insn_type == TYPE_LEA
12126 && TARGET_PENTIUM)
5fbdde42
RH
12127 {
12128 addr = PATTERN (insn);
12129 if (GET_CODE (addr) == SET)
12130 ;
12131 else if (GET_CODE (addr) == PARALLEL
12132 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12133 addr = XVECEXP (addr, 0, 0);
12134 else
12135 abort ();
12136 addr = SET_SRC (addr);
12137 }
e075ae69
RH
12138 else
12139 {
12140 int i;
6c698a6d 12141 extract_insn_cached (insn);
1ccbefce
RH
12142 for (i = recog_data.n_operands - 1; i >= 0; --i)
12143 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12144 {
1ccbefce 12145 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
12146 goto found;
12147 }
12148 return 0;
12149 found:;
b657fc39
L
12150 }
12151
e075ae69 12152 return modified_in_p (addr, dep_insn);
b657fc39 12153}
a269a03c 12154
c237e94a 12155static int
b96a374d 12156ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 12157{
e075ae69 12158 enum attr_type insn_type, dep_insn_type;
6ad48e84 12159 enum attr_memory memory, dep_memory;
e075ae69 12160 rtx set, set2;
9b00189f 12161 int dep_insn_code_number;
a269a03c 12162
d1f87653 12163 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12164 if (REG_NOTE_KIND (link) != 0)
309ada50 12165 return 0;
a269a03c 12166
9b00189f
JH
12167 dep_insn_code_number = recog_memoized (dep_insn);
12168
e075ae69 12169 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12170 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12171 return cost;
a269a03c 12172
1c71e60e
JH
12173 insn_type = get_attr_type (insn);
12174 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12175
9e555526 12176 switch (ix86_tune)
a269a03c
JC
12177 {
12178 case PROCESSOR_PENTIUM:
e075ae69
RH
12179 /* Address Generation Interlock adds a cycle of latency. */
12180 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12181 cost += 1;
12182
12183 /* ??? Compares pair with jump/setcc. */
12184 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12185 cost = 0;
12186
d1f87653 12187 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 12188 if (insn_type == TYPE_FMOV
e075ae69
RH
12189 && get_attr_memory (insn) == MEMORY_STORE
12190 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12191 cost += 1;
12192 break;
a269a03c 12193
e075ae69 12194 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
12195 memory = get_attr_memory (insn);
12196 dep_memory = get_attr_memory (dep_insn);
12197
0f290768 12198 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
12199 increase the cost here for non-imov insns. */
12200 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
12201 && dep_insn_type != TYPE_FMOV
12202 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
12203 cost += 1;
12204
12205 /* INT->FP conversion is expensive. */
12206 if (get_attr_fp_int_src (dep_insn))
12207 cost += 5;
12208
12209 /* There is one cycle extra latency between an FP op and a store. */
12210 if (insn_type == TYPE_FMOV
12211 && (set = single_set (dep_insn)) != NULL_RTX
12212 && (set2 = single_set (insn)) != NULL_RTX
12213 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12214 && GET_CODE (SET_DEST (set2)) == MEM)
12215 cost += 1;
6ad48e84
JH
12216
12217 /* Show ability of reorder buffer to hide latency of load by executing
12218 in parallel with previous instruction in case
12219 previous instruction is not needed to compute the address. */
12220 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12221 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12222 {
6ad48e84
JH
12223 /* Claim moves to take one cycle, as core can issue one load
12224 at time and the next load can start cycle later. */
12225 if (dep_insn_type == TYPE_IMOV
12226 || dep_insn_type == TYPE_FMOV)
12227 cost = 1;
12228 else if (cost > 1)
12229 cost--;
12230 }
e075ae69 12231 break;
a269a03c 12232
e075ae69 12233 case PROCESSOR_K6:
6ad48e84
JH
12234 memory = get_attr_memory (insn);
12235 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
12236 /* The esp dependency is resolved before the instruction is really
12237 finished. */
12238 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12239 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12240 return 1;
a269a03c 12241
0f290768 12242 /* Since we can't represent delayed latencies of load+operation,
e075ae69 12243 increase the cost here for non-imov insns. */
6ad48e84 12244 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
12245 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12246
12247 /* INT->FP conversion is expensive. */
12248 if (get_attr_fp_int_src (dep_insn))
12249 cost += 5;
6ad48e84
JH
12250
12251 /* Show ability of reorder buffer to hide latency of load by executing
12252 in parallel with previous instruction in case
12253 previous instruction is not needed to compute the address. */
12254 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12255 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12256 {
6ad48e84
JH
12257 /* Claim moves to take one cycle, as core can issue one load
12258 at time and the next load can start cycle later. */
12259 if (dep_insn_type == TYPE_IMOV
12260 || dep_insn_type == TYPE_FMOV)
12261 cost = 1;
12262 else if (cost > 2)
12263 cost -= 2;
12264 else
12265 cost = 1;
12266 }
a14003ee 12267 break;
e075ae69 12268
309ada50 12269 case PROCESSOR_ATHLON:
4977bab6 12270 case PROCESSOR_K8:
6ad48e84
JH
12271 memory = get_attr_memory (insn);
12272 dep_memory = get_attr_memory (dep_insn);
12273
6ad48e84
JH
12274 /* Show ability of reorder buffer to hide latency of load by executing
12275 in parallel with previous instruction in case
12276 previous instruction is not needed to compute the address. */
12277 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12278 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12279 {
26f74aa3
JH
12280 enum attr_unit unit = get_attr_unit (insn);
12281 int loadcost = 3;
12282
12283 /* Because of the difference between the length of integer and
12284 floating unit pipeline preparation stages, the memory operands
b96a374d 12285 for floating point are cheaper.
26f74aa3 12286
c51e6d85 12287 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
12288 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12289 loadcost = 3;
12290 else
12291 loadcost = TARGET_ATHLON ? 2 : 0;
12292
12293 if (cost >= loadcost)
12294 cost -= loadcost;
6ad48e84
JH
12295 else
12296 cost = 0;
12297 }
309ada50 12298
a269a03c 12299 default:
a269a03c
JC
12300 break;
12301 }
12302
12303 return cost;
12304}
0a726ef1 12305
e075ae69
RH
/* Per-function scheduling state.  Only the PentiumPro scheduler keeps
   state at present, hence the union with a single member.  Cleared at
   the start of each block by ix86_sched_init.  */
static union
{
  struct ppro_sched_data
  {
    /* Insns currently occupying the three PPro decoders (4-1-1
       template); NULL means the slot is free.  */
    rtx decode[3];
    /* Number of insns issued in the current clock cycle.  */
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 12314
e075ae69 12315static enum attr_ppro_uops
b96a374d 12316ix86_safe_ppro_uops (rtx insn)
e075ae69
RH
12317{
12318 if (recog_memoized (insn) >= 0)
12319 return get_attr_ppro_uops (insn);
12320 else
12321 return PPRO_UOPS_MANY;
12322}
0a726ef1 12323
e075ae69 12324static void
b96a374d 12325ix86_dump_ppro_packet (FILE *dump)
0a726ef1 12326{
e075ae69 12327 if (ix86_sched_data.ppro.decode[0])
0a726ef1 12328 {
e075ae69
RH
12329 fprintf (dump, "PPRO packet: %d",
12330 INSN_UID (ix86_sched_data.ppro.decode[0]));
12331 if (ix86_sched_data.ppro.decode[1])
12332 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12333 if (ix86_sched_data.ppro.decode[2])
12334 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12335 fputc ('\n', dump);
12336 }
12337}
0a726ef1 12338
e075ae69 12339/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 12340
c237e94a 12341static void
b96a374d
AJ
12342ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12343 int sched_verbose ATTRIBUTE_UNUSED,
12344 int veclen ATTRIBUTE_UNUSED)
e075ae69
RH
12345{
12346 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12347}
12348
12349/* Shift INSN to SLOT, and shift everything else down. */
12350
12351static void
b96a374d 12352ix86_reorder_insn (rtx *insnp, rtx *slot)
e075ae69
RH
12353{
12354 if (insnp != slot)
12355 {
12356 rtx insn = *insnp;
0f290768 12357 do
e075ae69
RH
12358 insnp[0] = insnp[1];
12359 while (++insnp != slot);
12360 *insnp = insn;
0a726ef1 12361 }
e075ae69
RH
12362}
12363
/* Reorder the ready queue READY..E_READY (E_READY is the highest
   priority end) so that the insns issued this cycle fit the PentiumPro
   4-1-1 decoder template: one complex insn in decoder 0, single-uop
   insns in decoders 1 and 2.  Updates
   ix86_sched_data.ppro.issued_this_cycle for ix86_variable_issue.  */

static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  Walk down
	 from the highest-priority end of the queue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	/* Stop when the queue has been exhausted.  */
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one issued insn so the counter in
     ix86_variable_issue never goes negative.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 12446
0f290768 12447/* We are about to being issuing insns for this clock cycle.
78a0d70c 12448 Override the default sort algorithm to better slot instructions. */
c237e94a 12449static int
b96a374d
AJ
12450ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12451 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12452 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
78a0d70c 12453{
c237e94a 12454 int n_ready = *n_readyp;
78a0d70c 12455 rtx *e_ready = ready + n_ready - 1;
fb693d44 12456
fce5a9f2 12457 /* Make sure to go ahead and initialize key items in
a151daf0
JL
12458 ix86_sched_data if we are not going to bother trying to
12459 reorder the ready queue. */
78a0d70c 12460 if (n_ready < 2)
a151daf0
JL
12461 {
12462 ix86_sched_data.ppro.issued_this_cycle = 1;
12463 goto out;
12464 }
e075ae69 12465
9e555526 12466 switch (ix86_tune)
78a0d70c
ZW
12467 {
12468 default:
12469 break;
e075ae69 12470
78a0d70c
ZW
12471 case PROCESSOR_PENTIUMPRO:
12472 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 12473 break;
fb693d44
RH
12474 }
12475
e075ae69
RH
12476out:
12477 return ix86_issue_rate ();
12478}
fb693d44 12479
e075ae69
RH
12480/* We are about to issue INSN. Return the number of insns left on the
12481 ready queue that can be issued this cycle. */
b222082e 12482
c237e94a 12483static int
b96a374d
AJ
12484ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12485 int can_issue_more)
e075ae69
RH
12486{
12487 int i;
9e555526 12488 switch (ix86_tune)
fb693d44 12489 {
e075ae69
RH
12490 default:
12491 return can_issue_more - 1;
fb693d44 12492
e075ae69
RH
12493 case PROCESSOR_PENTIUMPRO:
12494 {
12495 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 12496
e075ae69
RH
12497 if (uops == PPRO_UOPS_MANY)
12498 {
12499 if (sched_verbose)
12500 ix86_dump_ppro_packet (dump);
12501 ix86_sched_data.ppro.decode[0] = insn;
12502 ix86_sched_data.ppro.decode[1] = NULL;
12503 ix86_sched_data.ppro.decode[2] = NULL;
12504 if (sched_verbose)
12505 ix86_dump_ppro_packet (dump);
12506 ix86_sched_data.ppro.decode[0] = NULL;
12507 }
12508 else if (uops == PPRO_UOPS_FEW)
12509 {
12510 if (sched_verbose)
12511 ix86_dump_ppro_packet (dump);
12512 ix86_sched_data.ppro.decode[0] = insn;
12513 ix86_sched_data.ppro.decode[1] = NULL;
12514 ix86_sched_data.ppro.decode[2] = NULL;
12515 }
12516 else
12517 {
12518 for (i = 0; i < 3; ++i)
12519 if (ix86_sched_data.ppro.decode[i] == NULL)
12520 {
12521 ix86_sched_data.ppro.decode[i] = insn;
12522 break;
12523 }
12524 if (i == 3)
12525 abort ();
12526 if (i == 2)
12527 {
12528 if (sched_verbose)
12529 ix86_dump_ppro_packet (dump);
12530 ix86_sched_data.ppro.decode[0] = NULL;
12531 ix86_sched_data.ppro.decode[1] = NULL;
12532 ix86_sched_data.ppro.decode[2] = NULL;
12533 }
12534 }
12535 }
12536 return --ix86_sched_data.ppro.issued_this_cycle;
12537 }
fb693d44 12538}
9b690711
RH
12539
12540static int
b96a374d 12541ia32_use_dfa_pipeline_interface (void)
9b690711 12542{
4977bab6 12543 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
9b690711
RH
12544 return 1;
12545 return 0;
12546}
12547
12548/* How many alternative schedules to try. This should be as wide as the
12549 scheduling freedom in the DFA, but no wider. Making this value too
12550 large results extra work for the scheduler. */
12551
12552static int
b96a374d 12553ia32_multipass_dfa_lookahead (void)
9b690711 12554{
9e555526 12555 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711
RH
12556 return 2;
12557 else
12558 return 0;
12559}
12560
0e4970d7 12561\f
a7180f70
BS
12562/* Compute the alignment given to a constant that is being placed in memory.
12563 EXP is the constant and ALIGN is the alignment that the object would
12564 ordinarily have.
12565 The value of this function is used instead of that alignment to align
12566 the object. */
12567
12568int
b96a374d 12569ix86_constant_alignment (tree exp, int align)
a7180f70
BS
12570{
12571 if (TREE_CODE (exp) == REAL_CST)
12572 {
12573 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12574 return 64;
12575 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12576 return 128;
12577 }
4137ba7a
JJ
12578 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12579 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12580 return BITS_PER_WORD;
a7180f70
BS
12581
12582 return align;
12583}
12584
12585/* Compute the alignment for a static variable.
12586 TYPE is the data type, and ALIGN is the alignment that
12587 the object would ordinarily have. The value of this function is used
12588 instead of that alignment to align the object. */
12589
12590int
b96a374d 12591ix86_data_alignment (tree type, int align)
a7180f70
BS
12592{
12593 if (AGGREGATE_TYPE_P (type)
12594 && TYPE_SIZE (type)
12595 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12596 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12597 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12598 return 256;
12599
0d7d98ee
JH
12600 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12601 to 16byte boundary. */
12602 if (TARGET_64BIT)
12603 {
12604 if (AGGREGATE_TYPE_P (type)
12605 && TYPE_SIZE (type)
12606 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12607 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12608 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12609 return 128;
12610 }
12611
a7180f70
BS
12612 if (TREE_CODE (type) == ARRAY_TYPE)
12613 {
12614 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12615 return 64;
12616 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12617 return 128;
12618 }
12619 else if (TREE_CODE (type) == COMPLEX_TYPE)
12620 {
0f290768 12621
a7180f70
BS
12622 if (TYPE_MODE (type) == DCmode && align < 64)
12623 return 64;
12624 if (TYPE_MODE (type) == XCmode && align < 128)
12625 return 128;
12626 }
12627 else if ((TREE_CODE (type) == RECORD_TYPE
12628 || TREE_CODE (type) == UNION_TYPE
12629 || TREE_CODE (type) == QUAL_UNION_TYPE)
12630 && TYPE_FIELDS (type))
12631 {
12632 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12633 return 64;
12634 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12635 return 128;
12636 }
12637 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12638 || TREE_CODE (type) == INTEGER_TYPE)
12639 {
12640 if (TYPE_MODE (type) == DFmode && align < 64)
12641 return 64;
12642 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12643 return 128;
12644 }
12645
12646 return align;
12647}
12648
12649/* Compute the alignment for a local variable.
12650 TYPE is the data type, and ALIGN is the alignment that
12651 the object would ordinarily have. The value of this macro is used
12652 instead of that alignment to align the object. */
12653
12654int
b96a374d 12655ix86_local_alignment (tree type, int align)
a7180f70 12656{
0d7d98ee
JH
12657 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12658 to 16byte boundary. */
12659 if (TARGET_64BIT)
12660 {
12661 if (AGGREGATE_TYPE_P (type)
12662 && TYPE_SIZE (type)
12663 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12664 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12665 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12666 return 128;
12667 }
a7180f70
BS
12668 if (TREE_CODE (type) == ARRAY_TYPE)
12669 {
12670 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12671 return 64;
12672 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12673 return 128;
12674 }
12675 else if (TREE_CODE (type) == COMPLEX_TYPE)
12676 {
12677 if (TYPE_MODE (type) == DCmode && align < 64)
12678 return 64;
12679 if (TYPE_MODE (type) == XCmode && align < 128)
12680 return 128;
12681 }
12682 else if ((TREE_CODE (type) == RECORD_TYPE
12683 || TREE_CODE (type) == UNION_TYPE
12684 || TREE_CODE (type) == QUAL_UNION_TYPE)
12685 && TYPE_FIELDS (type))
12686 {
12687 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12688 return 64;
12689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12690 return 128;
12691 }
12692 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12693 || TREE_CODE (type) == INTEGER_TYPE)
12694 {
0f290768 12695
a7180f70
BS
12696 if (TYPE_MODE (type) == DFmode && align < 64)
12697 return 64;
12698 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12699 return 128;
12700 }
12701 return align;
12702}
0ed08620
JH
12703\f
12704/* Emit RTL insns to initialize the variable parts of a trampoline.
12705 FNADDR is an RTX for the address of the function's pure code.
12706 CXT is an RTX for the static chain value for the function. */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit trampoline:
	   b9 <cxt>	mov ecx, cxt	(static chain in %ecx)
	   e9 <disp>	jmp fnaddr	(pc-relative)  */
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  /* 41 bb <imm32>: movl $fnaddr, %r11d (zero-extends to %r11).  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb <imm64>: movabs $fnaddr, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (49 ba <imm64>).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 (49 ff e3: jmp *%r11).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: the emitted code must fit the trampoline.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some targets must flush caches / mark the stack executable before
     the trampoline may run.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 12767\f
6a2dd09a
RS
/* Register the md builtin NAME with type TYPE and code CODE, but only
   when the ISA bits in MASK are enabled in target_flags, and only when
   any MASK_64BIT requirement is met by the current target.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 12775
bd793c65
BS
/* Table-entry record describing one ix86 builtin, used by the
   bdesc_* tables below and the expanders that walk them.  */
struct builtin_description
{
  /* ISA flag bits (MASK_SSE etc.) required for this builtin.  */
  const unsigned int mask;
  /* Insn pattern used to expand the builtin.  */
  const enum insn_code icode;
  /* Builtin name, e.g. "__builtin_ia32_addps"; 0 for builtins
     expanded specially rather than from the table name.  */
  const char *const name;
  /* The IX86_BUILTIN_* function code.  */
  const enum ix86_builtins code;
  /* Comparison code for compare builtins; 0 otherwise.  */
  const enum rtx_code comparison;
  /* Extra flag; nonzero swaps comparison operands.  */
  const unsigned int flag;
};
12785
8b60264b 12786static const struct builtin_description bdesc_comi[] =
bd793c65 12787{
37f22004
L
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
1194ca05
JH
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
12812};
12813
8b60264b 12814static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12815{
12816 /* SSE */
37f22004
L
12817 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12823 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12825
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12828 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12829 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12830 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12831 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12834 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12835 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12836 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12837 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12842 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12844 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12845 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12846
12847 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12851
12852 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12856
12857 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12860 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12861 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12862
12863 /* MMX */
eeb06b1b
BS
12864 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12865 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12867 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12868 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12869 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12870 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12871 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12872
12873 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12877 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12878 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12879 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12880 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12881
12882 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12883 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12884 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12885
12886 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12888 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12889 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12890
37f22004
L
12891 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12892 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12893
12894 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12895 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12896 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12897 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12898 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12899 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12900
37f22004
L
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12903 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12904 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12905
12906 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12912
12913 /* Special. */
eeb06b1b
BS
12914 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12917
37f22004
L
12918 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12919 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12920 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12921
12922 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12923 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12925 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12926 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12927 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12928
12929 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12930 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12932 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12935
12936 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12938 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12939 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12940
37f22004 12941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12942 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12943
12944 /* SSE2 */
12945 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12953
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12956 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12957 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12958 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12959 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12962 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12963 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12964 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12965 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12969 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12970 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12972 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12973 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12974
12975 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12979
1877be45
JH
12980 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12984
12985 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12988
12989 /* SSE2 MMX */
12990 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12993 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12994 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12997 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12998
12999 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13003 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13004 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13006 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13007
13008 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13012
916b60b7
BS
13013 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
13017
13018 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13020
13021 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13027
13028 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13032
13033 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 13036 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
13037 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 13040 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 13041
916b60b7
BS
13042 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13045
13046 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13048
13049 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13055
13056 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13062
13063 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13067
13068 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13069
fbe5eb6d 13070 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 13071 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 13072 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
13073 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13074
9e200aaf
KC
13075 /* SSE3 MMX */
13076 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13077 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13078 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13079 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13080 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13081 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
13082};
13083
8b60264b 13084static const struct builtin_description bdesc_1arg[] =
bd793c65 13085{
37f22004
L
13086 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13087 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 13088
37f22004
L
13089 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13090 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13091 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 13092
37f22004
L
13093 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13094 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13095 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13096 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13097 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13098 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
13099
13100 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 13103 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
13104
13105 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13106
13107 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 13109
fbe5eb6d
BS
13110 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 13115
fbe5eb6d 13116 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 13117
fbe5eb6d
BS
13118 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
13120 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13121 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
13122
13123 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
13125 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13126
22c7c85e
L
13127 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13128
9e200aaf
KC
13129 /* SSE3 */
13130 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13131 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13132 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
13133};
13134
f6155fda 13135void
b96a374d 13136ix86_init_builtins (void)
f6155fda
SS
13137{
13138 if (TARGET_MMX)
13139 ix86_init_mmx_sse_builtins ();
13140}
13141
13142/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
13143 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13144 builtins. */
e37af218 13145static void
b96a374d 13146ix86_init_mmx_sse_builtins (void)
bd793c65 13147{
8b60264b 13148 const struct builtin_description * d;
77ebd435 13149 size_t i;
bd793c65
BS
13150
13151 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
13152 tree pcchar_type_node = build_pointer_type (
13153 build_type_variant (char_type_node, 1, 0));
bd793c65 13154 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
13155 tree pcfloat_type_node = build_pointer_type (
13156 build_type_variant (float_type_node, 1, 0));
bd793c65 13157 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13158 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13159 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13160
13161 /* Comparisons. */
13162 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13163 = build_function_type_list (integer_type_node,
13164 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13165 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13166 = build_function_type_list (V4SI_type_node,
13167 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13168 /* MMX/SSE/integer conversions. */
bd793c65 13169 tree int_ftype_v4sf
b4de2f7d
AH
13170 = build_function_type_list (integer_type_node,
13171 V4SF_type_node, NULL_TREE);
453ee231
JH
13172 tree int64_ftype_v4sf
13173 = build_function_type_list (long_long_integer_type_node,
13174 V4SF_type_node, NULL_TREE);
bd793c65 13175 tree int_ftype_v8qi
b4de2f7d 13176 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13177 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13178 = build_function_type_list (V4SF_type_node,
13179 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13180 tree v4sf_ftype_v4sf_int64
13181 = build_function_type_list (V4SF_type_node,
13182 V4SF_type_node, long_long_integer_type_node,
13183 NULL_TREE);
bd793c65 13184 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13185 = build_function_type_list (V4SF_type_node,
13186 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13187 tree int_ftype_v4hi_int
b4de2f7d
AH
13188 = build_function_type_list (integer_type_node,
13189 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13190 tree v4hi_ftype_v4hi_int_int
e7a60f56 13191 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13192 integer_type_node, integer_type_node,
13193 NULL_TREE);
bd793c65
BS
13194 /* Miscellaneous. */
13195 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13196 = build_function_type_list (V8QI_type_node,
13197 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13198 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13199 = build_function_type_list (V4HI_type_node,
13200 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13201 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13202 = build_function_type_list (V4SF_type_node,
13203 V4SF_type_node, V4SF_type_node,
13204 integer_type_node, NULL_TREE);
bd793c65 13205 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13206 = build_function_type_list (V2SI_type_node,
13207 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13208 tree v4hi_ftype_v4hi_int
b4de2f7d 13209 = build_function_type_list (V4HI_type_node,
e7a60f56 13210 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13211 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13212 = build_function_type_list (V4HI_type_node,
13213 V4HI_type_node, long_long_unsigned_type_node,
13214 NULL_TREE);
bd793c65 13215 tree v2si_ftype_v2si_di
b4de2f7d
AH
13216 = build_function_type_list (V2SI_type_node,
13217 V2SI_type_node, long_long_unsigned_type_node,
13218 NULL_TREE);
bd793c65 13219 tree void_ftype_void
b4de2f7d 13220 = build_function_type (void_type_node, void_list_node);
bd793c65 13221 tree void_ftype_unsigned
b4de2f7d 13222 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13223 tree void_ftype_unsigned_unsigned
13224 = build_function_type_list (void_type_node, unsigned_type_node,
13225 unsigned_type_node, NULL_TREE);
13226 tree void_ftype_pcvoid_unsigned_unsigned
13227 = build_function_type_list (void_type_node, const_ptr_type_node,
13228 unsigned_type_node, unsigned_type_node,
13229 NULL_TREE);
bd793c65 13230 tree unsigned_ftype_void
b4de2f7d 13231 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13232 tree di_ftype_void
b4de2f7d 13233 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13234 tree v4sf_ftype_void
b4de2f7d 13235 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13236 tree v2si_ftype_v4sf
b4de2f7d 13237 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13238 /* Loads/stores. */
bd793c65 13239 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13240 = build_function_type_list (void_type_node,
13241 V8QI_type_node, V8QI_type_node,
13242 pchar_type_node, NULL_TREE);
068f5dea
JH
13243 tree v4sf_ftype_pcfloat
13244 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13245 /* @@@ the type is bogus */
13246 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13247 = build_function_type_list (V4SF_type_node,
f8ca7923 13248 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13249 tree void_ftype_pv2si_v4sf
b4de2f7d 13250 = build_function_type_list (void_type_node,
f8ca7923 13251 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13252 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13253 = build_function_type_list (void_type_node,
13254 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13255 tree void_ftype_pdi_di
b4de2f7d
AH
13256 = build_function_type_list (void_type_node,
13257 pdi_type_node, long_long_unsigned_type_node,
13258 NULL_TREE);
916b60b7 13259 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13260 = build_function_type_list (void_type_node,
13261 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13262 /* Normal vector unops. */
13263 tree v4sf_ftype_v4sf
b4de2f7d 13264 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13265
bd793c65
BS
13266 /* Normal vector binops. */
13267 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13268 = build_function_type_list (V4SF_type_node,
13269 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13270 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13271 = build_function_type_list (V8QI_type_node,
13272 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13273 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13274 = build_function_type_list (V4HI_type_node,
13275 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13276 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13277 = build_function_type_list (V2SI_type_node,
13278 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13279 tree di_ftype_di_di
b4de2f7d
AH
13280 = build_function_type_list (long_long_unsigned_type_node,
13281 long_long_unsigned_type_node,
13282 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13283
47f339cf 13284 tree v2si_ftype_v2sf
ae3aa00d 13285 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13286 tree v2sf_ftype_v2si
b4de2f7d 13287 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13288 tree v2si_ftype_v2si
b4de2f7d 13289 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13290 tree v2sf_ftype_v2sf
b4de2f7d 13291 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13292 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13293 = build_function_type_list (V2SF_type_node,
13294 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13295 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13296 = build_function_type_list (V2SI_type_node,
13297 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13298 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13299 tree pcint_type_node = build_pointer_type (
13300 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13301 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13302 tree pcdouble_type_node = build_pointer_type (
13303 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13304 tree int_ftype_v2df_v2df
b4de2f7d
AH
13305 = build_function_type_list (integer_type_node,
13306 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13307
13308 tree ti_ftype_void
b4de2f7d 13309 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13310 tree v2di_ftype_void
13311 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13312 tree ti_ftype_ti_ti
b4de2f7d
AH
13313 = build_function_type_list (intTI_type_node,
13314 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13315 tree void_ftype_pcvoid
13316 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13317 tree v2di_ftype_di
b4de2f7d
AH
13318 = build_function_type_list (V2DI_type_node,
13319 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13320 tree di_ftype_v2di
13321 = build_function_type_list (long_long_unsigned_type_node,
13322 V2DI_type_node, NULL_TREE);
fbe5eb6d 13323 tree v4sf_ftype_v4si
b4de2f7d 13324 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13325 tree v4si_ftype_v4sf
b4de2f7d 13326 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13327 tree v2df_ftype_v4si
b4de2f7d 13328 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13329 tree v4si_ftype_v2df
b4de2f7d 13330 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13331 tree v2si_ftype_v2df
b4de2f7d 13332 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13333 tree v4sf_ftype_v2df
b4de2f7d 13334 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13335 tree v2df_ftype_v2si
b4de2f7d 13336 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13337 tree v2df_ftype_v4sf
b4de2f7d 13338 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13339 tree int_ftype_v2df
b4de2f7d 13340 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13341 tree int64_ftype_v2df
13342 = build_function_type_list (long_long_integer_type_node,
b96a374d 13343 V2DF_type_node, NULL_TREE);
fbe5eb6d 13344 tree v2df_ftype_v2df_int
b4de2f7d
AH
13345 = build_function_type_list (V2DF_type_node,
13346 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13347 tree v2df_ftype_v2df_int64
13348 = build_function_type_list (V2DF_type_node,
13349 V2DF_type_node, long_long_integer_type_node,
13350 NULL_TREE);
fbe5eb6d 13351 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13352 = build_function_type_list (V4SF_type_node,
13353 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13354 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13355 = build_function_type_list (V2DF_type_node,
13356 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13357 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13358 = build_function_type_list (V2DF_type_node,
13359 V2DF_type_node, V2DF_type_node,
13360 integer_type_node,
13361 NULL_TREE);
fbe5eb6d 13362 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13363 = build_function_type_list (V2DF_type_node,
13364 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13365 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13366 = build_function_type_list (void_type_node,
13367 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13368 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13369 = build_function_type_list (void_type_node,
13370 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13371 tree void_ftype_pint_int
b4de2f7d
AH
13372 = build_function_type_list (void_type_node,
13373 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13374 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13375 = build_function_type_list (void_type_node,
13376 V16QI_type_node, V16QI_type_node,
13377 pchar_type_node, NULL_TREE);
068f5dea
JH
13378 tree v2df_ftype_pcdouble
13379 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13380 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13381 = build_function_type_list (V2DF_type_node,
13382 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13383 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13384 = build_function_type_list (V16QI_type_node,
13385 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13386 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13387 = build_function_type_list (V8HI_type_node,
13388 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13389 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13390 = build_function_type_list (V4SI_type_node,
13391 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13392 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13393 = build_function_type_list (V2DI_type_node,
13394 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13395 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13396 = build_function_type_list (V2DI_type_node,
13397 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13398 tree v2df_ftype_v2df
b4de2f7d 13399 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13400 tree v2df_ftype_double
b4de2f7d 13401 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13402 tree v2df_ftype_double_double
b4de2f7d
AH
13403 = build_function_type_list (V2DF_type_node,
13404 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13405 tree int_ftype_v8hi_int
b4de2f7d
AH
13406 = build_function_type_list (integer_type_node,
13407 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13408 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13409 = build_function_type_list (V8HI_type_node,
13410 V8HI_type_node, integer_type_node,
13411 integer_type_node, NULL_TREE);
916b60b7 13412 tree v2di_ftype_v2di_int
b4de2f7d
AH
13413 = build_function_type_list (V2DI_type_node,
13414 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13415 tree v4si_ftype_v4si_int
b4de2f7d
AH
13416 = build_function_type_list (V4SI_type_node,
13417 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13418 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13419 = build_function_type_list (V8HI_type_node,
13420 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13421 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13422 = build_function_type_list (V8HI_type_node,
13423 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13424 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13425 = build_function_type_list (V4SI_type_node,
13426 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13427 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13428 = build_function_type_list (V4SI_type_node,
13429 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13430 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13431 = build_function_type_list (long_long_unsigned_type_node,
13432 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13433 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13434 = build_function_type_list (V2DI_type_node,
13435 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13436 tree int_ftype_v16qi
b4de2f7d 13437 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13438 tree v16qi_ftype_pcchar
13439 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13440 tree void_ftype_pchar_v16qi
13441 = build_function_type_list (void_type_node,
13442 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13443 tree v4si_ftype_pcint
13444 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13445 tree void_ftype_pcint_v4si
f02e1358 13446 = build_function_type_list (void_type_node,
068f5dea 13447 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13448 tree v2di_ftype_v2di
13449 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13450
f8a1ebc6
JH
13451 tree float80_type;
13452 tree float128_type;
13453
13454 /* The __float80 type. */
13455 if (TYPE_MODE (long_double_type_node) == XFmode)
13456 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13457 "__float80");
13458 else
13459 {
13460 /* The __float80 type. */
13461 float80_type = make_node (REAL_TYPE);
13462 TYPE_PRECISION (float80_type) = 96;
13463 layout_type (float80_type);
13464 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13465 }
13466
13467 float128_type = make_node (REAL_TYPE);
13468 TYPE_PRECISION (float128_type) = 128;
13469 layout_type (float128_type);
13470 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13471
bd793c65
BS
13472 /* Add all builtins that are more or less simple operations on two
13473 operands. */
ca7558fc 13474 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13475 {
13476 /* Use one of the operands; the target can have a different mode for
13477 mask-generating compares. */
13478 enum machine_mode mode;
13479 tree type;
13480
13481 if (d->name == 0)
13482 continue;
13483 mode = insn_data[d->icode].operand[1].mode;
13484
bd793c65
BS
13485 switch (mode)
13486 {
fbe5eb6d
BS
13487 case V16QImode:
13488 type = v16qi_ftype_v16qi_v16qi;
13489 break;
13490 case V8HImode:
13491 type = v8hi_ftype_v8hi_v8hi;
13492 break;
13493 case V4SImode:
13494 type = v4si_ftype_v4si_v4si;
13495 break;
13496 case V2DImode:
13497 type = v2di_ftype_v2di_v2di;
13498 break;
13499 case V2DFmode:
13500 type = v2df_ftype_v2df_v2df;
13501 break;
13502 case TImode:
13503 type = ti_ftype_ti_ti;
13504 break;
bd793c65
BS
13505 case V4SFmode:
13506 type = v4sf_ftype_v4sf_v4sf;
13507 break;
13508 case V8QImode:
13509 type = v8qi_ftype_v8qi_v8qi;
13510 break;
13511 case V4HImode:
13512 type = v4hi_ftype_v4hi_v4hi;
13513 break;
13514 case V2SImode:
13515 type = v2si_ftype_v2si_v2si;
13516 break;
bd793c65
BS
13517 case DImode:
13518 type = di_ftype_di_di;
13519 break;
13520
13521 default:
13522 abort ();
13523 }
0f290768 13524
bd793c65
BS
13525 /* Override for comparisons. */
13526 if (d->icode == CODE_FOR_maskcmpv4sf3
13527 || d->icode == CODE_FOR_maskncmpv4sf3
13528 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13529 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13530 type = v4si_ftype_v4sf_v4sf;
13531
fbe5eb6d
BS
13532 if (d->icode == CODE_FOR_maskcmpv2df3
13533 || d->icode == CODE_FOR_maskncmpv2df3
13534 || d->icode == CODE_FOR_vmmaskcmpv2df3
13535 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13536 type = v2di_ftype_v2df_v2df;
13537
eeb06b1b 13538 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13539 }
13540
13541 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13542 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13543 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13544 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13545 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13546 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13547
13548 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13549 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13550 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13551
13552 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13553 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13554
13555 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13556 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13557
bd793c65 13558 /* comi/ucomi insns. */
ca7558fc 13559 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13560 if (d->mask == MASK_SSE2)
13561 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13562 else
13563 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13564
1255c85c
BS
13565 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13566 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13567 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13568
37f22004
L
13569 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13570 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13571 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13572 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13573 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13574 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13575 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13576 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13577 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13578 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13579 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13580
13581 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13582 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13583
13584 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13585
13586 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13588 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13590 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13591 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13592
13593 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13596 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13597
13598 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13599 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13600 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13601 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13602
13603 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13604
13605 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13606
13607 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13608 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13609 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13611 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13612 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13613
13614 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13615
47f339cf
BS
13616 /* Original 3DNow! */
13617 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13618 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13619 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13635 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13636 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13637
13638 /* 3DNow! extension as used in the Athlon CPU. */
13639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13640 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13641 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13642 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13643 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13644 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13645
37f22004 13646 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13647
13648 /* SSE2 */
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13651
13652 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13654 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13655
068f5dea
JH
13656 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13659 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13662
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13667
13668 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13669 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13670 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13672 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13673
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13677 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13678
13679 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13681
13682 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13683
13684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13685 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13686
13687 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13692
13693 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13694
13695 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13697 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13698 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13699
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13703
13704 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13705 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13708
13709 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13712 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13714 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13716
068f5dea 13717 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13718 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13720
068f5dea
JH
13721 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13724 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13726 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13727 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13728
37f22004 13729 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13730
916b60b7
BS
13731 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13734
13735 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13738
13739 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13741
ab3146fd 13742 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13743 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13746
ab3146fd 13747 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13751
13752 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13753 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13754
13755 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13756
13757 /* Prescott New Instructions. */
9e200aaf 13758 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13759 void_ftype_pcvoid_unsigned_unsigned,
13760 IX86_BUILTIN_MONITOR);
9e200aaf 13761 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13762 void_ftype_unsigned_unsigned,
13763 IX86_BUILTIN_MWAIT);
9e200aaf 13764 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13765 v4sf_ftype_v4sf,
13766 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13767 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13768 v4sf_ftype_v4sf,
13769 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13770 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13771 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13772 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13773 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13774 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13775 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13776}
13777
13778/* Errors in the source file can cause expand_expr to return const0_rtx
13779 where we expect a vector. To avoid crashing, use one of the vector
13780 clear instructions. */
13781static rtx
b96a374d 13782safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
13783{
13784 if (x != const0_rtx)
13785 return x;
13786 x = gen_reg_rtx (mode);
13787
47f339cf 13788 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13789 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13790 : gen_rtx_SUBREG (DImode, x, 0)));
13791 else
e37af218 13792 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13793 : gen_rtx_SUBREG (V4SFmode, x, 0),
13794 CONST0_RTX (V4SFmode)));
bd793c65
BS
13795 return x;
13796}
13797
13798/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13799
13800static rtx
b96a374d 13801ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13802{
13803 rtx pat;
13804 tree arg0 = TREE_VALUE (arglist);
13805 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13806 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13807 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13808 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13809 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13810 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13811
13812 if (VECTOR_MODE_P (mode0))
13813 op0 = safe_vector_operand (op0, mode0);
13814 if (VECTOR_MODE_P (mode1))
13815 op1 = safe_vector_operand (op1, mode1);
13816
13817 if (! target
13818 || GET_MODE (target) != tmode
13819 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13820 target = gen_reg_rtx (tmode);
13821
d9deed68
JH
13822 if (GET_MODE (op1) == SImode && mode1 == TImode)
13823 {
13824 rtx x = gen_reg_rtx (V4SImode);
13825 emit_insn (gen_sse2_loadd (x, op1));
13826 op1 = gen_lowpart (TImode, x);
13827 }
13828
bd793c65
BS
13829 /* In case the insn wants input operands in modes different from
13830 the result, abort. */
ebe75517
JH
13831 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13832 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13833 abort ();
13834
13835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13836 op0 = copy_to_mode_reg (mode0, op0);
13837 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13838 op1 = copy_to_mode_reg (mode1, op1);
13839
59bef189
RH
13840 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13841 yet one of the two must not be a memory. This is normally enforced
13842 by expanders, but we didn't bother to create one here. */
13843 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13844 op0 = copy_to_mode_reg (mode0, op0);
13845
bd793c65
BS
13846 pat = GEN_FCN (icode) (target, op0, op1);
13847 if (! pat)
13848 return 0;
13849 emit_insn (pat);
13850 return target;
13851}
13852
13853/* Subroutine of ix86_expand_builtin to take care of stores. */
13854
13855static rtx
b96a374d 13856ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13857{
13858 rtx pat;
13859 tree arg0 = TREE_VALUE (arglist);
13860 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13861 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13862 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13863 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13864 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13865
13866 if (VECTOR_MODE_P (mode1))
13867 op1 = safe_vector_operand (op1, mode1);
13868
13869 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13870 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13871
bd793c65
BS
13872 pat = GEN_FCN (icode) (op0, op1);
13873 if (pat)
13874 emit_insn (pat);
13875 return 0;
13876}
13877
13878/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13879
13880static rtx
b96a374d
AJ
13881ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13882 rtx target, int do_load)
bd793c65
BS
13883{
13884 rtx pat;
13885 tree arg0 = TREE_VALUE (arglist);
13886 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13887 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13888 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13889
13890 if (! target
13891 || GET_MODE (target) != tmode
13892 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13893 target = gen_reg_rtx (tmode);
13894 if (do_load)
13895 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13896 else
13897 {
13898 if (VECTOR_MODE_P (mode0))
13899 op0 = safe_vector_operand (op0, mode0);
13900
13901 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13902 op0 = copy_to_mode_reg (mode0, op0);
13903 }
13904
13905 pat = GEN_FCN (icode) (target, op0);
13906 if (! pat)
13907 return 0;
13908 emit_insn (pat);
13909 return target;
13910}
13911
13912/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13913 sqrtss, rsqrtss, rcpss. */
13914
13915static rtx
b96a374d 13916ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13917{
13918 rtx pat;
13919 tree arg0 = TREE_VALUE (arglist);
59bef189 13920 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13921 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13922 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13923
13924 if (! target
13925 || GET_MODE (target) != tmode
13926 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13927 target = gen_reg_rtx (tmode);
13928
13929 if (VECTOR_MODE_P (mode0))
13930 op0 = safe_vector_operand (op0, mode0);
13931
13932 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13933 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13934
59bef189
RH
13935 op1 = op0;
13936 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13937 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13938
59bef189 13939 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13940 if (! pat)
13941 return 0;
13942 emit_insn (pat);
13943 return target;
13944}
13945
13946/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13947
13948static rtx
b96a374d
AJ
13949ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13950 rtx target)
bd793c65
BS
13951{
13952 rtx pat;
13953 tree arg0 = TREE_VALUE (arglist);
13954 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13955 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13956 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13957 rtx op2;
13958 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13959 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13960 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13961 enum rtx_code comparison = d->comparison;
13962
13963 if (VECTOR_MODE_P (mode0))
13964 op0 = safe_vector_operand (op0, mode0);
13965 if (VECTOR_MODE_P (mode1))
13966 op1 = safe_vector_operand (op1, mode1);
13967
13968 /* Swap operands if we have a comparison that isn't available in
13969 hardware. */
13970 if (d->flag)
13971 {
21e1b5f1
BS
13972 rtx tmp = gen_reg_rtx (mode1);
13973 emit_move_insn (tmp, op1);
bd793c65 13974 op1 = op0;
21e1b5f1 13975 op0 = tmp;
bd793c65 13976 }
21e1b5f1
BS
13977
13978 if (! target
13979 || GET_MODE (target) != tmode
13980 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13981 target = gen_reg_rtx (tmode);
13982
13983 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13984 op0 = copy_to_mode_reg (mode0, op0);
13985 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13986 op1 = copy_to_mode_reg (mode1, op1);
13987
13988 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13989 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13990 if (! pat)
13991 return 0;
13992 emit_insn (pat);
13993 return target;
13994}
13995
13996/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13997
13998static rtx
b96a374d
AJ
13999ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14000 rtx target)
bd793c65
BS
14001{
14002 rtx pat;
14003 tree arg0 = TREE_VALUE (arglist);
14004 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14005 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14006 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14007 rtx op2;
14008 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14009 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14010 enum rtx_code comparison = d->comparison;
14011
14012 if (VECTOR_MODE_P (mode0))
14013 op0 = safe_vector_operand (op0, mode0);
14014 if (VECTOR_MODE_P (mode1))
14015 op1 = safe_vector_operand (op1, mode1);
14016
14017 /* Swap operands if we have a comparison that isn't available in
14018 hardware. */
14019 if (d->flag)
14020 {
14021 rtx tmp = op1;
14022 op1 = op0;
14023 op0 = tmp;
bd793c65
BS
14024 }
14025
14026 target = gen_reg_rtx (SImode);
14027 emit_move_insn (target, const0_rtx);
14028 target = gen_rtx_SUBREG (QImode, target, 0);
14029
14030 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14031 op0 = copy_to_mode_reg (mode0, op0);
14032 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14033 op1 = copy_to_mode_reg (mode1, op1);
14034
14035 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 14036 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
14037 if (! pat)
14038 return 0;
14039 emit_insn (pat);
29628f27
BS
14040 emit_insn (gen_rtx_SET (VOIDmode,
14041 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14042 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 14043 SET_DEST (pat),
29628f27 14044 const0_rtx)));
bd793c65 14045
6f1a6c5b 14046 return SUBREG_REG (target);
bd793c65
BS
14047}
14048
14049/* Expand an expression EXP that calls a built-in function,
14050 with result going to TARGET if that's convenient
14051 (and in mode MODE if that's convenient).
14052 SUBTARGET may be used as the target for computing one of EXP's operands.
14053 IGNORE is nonzero if the value is to be ignored. */
14054
14055rtx
b96a374d
AJ
14056ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14057 enum machine_mode mode ATTRIBUTE_UNUSED,
14058 int ignore ATTRIBUTE_UNUSED)
bd793c65 14059{
8b60264b 14060 const struct builtin_description *d;
77ebd435 14061 size_t i;
bd793c65
BS
14062 enum insn_code icode;
14063 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14064 tree arglist = TREE_OPERAND (exp, 1);
e37af218 14065 tree arg0, arg1, arg2;
bd793c65
BS
14066 rtx op0, op1, op2, pat;
14067 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 14068 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
14069
14070 switch (fcode)
14071 {
14072 case IX86_BUILTIN_EMMS:
14073 emit_insn (gen_emms ());
14074 return 0;
14075
14076 case IX86_BUILTIN_SFENCE:
14077 emit_insn (gen_sfence ());
14078 return 0;
14079
bd793c65 14080 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
14081 case IX86_BUILTIN_PEXTRW128:
14082 icode = (fcode == IX86_BUILTIN_PEXTRW
14083 ? CODE_FOR_mmx_pextrw
14084 : CODE_FOR_sse2_pextrw);
bd793c65
BS
14085 arg0 = TREE_VALUE (arglist);
14086 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14087 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14088 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14089 tmode = insn_data[icode].operand[0].mode;
14090 mode0 = insn_data[icode].operand[1].mode;
14091 mode1 = insn_data[icode].operand[2].mode;
14092
14093 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14094 op0 = copy_to_mode_reg (mode0, op0);
14095 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14096 {
ebe75517
JH
14097 error ("selector must be an integer constant in the range 0..%i",
14098 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 14099 return gen_reg_rtx (tmode);
bd793c65
BS
14100 }
14101 if (target == 0
14102 || GET_MODE (target) != tmode
14103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14104 target = gen_reg_rtx (tmode);
14105 pat = GEN_FCN (icode) (target, op0, op1);
14106 if (! pat)
14107 return 0;
14108 emit_insn (pat);
14109 return target;
14110
14111 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
14112 case IX86_BUILTIN_PINSRW128:
14113 icode = (fcode == IX86_BUILTIN_PINSRW
14114 ? CODE_FOR_mmx_pinsrw
14115 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
14116 arg0 = TREE_VALUE (arglist);
14117 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14118 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14119 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14120 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14121 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14122 tmode = insn_data[icode].operand[0].mode;
14123 mode0 = insn_data[icode].operand[1].mode;
14124 mode1 = insn_data[icode].operand[2].mode;
14125 mode2 = insn_data[icode].operand[3].mode;
14126
14127 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14128 op0 = copy_to_mode_reg (mode0, op0);
14129 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14130 op1 = copy_to_mode_reg (mode1, op1);
14131 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14132 {
ebe75517
JH
14133 error ("selector must be an integer constant in the range 0..%i",
14134 fcode == IX86_BUILTIN_PINSRW ? 15:255);
bd793c65
BS
14135 return const0_rtx;
14136 }
14137 if (target == 0
14138 || GET_MODE (target) != tmode
14139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14140 target = gen_reg_rtx (tmode);
14141 pat = GEN_FCN (icode) (target, op0, op1, op2);
14142 if (! pat)
14143 return 0;
14144 emit_insn (pat);
14145 return target;
14146
14147 case IX86_BUILTIN_MASKMOVQ:
077084dd 14148 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
14149 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14150 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
14151 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14152 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
14153 /* Note the arg order is different from the operand order. */
14154 arg1 = TREE_VALUE (arglist);
14155 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14156 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14157 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14158 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14159 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14160 mode0 = insn_data[icode].operand[0].mode;
14161 mode1 = insn_data[icode].operand[1].mode;
14162 mode2 = insn_data[icode].operand[2].mode;
14163
5c464583 14164 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14165 op0 = copy_to_mode_reg (mode0, op0);
14166 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14167 op1 = copy_to_mode_reg (mode1, op1);
14168 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14169 op2 = copy_to_mode_reg (mode2, op2);
14170 pat = GEN_FCN (icode) (op0, op1, op2);
14171 if (! pat)
14172 return 0;
14173 emit_insn (pat);
14174 return 0;
14175
14176 case IX86_BUILTIN_SQRTSS:
14177 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14178 case IX86_BUILTIN_RSQRTSS:
14179 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14180 case IX86_BUILTIN_RCPSS:
14181 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14182
14183 case IX86_BUILTIN_LOADAPS:
14184 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14185
14186 case IX86_BUILTIN_LOADUPS:
14187 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14188
14189 case IX86_BUILTIN_STOREAPS:
e37af218 14190 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14191
bd793c65 14192 case IX86_BUILTIN_STOREUPS:
e37af218 14193 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14194
14195 case IX86_BUILTIN_LOADSS:
14196 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14197
14198 case IX86_BUILTIN_STORESS:
e37af218 14199 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14200
0f290768 14201 case IX86_BUILTIN_LOADHPS:
bd793c65 14202 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14203 case IX86_BUILTIN_LOADHPD:
14204 case IX86_BUILTIN_LOADLPD:
14205 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14206 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14207 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
997404de 14208 : CODE_FOR_sse2_movsd);
bd793c65
BS
14209 arg0 = TREE_VALUE (arglist);
14210 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14211 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14212 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14213 tmode = insn_data[icode].operand[0].mode;
14214 mode0 = insn_data[icode].operand[1].mode;
14215 mode1 = insn_data[icode].operand[2].mode;
14216
14217 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14218 op0 = copy_to_mode_reg (mode0, op0);
14219 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14220 if (target == 0
14221 || GET_MODE (target) != tmode
14222 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14223 target = gen_reg_rtx (tmode);
14224 pat = GEN_FCN (icode) (target, op0, op1);
14225 if (! pat)
14226 return 0;
14227 emit_insn (pat);
14228 return target;
0f290768 14229
bd793c65
BS
14230 case IX86_BUILTIN_STOREHPS:
14231 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14232 case IX86_BUILTIN_STOREHPD:
14233 case IX86_BUILTIN_STORELPD:
14234 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14235 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14236 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
997404de 14237 : CODE_FOR_sse2_movsd);
bd793c65
BS
14238 arg0 = TREE_VALUE (arglist);
14239 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14240 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14241 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14242 mode0 = insn_data[icode].operand[1].mode;
14243 mode1 = insn_data[icode].operand[2].mode;
14244
14245 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14246 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14247 op1 = copy_to_mode_reg (mode1, op1);
14248
14249 pat = GEN_FCN (icode) (op0, op0, op1);
14250 if (! pat)
14251 return 0;
14252 emit_insn (pat);
14253 return 0;
14254
14255 case IX86_BUILTIN_MOVNTPS:
e37af218 14256 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14257 case IX86_BUILTIN_MOVNTQ:
e37af218 14258 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14259
14260 case IX86_BUILTIN_LDMXCSR:
14261 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14262 target = assign_386_stack_local (SImode, 0);
14263 emit_move_insn (target, op0);
14264 emit_insn (gen_ldmxcsr (target));
14265 return 0;
14266
14267 case IX86_BUILTIN_STMXCSR:
14268 target = assign_386_stack_local (SImode, 0);
14269 emit_insn (gen_stmxcsr (target));
14270 return copy_to_mode_reg (SImode, target);
14271
bd793c65 14272 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14273 case IX86_BUILTIN_SHUFPD:
14274 icode = (fcode == IX86_BUILTIN_SHUFPS
14275 ? CODE_FOR_sse_shufps
14276 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14277 arg0 = TREE_VALUE (arglist);
14278 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14279 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14280 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14281 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14282 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14283 tmode = insn_data[icode].operand[0].mode;
14284 mode0 = insn_data[icode].operand[1].mode;
14285 mode1 = insn_data[icode].operand[2].mode;
14286 mode2 = insn_data[icode].operand[3].mode;
14287
14288 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14289 op0 = copy_to_mode_reg (mode0, op0);
14290 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14291 op1 = copy_to_mode_reg (mode1, op1);
14292 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14293 {
14294 /* @@@ better error message */
14295 error ("mask must be an immediate");
6f1a6c5b 14296 return gen_reg_rtx (tmode);
bd793c65
BS
14297 }
14298 if (target == 0
14299 || GET_MODE (target) != tmode
14300 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14301 target = gen_reg_rtx (tmode);
14302 pat = GEN_FCN (icode) (target, op0, op1, op2);
14303 if (! pat)
14304 return 0;
14305 emit_insn (pat);
14306 return target;
14307
14308 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14309 case IX86_BUILTIN_PSHUFD:
14310 case IX86_BUILTIN_PSHUFHW:
14311 case IX86_BUILTIN_PSHUFLW:
14312 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14313 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14314 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14315 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14316 arg0 = TREE_VALUE (arglist);
14317 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14318 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14319 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14320 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14321 mode1 = insn_data[icode].operand[1].mode;
14322 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14323
29628f27
BS
14324 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14325 op0 = copy_to_mode_reg (mode1, op0);
14326 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14327 {
14328 /* @@@ better error message */
14329 error ("mask must be an immediate");
14330 return const0_rtx;
14331 }
14332 if (target == 0
14333 || GET_MODE (target) != tmode
14334 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14335 target = gen_reg_rtx (tmode);
29628f27 14336 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14337 if (! pat)
14338 return 0;
14339 emit_insn (pat);
14340 return target;
14341
ab3146fd
ZD
14342 case IX86_BUILTIN_PSLLDQI128:
14343 case IX86_BUILTIN_PSRLDQI128:
14344 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14345 : CODE_FOR_sse2_lshrti3);
14346 arg0 = TREE_VALUE (arglist);
14347 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14348 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14349 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14350 tmode = insn_data[icode].operand[0].mode;
14351 mode1 = insn_data[icode].operand[1].mode;
14352 mode2 = insn_data[icode].operand[2].mode;
14353
14354 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14355 {
14356 op0 = copy_to_reg (op0);
14357 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14358 }
14359 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14360 {
14361 error ("shift must be an immediate");
14362 return const0_rtx;
14363 }
14364 target = gen_reg_rtx (V2DImode);
14365 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14366 if (! pat)
14367 return 0;
14368 emit_insn (pat);
14369 return target;
14370
47f339cf
BS
14371 case IX86_BUILTIN_FEMMS:
14372 emit_insn (gen_femms ());
14373 return NULL_RTX;
14374
14375 case IX86_BUILTIN_PAVGUSB:
14376 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14377
14378 case IX86_BUILTIN_PF2ID:
14379 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14380
14381 case IX86_BUILTIN_PFACC:
14382 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14383
14384 case IX86_BUILTIN_PFADD:
14385 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14386
14387 case IX86_BUILTIN_PFCMPEQ:
14388 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14389
14390 case IX86_BUILTIN_PFCMPGE:
14391 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14392
14393 case IX86_BUILTIN_PFCMPGT:
14394 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14395
14396 case IX86_BUILTIN_PFMAX:
14397 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14398
14399 case IX86_BUILTIN_PFMIN:
14400 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14401
14402 case IX86_BUILTIN_PFMUL:
14403 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14404
14405 case IX86_BUILTIN_PFRCP:
14406 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14407
14408 case IX86_BUILTIN_PFRCPIT1:
14409 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14410
14411 case IX86_BUILTIN_PFRCPIT2:
14412 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14413
14414 case IX86_BUILTIN_PFRSQIT1:
14415 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14416
14417 case IX86_BUILTIN_PFRSQRT:
14418 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14419
14420 case IX86_BUILTIN_PFSUB:
14421 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14422
14423 case IX86_BUILTIN_PFSUBR:
14424 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14425
14426 case IX86_BUILTIN_PI2FD:
14427 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14428
14429 case IX86_BUILTIN_PMULHRW:
14430 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14431
47f339cf
BS
14432 case IX86_BUILTIN_PF2IW:
14433 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14434
14435 case IX86_BUILTIN_PFNACC:
14436 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14437
14438 case IX86_BUILTIN_PFPNACC:
14439 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14440
14441 case IX86_BUILTIN_PI2FW:
14442 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14443
14444 case IX86_BUILTIN_PSWAPDSI:
14445 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14446
14447 case IX86_BUILTIN_PSWAPDSF:
14448 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14449
e37af218
RH
14450 case IX86_BUILTIN_SSE_ZERO:
14451 target = gen_reg_rtx (V4SFmode);
4977bab6 14452 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14453 return target;
14454
bd793c65
BS
14455 case IX86_BUILTIN_MMX_ZERO:
14456 target = gen_reg_rtx (DImode);
14457 emit_insn (gen_mmx_clrdi (target));
14458 return target;
14459
f02e1358
JH
14460 case IX86_BUILTIN_CLRTI:
14461 target = gen_reg_rtx (V2DImode);
14462 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14463 return target;
14464
14465
fbe5eb6d
BS
14466 case IX86_BUILTIN_SQRTSD:
14467 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14468 case IX86_BUILTIN_LOADAPD:
14469 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14470 case IX86_BUILTIN_LOADUPD:
14471 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14472
14473 case IX86_BUILTIN_STOREAPD:
14474 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14475 case IX86_BUILTIN_STOREUPD:
14476 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14477
14478 case IX86_BUILTIN_LOADSD:
14479 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14480
14481 case IX86_BUILTIN_STORESD:
14482 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14483
14484 case IX86_BUILTIN_SETPD1:
14485 target = assign_386_stack_local (DFmode, 0);
14486 arg0 = TREE_VALUE (arglist);
14487 emit_move_insn (adjust_address (target, DFmode, 0),
14488 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14489 op0 = gen_reg_rtx (V2DFmode);
14490 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 14491 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
14492 return op0;
14493
14494 case IX86_BUILTIN_SETPD:
14495 target = assign_386_stack_local (V2DFmode, 0);
14496 arg0 = TREE_VALUE (arglist);
14497 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14498 emit_move_insn (adjust_address (target, DFmode, 0),
14499 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14500 emit_move_insn (adjust_address (target, DFmode, 8),
14501 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14502 op0 = gen_reg_rtx (V2DFmode);
14503 emit_insn (gen_sse2_movapd (op0, target));
14504 return op0;
14505
14506 case IX86_BUILTIN_LOADRPD:
14507 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14508 gen_reg_rtx (V2DFmode), 1);
60c81c89 14509 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
14510 return target;
14511
14512 case IX86_BUILTIN_LOADPD1:
14513 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14514 gen_reg_rtx (V2DFmode), 1);
14515 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14516 return target;
14517
14518 case IX86_BUILTIN_STOREPD1:
14519 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14520 case IX86_BUILTIN_STORERPD:
14521 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14522
48126a97
JH
14523 case IX86_BUILTIN_CLRPD:
14524 target = gen_reg_rtx (V2DFmode);
14525 emit_insn (gen_sse_clrv2df (target));
14526 return target;
14527
fbe5eb6d
BS
14528 case IX86_BUILTIN_MFENCE:
14529 emit_insn (gen_sse2_mfence ());
14530 return 0;
14531 case IX86_BUILTIN_LFENCE:
14532 emit_insn (gen_sse2_lfence ());
14533 return 0;
14534
14535 case IX86_BUILTIN_CLFLUSH:
14536 arg0 = TREE_VALUE (arglist);
14537 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14538 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14539 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14540 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14541
14542 emit_insn (gen_sse2_clflush (op0));
14543 return 0;
14544
14545 case IX86_BUILTIN_MOVNTPD:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14547 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14548 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14549 case IX86_BUILTIN_MOVNTI:
14550 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14551
f02e1358
JH
14552 case IX86_BUILTIN_LOADDQA:
14553 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14554 case IX86_BUILTIN_LOADDQU:
14555 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14556 case IX86_BUILTIN_LOADD:
14557 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14558
14559 case IX86_BUILTIN_STOREDQA:
14560 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14561 case IX86_BUILTIN_STOREDQU:
14562 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14563 case IX86_BUILTIN_STORED:
14564 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14565
22c7c85e
L
14566 case IX86_BUILTIN_MONITOR:
14567 arg0 = TREE_VALUE (arglist);
14568 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14569 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14570 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14571 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14572 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14573 if (!REG_P (op0))
14574 op0 = copy_to_mode_reg (SImode, op0);
14575 if (!REG_P (op1))
14576 op1 = copy_to_mode_reg (SImode, op1);
14577 if (!REG_P (op2))
14578 op2 = copy_to_mode_reg (SImode, op2);
14579 emit_insn (gen_monitor (op0, op1, op2));
14580 return 0;
14581
14582 case IX86_BUILTIN_MWAIT:
14583 arg0 = TREE_VALUE (arglist);
14584 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14585 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14586 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14587 if (!REG_P (op0))
14588 op0 = copy_to_mode_reg (SImode, op0);
14589 if (!REG_P (op1))
14590 op1 = copy_to_mode_reg (SImode, op1);
14591 emit_insn (gen_mwait (op0, op1));
14592 return 0;
14593
14594 case IX86_BUILTIN_LOADDDUP:
14595 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14596
14597 case IX86_BUILTIN_LDDQU:
14598 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14599 1);
14600
bd793c65
BS
14601 default:
14602 break;
14603 }
14604
ca7558fc 14605 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14606 if (d->code == fcode)
14607 {
14608 /* Compares are treated specially. */
14609 if (d->icode == CODE_FOR_maskcmpv4sf3
14610 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14611 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14612 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14613 || d->icode == CODE_FOR_maskcmpv2df3
14614 || d->icode == CODE_FOR_vmmaskcmpv2df3
14615 || d->icode == CODE_FOR_maskncmpv2df3
14616 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14617 return ix86_expand_sse_compare (d, arglist, target);
14618
14619 return ix86_expand_binop_builtin (d->icode, arglist, target);
14620 }
14621
ca7558fc 14622 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14623 if (d->code == fcode)
14624 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14625
ca7558fc 14626 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14627 if (d->code == fcode)
14628 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14629
bd793c65
BS
14630 /* @@@ Should really do something sensible here. */
14631 return 0;
bd793c65 14632}
4211a8fb
JH
14633
14634/* Store OPERAND to the memory after reload is completed. This means
f710504c 14635 that we can't easily use assign_stack_local. */
4211a8fb 14636rtx
b96a374d 14637ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14638{
898d374d 14639 rtx result;
4211a8fb
JH
14640 if (!reload_completed)
14641 abort ();
a5b378d6 14642 if (TARGET_RED_ZONE)
898d374d
JH
14643 {
14644 result = gen_rtx_MEM (mode,
14645 gen_rtx_PLUS (Pmode,
14646 stack_pointer_rtx,
14647 GEN_INT (-RED_ZONE_SIZE)));
14648 emit_move_insn (result, operand);
14649 }
a5b378d6 14650 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14651 {
898d374d 14652 switch (mode)
4211a8fb 14653 {
898d374d
JH
14654 case HImode:
14655 case SImode:
14656 operand = gen_lowpart (DImode, operand);
5efb1046 14657 /* FALLTHRU */
898d374d 14658 case DImode:
4211a8fb 14659 emit_insn (
898d374d
JH
14660 gen_rtx_SET (VOIDmode,
14661 gen_rtx_MEM (DImode,
14662 gen_rtx_PRE_DEC (DImode,
14663 stack_pointer_rtx)),
14664 operand));
14665 break;
14666 default:
14667 abort ();
14668 }
14669 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14670 }
14671 else
14672 {
14673 switch (mode)
14674 {
14675 case DImode:
14676 {
14677 rtx operands[2];
14678 split_di (&operand, 1, operands, operands + 1);
14679 emit_insn (
14680 gen_rtx_SET (VOIDmode,
14681 gen_rtx_MEM (SImode,
14682 gen_rtx_PRE_DEC (Pmode,
14683 stack_pointer_rtx)),
14684 operands[1]));
14685 emit_insn (
14686 gen_rtx_SET (VOIDmode,
14687 gen_rtx_MEM (SImode,
14688 gen_rtx_PRE_DEC (Pmode,
14689 stack_pointer_rtx)),
14690 operands[0]));
14691 }
14692 break;
14693 case HImode:
14694 /* It is better to store HImodes as SImodes. */
14695 if (!TARGET_PARTIAL_REG_STALL)
14696 operand = gen_lowpart (SImode, operand);
5efb1046 14697 /* FALLTHRU */
898d374d 14698 case SImode:
4211a8fb 14699 emit_insn (
898d374d
JH
14700 gen_rtx_SET (VOIDmode,
14701 gen_rtx_MEM (GET_MODE (operand),
14702 gen_rtx_PRE_DEC (SImode,
14703 stack_pointer_rtx)),
14704 operand));
14705 break;
14706 default:
14707 abort ();
4211a8fb 14708 }
898d374d 14709 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14710 }
898d374d 14711 return result;
4211a8fb
JH
14712}
14713
14714/* Free operand from the memory. */
14715void
b96a374d 14716ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14717{
a5b378d6 14718 if (!TARGET_RED_ZONE)
898d374d
JH
14719 {
14720 int size;
14721
14722 if (mode == DImode || TARGET_64BIT)
14723 size = 8;
14724 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14725 size = 2;
14726 else
14727 size = 4;
14728 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14729 to pop or add instruction if registers are available. */
14730 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14731 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14732 GEN_INT (size))));
14733 }
4211a8fb 14734}
a946dd00 14735
f84aa48a
JH
14736/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14737 QImode must go into class Q_REGS.
14738 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14739 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14740enum reg_class
b96a374d 14741ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14742{
1877be45
JH
14743 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14744 return NO_REGS;
f84aa48a
JH
14745 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14746 {
14747 /* SSE can't load any constant directly yet. */
14748 if (SSE_CLASS_P (class))
14749 return NO_REGS;
14750 /* Floats can load 0 and 1. */
14751 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14752 {
14753 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14754 if (MAYBE_SSE_CLASS_P (class))
14755 return (reg_class_subset_p (class, GENERAL_REGS)
14756 ? GENERAL_REGS : FLOAT_REGS);
14757 else
14758 return class;
14759 }
14760 /* General regs can load everything. */
14761 if (reg_class_subset_p (class, GENERAL_REGS))
14762 return GENERAL_REGS;
14763 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14764 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14765 return NO_REGS;
14766 }
14767 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14768 return NO_REGS;
14769 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14770 return Q_REGS;
14771 return class;
14772}
14773
14774/* If we are copying between general and FP registers, we need a memory
14775 location. The same is true for SSE and MMX registers.
14776
14777 The macro can't work reliably when one of the CLASSES is class containing
14778 registers from multiple units (SSE, MMX, integer). We avoid this by never
14779 combining those units in single alternative in the machine description.
14780 Ensure that this constraint holds to avoid unexpected surprises.
14781
14782 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14783 enforce these sanity checks. */
14784int
b96a374d
AJ
14785ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14786 enum machine_mode mode, int strict)
f84aa48a
JH
14787{
14788 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14789 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14790 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14791 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14792 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14793 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14794 {
14795 if (strict)
14796 abort ();
14797 else
14798 return 1;
14799 }
14800 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8f62128d
JH
14801 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14802 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14803 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14804 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
f84aa48a
JH
14805}
14806/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14807 one in class CLASS2.
f84aa48a
JH
14808
14809 It is not required that the cost always equal 2 when FROM is the same as TO;
14810 on some machines it is expensive to move between registers if they are not
14811 general registers. */
14812int
b96a374d
AJ
14813ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14814 enum reg_class class2)
f84aa48a
JH
14815{
14816 /* In case we require secondary memory, compute cost of the store followed
b96a374d 14817 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
14818 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14819
f84aa48a
JH
14820 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14821 {
d631b80a
RH
14822 int cost = 1;
14823
14824 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14825 MEMORY_MOVE_COST (mode, class1, 1));
14826 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14827 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14828
d631b80a
RH
14829 /* In case of copying from general_purpose_register we may emit multiple
14830 stores followed by single load causing memory size mismatch stall.
d1f87653 14831 Count this as arbitrarily high cost of 20. */
62415523 14832 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14833 cost += 20;
14834
14835 /* In the case of FP/MMX moves, the registers actually overlap, and we
14836 have to switch modes in order to treat them differently. */
14837 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14838 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14839 cost += 20;
14840
14841 return cost;
f84aa48a 14842 }
d631b80a 14843
92d0fb09 14844 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14845 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14846 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14847 return ix86_cost->mmxsse_to_integer;
14848 if (MAYBE_FLOAT_CLASS_P (class1))
14849 return ix86_cost->fp_move;
14850 if (MAYBE_SSE_CLASS_P (class1))
14851 return ix86_cost->sse_move;
14852 if (MAYBE_MMX_CLASS_P (class1))
14853 return ix86_cost->mmx_move;
f84aa48a
JH
14854 return 2;
14855}
14856
a946dd00
JH
14857/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14858int
b96a374d 14859ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14860{
14861 /* Flags and only flags can only hold CCmode values. */
14862 if (CC_REGNO_P (regno))
14863 return GET_MODE_CLASS (mode) == MODE_CC;
14864 if (GET_MODE_CLASS (mode) == MODE_CC
14865 || GET_MODE_CLASS (mode) == MODE_RANDOM
14866 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14867 return 0;
14868 if (FP_REGNO_P (regno))
14869 return VALID_FP_MODE_P (mode);
14870 if (SSE_REGNO_P (regno))
a67a3220 14871 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
a946dd00 14872 if (MMX_REGNO_P (regno))
a67a3220
JH
14873 return (TARGET_MMX
14874 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
a946dd00
JH
14875 /* We handle both integer and floats in the general purpose registers.
14876 In future we should be able to handle vector modes as well. */
14877 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14878 return 0;
14879 /* Take care for QImode values - they can be in non-QI regs, but then
14880 they do cause partial register stalls. */
d2836273 14881 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14882 return 1;
14883 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14884}
fa79946e
JH
14885
14886/* Return the cost of moving data of mode M between a
14887 register and memory. A value of 2 is the default; this cost is
14888 relative to those in `REGISTER_MOVE_COST'.
14889
14890 If moving between registers and memory is more expensive than
14891 between two registers, you should define this macro to express the
a4f31c00
AJ
14892 relative cost.
14893
fa79946e
JH
14894 Model also increased moving costs of QImode registers in non
14895 Q_REGS classes.
14896 */
14897int
b96a374d 14898ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14899{
14900 if (FLOAT_CLASS_P (class))
14901 {
14902 int index;
14903 switch (mode)
14904 {
14905 case SFmode:
14906 index = 0;
14907 break;
14908 case DFmode:
14909 index = 1;
14910 break;
14911 case XFmode:
fa79946e
JH
14912 index = 2;
14913 break;
14914 default:
14915 return 100;
14916 }
14917 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14918 }
14919 if (SSE_CLASS_P (class))
14920 {
14921 int index;
14922 switch (GET_MODE_SIZE (mode))
14923 {
14924 case 4:
14925 index = 0;
14926 break;
14927 case 8:
14928 index = 1;
14929 break;
14930 case 16:
14931 index = 2;
14932 break;
14933 default:
14934 return 100;
14935 }
14936 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14937 }
14938 if (MMX_CLASS_P (class))
14939 {
14940 int index;
14941 switch (GET_MODE_SIZE (mode))
14942 {
14943 case 4:
14944 index = 0;
14945 break;
14946 case 8:
14947 index = 1;
14948 break;
14949 default:
14950 return 100;
14951 }
14952 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14953 }
14954 switch (GET_MODE_SIZE (mode))
14955 {
14956 case 1:
14957 if (in)
14958 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14959 : ix86_cost->movzbl_load);
14960 else
14961 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14962 : ix86_cost->int_store[0] + 4);
14963 break;
14964 case 2:
14965 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14966 default:
14967 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14968 if (mode == TFmode)
14969 mode = XFmode;
3bb7e126 14970 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14971 * (((int) GET_MODE_SIZE (mode)
14972 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14973 }
14974}
0ecf09f9 14975
3c50106f
RH
14976/* Compute a (partial) cost for rtx X. Return true if the complete
14977 cost has been computed, and false if subexpressions should be
14978 scanned. In either case, *TOTAL contains the cost result. */
14979
14980static bool
b96a374d 14981ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14982{
14983 enum machine_mode mode = GET_MODE (x);
14984
14985 switch (code)
14986 {
14987 case CONST_INT:
14988 case CONST:
14989 case LABEL_REF:
14990 case SYMBOL_REF:
14991 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14992 *total = 3;
14993 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14994 *total = 2;
3504dad3
JH
14995 else if (flag_pic && SYMBOLIC_CONST (x)
14996 && (!TARGET_64BIT
14997 || (!GET_CODE (x) != LABEL_REF
14998 && (GET_CODE (x) != SYMBOL_REF
12969f45 14999 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
15000 *total = 1;
15001 else
15002 *total = 0;
15003 return true;
15004
15005 case CONST_DOUBLE:
15006 if (mode == VOIDmode)
15007 *total = 0;
15008 else
15009 switch (standard_80387_constant_p (x))
15010 {
15011 case 1: /* 0.0 */
15012 *total = 1;
15013 break;
881b2a96 15014 default: /* Other constants */
3c50106f
RH
15015 *total = 2;
15016 break;
881b2a96
RS
15017 case 0:
15018 case -1:
3c50106f
RH
15019 /* Start with (MEM (SYMBOL_REF)), since that's where
15020 it'll probably end up. Add a penalty for size. */
15021 *total = (COSTS_N_INSNS (1)
3504dad3 15022 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
15023 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15024 break;
15025 }
15026 return true;
15027
15028 case ZERO_EXTEND:
15029 /* The zero extensions is often completely free on x86_64, so make
15030 it as cheap as possible. */
15031 if (TARGET_64BIT && mode == DImode
15032 && GET_MODE (XEXP (x, 0)) == SImode)
15033 *total = 1;
15034 else if (TARGET_ZERO_EXTEND_WITH_AND)
15035 *total = COSTS_N_INSNS (ix86_cost->add);
15036 else
15037 *total = COSTS_N_INSNS (ix86_cost->movzx);
15038 return false;
15039
15040 case SIGN_EXTEND:
15041 *total = COSTS_N_INSNS (ix86_cost->movsx);
15042 return false;
15043
15044 case ASHIFT:
15045 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15046 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15047 {
15048 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15049 if (value == 1)
15050 {
15051 *total = COSTS_N_INSNS (ix86_cost->add);
15052 return false;
15053 }
15054 if ((value == 2 || value == 3)
15055 && !TARGET_DECOMPOSE_LEA
15056 && ix86_cost->lea <= ix86_cost->shift_const)
15057 {
15058 *total = COSTS_N_INSNS (ix86_cost->lea);
15059 return false;
15060 }
15061 }
5efb1046 15062 /* FALLTHRU */
3c50106f
RH
15063
15064 case ROTATE:
15065 case ASHIFTRT:
15066 case LSHIFTRT:
15067 case ROTATERT:
15068 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15069 {
15070 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15071 {
15072 if (INTVAL (XEXP (x, 1)) > 32)
15073 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15074 else
15075 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15076 }
15077 else
15078 {
15079 if (GET_CODE (XEXP (x, 1)) == AND)
15080 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15081 else
15082 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15083 }
15084 }
15085 else
15086 {
15087 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15088 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15089 else
15090 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15091 }
15092 return false;
15093
15094 case MULT:
15095 if (FLOAT_MODE_P (mode))
15096 *total = COSTS_N_INSNS (ix86_cost->fmul);
15097 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15098 {
15099 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15100 int nbits;
15101
15102 for (nbits = 0; value != 0; value >>= 1)
15103 nbits++;
15104
15105 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15106 + nbits * ix86_cost->mult_bit);
15107 }
15108 else
15109 {
15110 /* This is arbitrary */
15111 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15112 + 7 * ix86_cost->mult_bit);
15113 }
15114 return false;
15115
15116 case DIV:
15117 case UDIV:
15118 case MOD:
15119 case UMOD:
15120 if (FLOAT_MODE_P (mode))
15121 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15122 else
15123 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15124 return false;
15125
15126 case PLUS:
15127 if (FLOAT_MODE_P (mode))
15128 *total = COSTS_N_INSNS (ix86_cost->fadd);
15129 else if (!TARGET_DECOMPOSE_LEA
15130 && GET_MODE_CLASS (mode) == MODE_INT
15131 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15132 {
15133 if (GET_CODE (XEXP (x, 0)) == PLUS
15134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15135 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15136 && CONSTANT_P (XEXP (x, 1)))
15137 {
15138 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15139 if (val == 2 || val == 4 || val == 8)
15140 {
15141 *total = COSTS_N_INSNS (ix86_cost->lea);
15142 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15143 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15144 outer_code);
15145 *total += rtx_cost (XEXP (x, 1), outer_code);
15146 return true;
15147 }
15148 }
15149 else if (GET_CODE (XEXP (x, 0)) == MULT
15150 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15151 {
15152 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15153 if (val == 2 || val == 4 || val == 8)
15154 {
15155 *total = COSTS_N_INSNS (ix86_cost->lea);
15156 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15157 *total += rtx_cost (XEXP (x, 1), outer_code);
15158 return true;
15159 }
15160 }
15161 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15162 {
15163 *total = COSTS_N_INSNS (ix86_cost->lea);
15164 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15165 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15166 *total += rtx_cost (XEXP (x, 1), outer_code);
15167 return true;
15168 }
15169 }
5efb1046 15170 /* FALLTHRU */
3c50106f
RH
15171
15172 case MINUS:
15173 if (FLOAT_MODE_P (mode))
15174 {
15175 *total = COSTS_N_INSNS (ix86_cost->fadd);
15176 return false;
15177 }
5efb1046 15178 /* FALLTHRU */
3c50106f
RH
15179
15180 case AND:
15181 case IOR:
15182 case XOR:
15183 if (!TARGET_64BIT && mode == DImode)
15184 {
15185 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15186 + (rtx_cost (XEXP (x, 0), outer_code)
15187 << (GET_MODE (XEXP (x, 0)) != DImode))
15188 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15189 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
15190 return true;
15191 }
5efb1046 15192 /* FALLTHRU */
3c50106f
RH
15193
15194 case NEG:
15195 if (FLOAT_MODE_P (mode))
15196 {
15197 *total = COSTS_N_INSNS (ix86_cost->fchs);
15198 return false;
15199 }
5efb1046 15200 /* FALLTHRU */
3c50106f
RH
15201
15202 case NOT:
15203 if (!TARGET_64BIT && mode == DImode)
15204 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15205 else
15206 *total = COSTS_N_INSNS (ix86_cost->add);
15207 return false;
15208
15209 case FLOAT_EXTEND:
15210 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15211 *total = 0;
15212 return false;
15213
15214 case ABS:
15215 if (FLOAT_MODE_P (mode))
15216 *total = COSTS_N_INSNS (ix86_cost->fabs);
15217 return false;
15218
15219 case SQRT:
15220 if (FLOAT_MODE_P (mode))
15221 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15222 return false;
15223
74dc3e94
RH
15224 case UNSPEC:
15225 if (XINT (x, 1) == UNSPEC_TP)
15226 *total = 0;
15227 return false;
15228
3c50106f
RH
15229 default:
15230 return false;
15231 }
15232}
15233
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit, into the init section, code that pushes the address of
   constructor SYMBOL for collection by DO_GLOBAL_CTORS_BODY.
   PRIORITY is unused.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
162f023b 15244
b069de3b
SS
#if TARGET_MACHO

/* Counter used to generate unique labels for lazy-pointer stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pick the stub section; PIC code needs the picsymbol variant.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: materialize our own address via call/pop, then load
	 the lazy pointer PC-relatively and jump through it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder pushes the lazy pointer address and transfers to
     dyld, which resolves the symbol and rewrites the pointer.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially pointing at the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
15307
162f023b
JH
15308/* Order the registers for register allocator. */
15309
15310void
b96a374d 15311x86_order_regs_for_local_alloc (void)
162f023b
JH
15312{
15313 int pos = 0;
15314 int i;
15315
15316 /* First allocate the local general purpose registers. */
15317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15318 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15319 reg_alloc_order [pos++] = i;
15320
15321 /* Global general purpose registers. */
15322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15323 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15324 reg_alloc_order [pos++] = i;
15325
15326 /* x87 registers come first in case we are doing FP math
15327 using them. */
15328 if (!TARGET_SSE_MATH)
15329 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15330 reg_alloc_order [pos++] = i;
fce5a9f2 15331
162f023b
JH
15332 /* SSE registers. */
15333 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15334 reg_alloc_order [pos++] = i;
15335 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15336 reg_alloc_order [pos++] = i;
15337
d1f87653 15338 /* x87 registers. */
162f023b
JH
15339 if (TARGET_SSE_MATH)
15340 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15341 reg_alloc_order [pos++] = i;
15342
15343 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15344 reg_alloc_order [pos++] = i;
15345
15346 /* Initialize the rest of array as we do not allocate some registers
15347 at all. */
15348 while (pos < FIRST_PSEUDO_REGISTER)
15349 reg_alloc_order [pos++] = 0;
15350}
194734e9 15351
4977bab6
ZW
15352#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15353#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15354#endif
15355
fe77449a
DR
15356/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15357 struct attribute_spec.handler. */
15358static tree
b96a374d
AJ
15359ix86_handle_struct_attribute (tree *node, tree name,
15360 tree args ATTRIBUTE_UNUSED,
15361 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
15362{
15363 tree *type = NULL;
15364 if (DECL_P (*node))
15365 {
15366 if (TREE_CODE (*node) == TYPE_DECL)
15367 type = &TREE_TYPE (*node);
15368 }
15369 else
15370 type = node;
15371
15372 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15373 || TREE_CODE (*type) == UNION_TYPE)))
15374 {
15375 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15376 *no_add_attrs = true;
15377 }
15378
15379 else if ((is_attribute_p ("ms_struct", name)
15380 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15381 || ((is_attribute_p ("gcc_struct", name)
15382 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15383 {
15384 warning ("`%s' incompatible attribute ignored",
15385 IDENTIFIER_POINTER (name));
15386 *no_add_attrs = true;
15387 }
15388
15389 return NULL_TREE;
15390}
15391
4977bab6 15392static bool
b96a374d 15393ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15394{
fe77449a 15395 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15396 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15397 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15398}
15399
483ab821
MM
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* `this' is the first integer argument register; skip one slot
	 when the return value is passed by hidden reference.  */
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  /* fastcall puts the first argument in register 2 (%ecx)
	     rather than 0 (%eax).  */
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Otherwise `this' is on the stack: past the return address, and
     also past the hidden return-pointer slot for aggregate returns.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
15439
3961e8fe
RH
15440/* Determine whether x86_output_mi_thunk can succeed. */
15441
15442static bool
b96a374d
AJ
15443x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15444 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15445 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
15446{
15447 /* 64-bit can handle anything. */
15448 if (TARGET_64BIT)
15449 return true;
15450
15451 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15452 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
15453 return true;
15454
15455 /* Need a free register for vcall_offset. */
15456 if (vcall_offset)
15457 return false;
15458
15459 /* Need a free register for GOT references. */
15460 if (flag_pic && !(*targetm.binds_local_p) (function))
15461 return false;
15462
15463 /* Otherwise ok. */
15464 return true;
15465}
15466
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* A delta that doesn't fit an immediate must go through a
	     scratch register (R10, which is free in a thunk).  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      /* Pick a scratch register that does not clobber an argument:
	 R10 in 64-bit mode, ECX (or EAX under fastcall) in 32-bit.  */
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer (*this) into the scratch.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* Offset too large for a displacement: build the address in
	     a second scratch (R11).  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Finally, tail-call the real function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local PIC target: jump through the GOT entry.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin PIC: jump through the symbol's stub.  */
	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* ELF PIC: set up the GOT pointer in ECX (free here, since a
	     32-bit thunk with vcall/GOT needs was rejected earlier
	     unless a register is available) and jump via the GOT.  */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 15603
1bc7c5b6 15604static void
b96a374d 15605x86_file_start (void)
1bc7c5b6
ZW
15606{
15607 default_file_start ();
15608 if (X86_FILE_START_VERSION_DIRECTIVE)
15609 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15610 if (X86_FILE_START_FLTUSED)
15611 fputs ("\t.global\t__fltused\n", asm_out_file);
15612 if (ix86_asm_dialect == ASM_INTEL)
15613 fputs ("\t.intel_syntax\n", asm_out_file);
15614}
15615
e932b21b 15616int
b96a374d 15617x86_field_alignment (tree field, int computed)
e932b21b
JH
15618{
15619 enum machine_mode mode;
ad9335eb
JJ
15620 tree type = TREE_TYPE (field);
15621
15622 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15623 return computed;
ad9335eb
JJ
15624 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15625 ? get_inner_array_type (type) : type);
39e3a681
JJ
15626 if (mode == DFmode || mode == DCmode
15627 || GET_MODE_CLASS (mode) == MODE_INT
15628 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15629 return MIN (32, computed);
15630 return computed;
15631}
15632
a5fa1ecd
JH
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    /* 64-bit: counter address goes in %r11; PIC calls mcount via the
       GOT, non-PIC calls it directly.  */
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
      /* 32-bit PIC: address the counter via @GOTOFF and call mcount
	 through the GOT; %ebx is assumed to hold the GOT pointer.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
      /* 32-bit non-PIC: absolute addresses throughout.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
15670
d2c49530
JH
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* One opcode byte plus the address bytes; otherwise assume a
     2-byte minimum encoding.  */
  if (l)
    return 1+l;
  else
    return 2;
}
15716
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  /* Nonzero when the insn most recently dropped from the window (the
     old START) was itself a jump.  */
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Count branches and calls; dispatch tables are data, not jumps.  */
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the window from the front until it holds at most 3 jumps
	 besides INSN.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      if (njumps < 0)
	abort ();
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      /* Four jumps could share a 16-byte page: pad before INSN so it
	 starts in the next page.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
15781
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;

  /* Walk every block that can exit the function.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the first active insn or label before the return.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  /* The return is a jump target: pad if any executed incoming
	     edge reaches it by a branch rather than fallthrough.  */
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* Pad when the return directly follows a conditional jump
	     or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN)))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Use the longer return encoding, which acts as the NOP pad.  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
15831
15832/* Implement machine specific optimizations. We implement padding of returns
15833 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15834static void
15835ix86_reorg (void)
15836{
15837 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15838 ix86_pad_returns ();
15839 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15840 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15841}
15842
4977bab6
ZW
15843/* Return nonzero when QImode register that must be represented via REX prefix
15844 is used. */
15845bool
b96a374d 15846x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15847{
15848 int i;
15849 extract_insn_cached (insn);
15850 for (i = 0; i < recog_data.n_operands; i++)
15851 if (REG_P (recog_data.operand[i])
15852 && REGNO (recog_data.operand[i]) >= 4)
15853 return true;
15854 return false;
15855}
15856
15857/* Return nonzero when P points to register encoded via REX prefix.
15858 Called via for_each_rtx. */
15859static int
b96a374d 15860extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15861{
15862 unsigned int regno;
15863 if (!REG_P (*p))
15864 return 0;
15865 regno = REGNO (*p);
15866 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15867}
15868
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  Walks the whole pattern via for_each_rtx.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
15876
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  /* A non-negative value converts correctly with the signed pattern.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* High bit set: halve the value, folding the dropped low bit back in
     with IOR so the final doubling rounds correctly, then convert and
     double the result.  */
  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
15915
dafc5b82
JH
15916/* Return if we do not know how to pass TYPE solely in registers. */
15917bool
b96a374d 15918ix86_must_pass_in_stack (enum machine_mode mode, tree type)
dafc5b82
JH
15919{
15920 if (default_must_pass_in_stack (mode, type))
15921 return true;
15922 return (!TARGET_64BIT && type && mode == TImode);
15923}
15924
997404de
JH
/* Initialize vector TARGET via VALS.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Find the last non-constant element; i < 0 means all constant.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Zero the first slot in the pool constant, then patch it in
	 with a scalar move.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	/* Two doubles: one unpack interleaves them.  */
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	/* Four floats: two unpack levels assemble (v0, v1, v2, v3).  */
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
16012
e2500fed 16013#include "gt-i386.h"
This page took 4.030827 seconds and 5 git commands to generate.