/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

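/* Illustrative sketch (not part of the original file): MODE_INDEX picks the
   per-mode slot in the five-entry multiply and divide cost arrays defined
   below, so a cost query might look like the code here.  The mult_init
   field name is an assumption taken from the "cost of starting a multiply"
   comments; the real lookup lives in ix86_rtx_costs.  */
#if 0
static int
example_mult_start_cost (enum machine_mode mode)
{
  /* QImode..DImode map to indices 0..3; wider modes fall back to 4.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif
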
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  {3, 3, 3, 3, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  0,			/* "large" insn */
  2,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  2,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  2,			/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {6, 6, 6, 6, 6},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  23,			/* cost of FADD and FSUB insns.  */
  27,			/* cost of FMUL instruction.  */
  88,			/* cost of FDIV instruction.  */
  22,			/* cost of FABS instruction.  */
  24,			/* cost of FCHS instruction.  */
  122,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {12, 12, 12, 12, 12},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  3,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  1,			/* Branch cost */
  8,			/* cost of FADD and FSUB insns.  */
  16,			/* cost of FMUL instruction.  */
  73,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  83,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  {11, 11, 11, 11, 11},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  2,			/* Branch cost */
  3,			/* cost of FADD and FSUB insns.  */
  3,			/* cost of FMUL instruction.  */
  39,			/* cost of FDIV instruction.  */
  1,			/* cost of FABS instruction.  */
  1,			/* cost of FCHS instruction.  */
  70,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {4, 4, 4, 4, 4},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  3,			/* cost of FADD and FSUB insns.  */
  5,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 3, 3, 3, 3},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},	/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* MOVE_RATIO */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  1,			/* Branch cost */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {5, 5, 5, 5, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  24,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 4, 3, 4, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  9,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  19,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  4,			/* constant shift costs */
  {15, 15, 15, 15, 15},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  6,			/* MOVE_RATIO */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  5,			/* cost of FADD and FSUB insns.  */
  7,			/* cost of FMUL instruction.  */
  43,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  43,			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

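/* Illustrative note (not part of the original file): every entry in the
   tables above is scaled relative to a one-cycle add, so i386_cost's FDIV
   entry of 88 reads as "an FDIV costs roughly 88 adds on a 386".  A
   hypothetical query against the active table (field name assumed from the
   comments) would be:  */
#if 0
  int fdiv_cost = ix86_cost->fdiv;  /* 88 tuning for i386, 39 for pentium */
#endif
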
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;

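/* Illustrative sketch (not part of the original file): i386.h is expected to
   turn each bitmask above into a TARGET_* predicate by testing the bit of
   the CPU being tuned for, along these lines (the exact macro shape is an
   assumption written from memory of the CPUMASK/TUNEMASK pattern):  */
#if 0
#define TARGET_USE_LEAVE (x86_use_leave & (1 << (int) ix86_tune))
#endif
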
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

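/* Illustrative note (not part of the original file): the parameter table
   above encodes the x86-64 ABI integer argument order RDI, RSI, RDX, RCX,
   R8, R9 in gcc register numbers, so for int f (int a, int b, int c) the
   arguments land in %edi, %esi and %edx respectively.  */
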
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

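/* Illustrative arithmetic (not part of the original file): with the x86-64
   ABI values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8, the save area is
   6 * 8 + 8 * 16 == 176 bytes: one 8-byte slot per integer argument
   register and one 16-byte slot per SSE argument register.  */
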
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

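/* Illustrative sketch (not part of the original file): the stack-local
   allocator can reuse a slot by scanning this list for a matching mode and
   slot number; the list-head name below is hypothetical.  */
#if 0
  struct stack_local_entry *s;
  for (s = stack_locals_list; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;
#endif
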
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

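/* Illustrative note (not part of the original file): per the diagram above,
   to_allocate covers padding1, the va_arg save area, the frame proper and
   padding2, i.e. roughly
     to_allocate == padding1 + va_arg_size + frame + padding2
   (a simplification; the exact bookkeeping lives in
   ix86_compute_frame_layout).  */
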
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));
bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

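/* Illustrative note (not part of the original file): the four fields encode
   the general x86 effective address base + index * scale + disp.  For
   example, the AT&T operand "4(%ebx,%esi,2)" decomposes to base = %ebx,
   index = %esi, scale = 2, disp = 4.  */
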
static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static int ix86_address_cost PARAMS ((rtx));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));
static rtx ix86_delegitimize_address PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool contains_128bit_aligned_vector_p PARAMS ((tree));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
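
/* Illustrative example (not part of the original file): reading the psABI
   rules informally, struct { double d; int i; } passed by value spans two
   eightbytes classified roughly as { X86_64_SSEDF_CLASS,
   X86_64_INTEGERSI_CLASS }, so d travels in an SSE register and i in a
   general register.  This is a hand-worked example, not output of
   classify_argument.  */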

/* Table of constants used by fldpi, fldln2, etc...  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants PARAMS ((void));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

e075ae69
RH
1065 static struct pta
1066 {
8b60264b
KG
1067 const char *const name; /* processor name or nickname. */
1068 const enum processor_type processor;
0dd0e980
JH
1069 const enum pta_flags
1070 {
1071 PTA_SSE = 1,
1072 PTA_SSE2 = 2,
1073 PTA_MMX = 4,
f4365627 1074 PTA_PREFETCH_SSE = 8,
0dd0e980 1075 PTA_3DNOW = 16,
4977bab6
ZW
1076 PTA_3DNOW_A = 64,
1077 PTA_64BIT = 128
0dd0e980 1078 } flags;
e075ae69 1079 }
0f290768 1080 const processor_alias_table[] =
e075ae69 1081 {
0dd0e980
JH
1082 {"i386", PROCESSOR_I386, 0},
1083 {"i486", PROCESSOR_I486, 0},
1084 {"i586", PROCESSOR_PENTIUM, 0},
1085 {"pentium", PROCESSOR_PENTIUM, 0},
1086 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1087 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1088 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1089 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1090 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1091 {"i686", PROCESSOR_PENTIUMPRO, 0},
1092 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1093 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1094 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 1095 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 1096 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1097 {"k6", PROCESSOR_K6, PTA_MMX},
1098 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1099 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1100 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1101 | PTA_3DNOW_A},
f4365627 1102 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1103 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1104 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1105 | PTA_3DNOW_A | PTA_SSE},
f4365627 1106 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1107 | PTA_3DNOW_A | PTA_SSE},
f4365627 1108 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1109 | PTA_3DNOW_A | PTA_SSE},
4977bab6
ZW
1110 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1111 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1112 };
c8c5cb99 1113
ca7558fc 1114 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1115
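  /* Illustrative note (not part of the original file): a row such as
     "athlon-xp" makes -march=athlon-xp select PROCESSOR_ATHLON and, unless
     the user gave the corresponding -m options explicitly, turn on MMX,
     3DNow!, the Athlon 3DNow! extensions and SSE, plus x86_prefetch_sse,
     via the PTA_* tests further down.  */
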
  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

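  /* Illustrative note (not part of the original file): these flags start
     out as 2, which the option machinery uses as an "unset" sentinel, so
     the tests above only fire when no explicit -f option was given.  */
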
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

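  /* Illustrative note (not part of the original file): the net effect is
     that 32-bit compilation always uses CM_32, 64-bit compilation defaults
     to CM_SMALL (CM_SMALL_PIC under PIC), and "kernel", "medium" and
     "large" are accepted only without PIC, so e.g. combining
     -mcmodel=medium with -fpic is rejected.  */
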
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

e4c0478d 1319 /* Validate -mpreferred-stack-boundary= value, or provide default.
1320 The default of 128 bits is for Pentium III's SSE __m128, but we
1321 don't want additional code to keep the stack aligned when
1322 optimizing for code size. */
1323 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1324 ? TARGET_64BIT ? 128 : 32
fbb83b43 1325 : 128);
e075ae69 1326 if (ix86_preferred_stack_boundary_string)
3af4bd89 1327 {
400500c4 1328 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1329 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1330 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1331 TARGET_64BIT ? 4 : 2);
1332 else
1333 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1334 }
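/* Worked example (annotation, not part of the original source): with
   -mpreferred-stack-boundary=4 the assignment above computes
       (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits,
   i.e. a 16-byte-aligned stack, matching the 128-bit default chosen
   above when not optimizing for size. */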
77a989d1 1335
0f290768 1336 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1337 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1338 if (ix86_branch_cost_string)
804a8ee0 1339 {
1340 i = atoi (ix86_branch_cost_string);
1341 if (i < 0 || i > 5)
1342 error ("-mbranch-cost=%d is not between 0 and 5", i);
1343 else
1344 ix86_branch_cost = i;
804a8ee0 1345 }
804a8ee0 1346
1347 if (ix86_tls_dialect_string)
1348 {
1349 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_GNU;
1351 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1352 ix86_tls_dialect = TLS_DIALECT_SUN;
1353 else
1354 error ("bad value (%s) for -mtls-dialect= switch",
1355 ix86_tls_dialect_string);
1356 }
1357
1358 /* Keep nonleaf frame pointers. */
1359 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1360 flag_omit_frame_pointer = 1;
1361
1362 /* If we're doing fast math, we don't care about comparison order
1363 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1364 if (flag_unsafe_math_optimizations)
1365 target_flags &= ~MASK_IEEE_FP;
1366
1367 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1368 since the insns won't need emulation. */
1369 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1370 target_flags &= ~MASK_NO_FANCY_MATH_387;
1371
1372 if (TARGET_64BIT)
1373 {
1374 if (TARGET_ALIGN_DOUBLE)
c725bd79 1375 error ("-malign-double makes no sense in 64-bit mode");
14f73b5a 1376 if (TARGET_RTD)
c725bd79 1377 error ("-mrtd calling convention not supported in 64-bit mode");
14f73b5a 1378 /* Enable by default the SSE and MMX builtins. */
1379 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1380 ix86_fpmath = FPMATH_SSE;
14f73b5a 1381 }
1382 else
1383 ix86_fpmath = FPMATH_387;
1384
1385 if (ix86_fpmath_string != 0)
1386 {
1387 if (! strcmp (ix86_fpmath_string, "387"))
1388 ix86_fpmath = FPMATH_387;
1389 else if (! strcmp (ix86_fpmath_string, "sse"))
1390 {
1391 if (!TARGET_SSE)
1392 {
 1393 warning ("SSE instruction set disabled, using 387 arithmetic");
1394 ix86_fpmath = FPMATH_387;
1395 }
1396 else
1397 ix86_fpmath = FPMATH_SSE;
1398 }
1399 else if (! strcmp (ix86_fpmath_string, "387,sse")
1400 || ! strcmp (ix86_fpmath_string, "sse,387"))
1401 {
1402 if (!TARGET_SSE)
1403 {
 1404 warning ("SSE instruction set disabled, using 387 arithmetic");
1405 ix86_fpmath = FPMATH_387;
1406 }
1407 else if (!TARGET_80387)
1408 {
 1409 warning ("387 instruction set disabled, using SSE arithmetic");
1410 ix86_fpmath = FPMATH_SSE;
1411 }
1412 else
1413 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1414 }
fce5a9f2 1415 else
1416 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1417 }
14f73b5a 1418
1419 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1420 on by -msse. */
1421 if (TARGET_SSE)
1422 {
1423 target_flags |= MASK_MMX;
1424 x86_prefetch_sse = true;
1425 }
c6036a37 1426
 1427 /* If the CPU has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1428 if (TARGET_3DNOW)
1429 {
1430 target_flags |= MASK_MMX;
d1f87653 1431 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1432 extensions it adds. */
1433 if (x86_3dnow_a & (1 << ix86_arch))
1434 target_flags |= MASK_3DNOW_A;
1435 }
9e555526 1436 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1437 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1438 && !optimize_size)
1439 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1440
1441 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1442 {
1443 char *p;
1444 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1445 p = strchr (internal_label_prefix, 'X');
1446 internal_label_prefix_len = p - internal_label_prefix;
1447 *p = '\0';
1448 }
1449}
1450\f
32b5b1aa 1451void
c6aded7c 1452optimization_options (level, size)
32b5b1aa 1453 int level;
bb5177ac 1454 int size ATTRIBUTE_UNUSED;
32b5b1aa 1455{
1456 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1457 make the problem with not enough registers even worse. */
1458#ifdef INSN_SCHEDULING
1459 if (level > 1)
1460 flag_schedule_insns = 0;
1461#endif
1462
 1463 /* The default values of these switches depend on TARGET_64BIT,
 1464 which is not known at this moment. Mark these values with 2 and
 1465 let the user override them. In case there is no command line option
 1466 specifying them, we will set the defaults in override_options. */
1467 if (optimize >= 1)
1468 flag_omit_frame_pointer = 2;
1469 flag_pcc_struct_return = 2;
1470 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1471}
b08de47e 1472\f
1473/* Table of valid machine attributes. */
1474const struct attribute_spec ix86_attribute_table[] =
b08de47e 1475{
91d231cb 1476 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1477 /* Stdcall attribute says callee is responsible for popping arguments
1478 if they are not variable. */
91d231cb 1479 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1480 /* Fastcall attribute says callee is responsible for popping arguments
1481 if they are not variable. */
1482 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1483 /* Cdecl attribute says the callee is a normal C declaration */
1484 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1485 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1486 passed in registers. */
1487 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1488#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1489 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1490 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1491 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1492#endif
1493 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1494 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1495 { NULL, 0, 0, false, false, false, NULL }
1496};
1497
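/* Illustrative usage of the attributes registered above (annotation, not
   part of the original source; these are hypothetical user declarations):

       int __attribute__ ((stdcall)) sfunc (int a, int b);
       int __attribute__ ((fastcall)) ffunc (int a, int b);
       int __attribute__ ((regparm (3))) rfunc (int a, int b, int c);

   stdcall makes the callee pop its arguments, fastcall passes the first
   two DWORD-or-smaller arguments in ECX and EDX, and regparm (3) passes
   up to three integer arguments in EAX, EDX and ECX. */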
1498/* Decide whether we can make a sibling call to a function. DECL is the
1499 declaration of the function being targeted by the call and EXP is the
1500 CALL_EXPR representing the call. */
1501
1502static bool
1503ix86_function_ok_for_sibcall (decl, exp)
1504 tree decl;
1505 tree exp;
1506{
1507 /* If we are generating position-independent code, we cannot sibcall
1508 optimize any indirect call, or a direct call to a global function,
1509 as the PLT requires %ebx be live. */
1510 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1511 return false;
1512
1513 /* If we are returning floats on the 80387 register stack, we cannot
1514 make a sibcall from a function that doesn't return a float to a
1515 function that does or, conversely, from a function that does return
1516 a float to a function that doesn't; the necessary stack adjustment
1517 would not be executed. */
4977bab6 1518 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1519 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1520 return false;
1521
1522 /* If this call is indirect, we'll need to be able to use a call-clobbered
1523 register for the address of the target function. Make sure that all
1524 such registers are not used for passing parameters. */
1525 if (!decl && !TARGET_64BIT)
1526 {
1527 int regparm = ix86_regparm;
1528 tree attr, type;
1529
1530 /* We're looking at the CALL_EXPR, we need the type of the function. */
1531 type = TREE_OPERAND (exp, 0); /* pointer expression */
1532 type = TREE_TYPE (type); /* pointer type */
1533 type = TREE_TYPE (type); /* function type */
1534
1535 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1536 if (attr)
1537 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1538
1539 if (regparm >= 3)
1540 {
1541 /* ??? Need to count the actual number of registers to be used,
1542 not the possible number of registers. Fix later. */
1543 return false;
1544 }
1545 }
1546
1547 /* Otherwise okay. That also includes certain types of indirect calls. */
1548 return true;
1549}
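/* Illustrative consequence of the checks above (annotation, not part of
   the original source; glob and caller are hypothetical names):

       extern int glob (int);
       int caller (int x) { return glob (x); }

   Compiled with -fpic on ia32, caller is not sibcall-optimized, because
   the call to the global function goes through the PLT and the PLT
   requires %ebx to be live. */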
1550
e91f04de 1551/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1552 arguments as in struct attribute_spec.handler. */
1553static tree
1554ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1555 tree *node;
1556 tree name;
1557 tree args ATTRIBUTE_UNUSED;
1558 int flags ATTRIBUTE_UNUSED;
1559 bool *no_add_attrs;
1560{
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1565 {
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
1569 }
1570 else
1571 {
1572 if (is_attribute_p ("fastcall", name))
1573 {
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1575 {
1576 error ("fastcall and stdcall attributes are not compatible");
1577 }
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1579 {
1580 error ("fastcall and regparm attributes are not compatible");
1581 }
1582 }
1583 else if (is_attribute_p ("stdcall", name))
1584 {
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1586 {
1587 error ("fastcall and stdcall attributes are not compatible");
1588 }
1589 }
1590 }
b08de47e 1591
1592 if (TARGET_64BIT)
1593 {
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1596 }
b08de47e 1597
1598 return NULL_TREE;
1599}
b08de47e 1600
1601/* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. */
1603static tree
1604ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1605 tree *node;
1606 tree name;
1607 tree args;
1608 int flags ATTRIBUTE_UNUSED;
1609 bool *no_add_attrs;
1610{
1611 if (TREE_CODE (*node) != FUNCTION_TYPE
1612 && TREE_CODE (*node) != METHOD_TYPE
1613 && TREE_CODE (*node) != FIELD_DECL
1614 && TREE_CODE (*node) != TYPE_DECL)
1615 {
1616 warning ("`%s' attribute only applies to functions",
1617 IDENTIFIER_POINTER (name));
1618 *no_add_attrs = true;
1619 }
1620 else
1621 {
1622 tree cst;
b08de47e 1623
1624 cst = TREE_VALUE (args);
1625 if (TREE_CODE (cst) != INTEGER_CST)
1626 {
1627 warning ("`%s' attribute requires an integer constant argument",
1628 IDENTIFIER_POINTER (name));
1629 *no_add_attrs = true;
1630 }
1631 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1632 {
1633 warning ("argument to `%s' attribute larger than %d",
1634 IDENTIFIER_POINTER (name), REGPARM_MAX);
1635 *no_add_attrs = true;
1636 }
1637
1638 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1639 {
1640 error ("fastcall and regparm attributes are not compatible");
1641 }
1642 }
1643
91d231cb 1644 return NULL_TREE;
1645}
1646
1647/* Return 0 if the attributes for two types are incompatible, 1 if they
1648 are compatible, and 2 if they are nearly compatible (which causes a
1649 warning to be generated). */
1650
8d8e52be 1651static int
e075ae69 1652ix86_comp_type_attributes (type1, type2)
1653 tree type1;
1654 tree type2;
b08de47e 1655{
0f290768 1656 /* Check for mismatch of non-default calling convention. */
27c38fbe 1657 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658
1659 if (TREE_CODE (type1) != FUNCTION_TYPE)
1660 return 1;
1661
1662 /* Check for mismatched fastcall types */
1663 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1664 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1665 return 0;
1666
afcfe58c 1667 /* Check for mismatched return types (cdecl vs stdcall). */
1668 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1669 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1670 return 0;
1671 return 1;
1672}
b08de47e 1673\f
 1674/* Return the regparm value for a function with the indicated TYPE. */
1675
1676static int
1677ix86_fntype_regparm (type)
1678 tree type;
1679{
1680 tree attr;
1681
1682 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1683 if (attr)
1684 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1685 else
1686 return ix86_regparm;
1687}
1688
1689/* Value is the number of bytes of arguments automatically
1690 popped when returning from a subroutine call.
1691 FUNDECL is the declaration node of the function (as a tree),
1692 FUNTYPE is the data type of the function (as a tree),
1693 or for a library call it is an identifier node for the subroutine name.
1694 SIZE is the number of bytes of arguments passed on the stack.
1695
1696 On the 80386, the RTD insn may be used to pop them if the number
1697 of args is fixed, but if the number is variable then the caller
1698 must pop them all. RTD can't be used for library calls now
1699 because the library is compiled with the Unix compiler.
1700 Use of RTD is a selectable option, since it is incompatible with
1701 standard Unix calling sequences. If the option is not selected,
1702 the caller must always pop the args.
1703
1704 The attribute stdcall is equivalent to RTD on a per module basis. */
1705
1706int
e075ae69 1707ix86_return_pops_args (fundecl, funtype, size)
1708 tree fundecl;
1709 tree funtype;
1710 int size;
79325812 1711{
3345ee7d 1712 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1713
0f290768 1714 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1715 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1716
1717 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1718 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1719 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1720 rtd = 1;
79325812 1721
1722 if (rtd
1723 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1724 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1725 == void_type_node)))
1726 return size;
1727 }
79325812 1728
232b8f52 1729 /* Lose any fake structure return argument if it is passed on the stack. */
1730 if (aggregate_value_p (TREE_TYPE (funtype))
1731 && !TARGET_64BIT)
232b8f52 1732 {
483ab821 1733 int nregs = ix86_fntype_regparm (funtype);
1734
1735 if (!nregs)
1736 return GET_MODE_SIZE (Pmode);
1737 }
1738
1739 return 0;
b08de47e 1740}
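/* Illustrative example (annotation, not part of the original source;
   f is a hypothetical declaration):

       void __attribute__ ((stdcall)) f (int a, int b);

   For f this function returns 8, so the callee pops both arguments
   (e.g. via "ret $8"), while for a plain cdecl function it returns 0
   and the caller adjusts the stack pointer itself. */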
1741\f
1742/* Argument support functions. */
1743
1744/* Return true when register may be used to pass function parameters. */
1745bool
1746ix86_function_arg_regno_p (regno)
1747 int regno;
1748{
1749 int i;
1750 if (!TARGET_64BIT)
1751 return (regno < REGPARM_MAX
1752 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1753 if (SSE_REGNO_P (regno) && TARGET_SSE)
1754 return true;
1755 /* RAX is used as hidden argument to va_arg functions. */
1756 if (!regno)
1757 return true;
1758 for (i = 0; i < REGPARM_MAX; i++)
1759 if (regno == x86_64_int_parameter_registers[i])
1760 return true;
1761 return false;
1762}
1763
1764/* Initialize a variable CUM of type CUMULATIVE_ARGS
1765 for a call to a function whose data type is FNTYPE.
1766 For a library call, FNTYPE is 0. */
1767
1768void
dafc5b82 1769init_cumulative_args (cum, fntype, libname, fndecl)
e9a25f70 1770 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1771 tree fntype; /* tree ptr for function decl */
1772 rtx libname; /* SYMBOL_REF of library name or 0 */
dafc5b82 1773 tree fndecl;
1774{
1775 static CUMULATIVE_ARGS zero_cum;
1776 tree param, next_param;
dafc5b82 1777 bool user_convention = false;
1778
1779 if (TARGET_DEBUG_ARG)
1780 {
1781 fprintf (stderr, "\ninit_cumulative_args (");
1782 if (fntype)
1783 fprintf (stderr, "fntype code = %s, ret code = %s",
1784 tree_code_name[(int) TREE_CODE (fntype)],
1785 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1786 else
1787 fprintf (stderr, "no fntype");
1788
1789 if (libname)
1790 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1791 }
1792
1793 *cum = zero_cum;
1794
1795 /* Set up the number of registers to use for passing arguments. */
e075ae69 1796 cum->nregs = ix86_regparm;
1797 cum->sse_nregs = SSE_REGPARM_MAX;
1798 if (fntype && !TARGET_64BIT)
1799 {
1800 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1801
b08de47e 1802 if (attr)
1803 {
1804 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1806 }
b08de47e 1807 }
53c17031 1808 cum->maybe_vaarg = false;
b08de47e 1809
1810 /* Use ecx and edx registers if function has fastcall attribute */
1811 if (fntype && !TARGET_64BIT)
1812 {
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1814 {
1815 cum->nregs = 2;
1816 cum->fastcall = 1;
1817 user_convention = true;
1818 }
1819 }
1820
1821 /* Use register calling convention for local functions when possible. */
1822 if (!TARGET_64BIT && !user_convention && fndecl
1823 && flag_unit_at_a_time)
1824 {
1825 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1826 if (i && i->local)
1827 {
1828 /* We can't use regparm(3) for nested functions as these use
 1829 the static chain pointer in the third argument. */
1830 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1831 cum->nregs = 2;
1832 else
1833 cum->nregs = 3;
1834 }
1835 }
1836
1837
 1838 /* Determine if this function has variable arguments. This is
 1839 indicated by the last argument being 'void_type_node' if there
 1840 are no variable arguments. If there are variable arguments, then
 1841 we won't pass anything in registers. */
1842
1843 if (cum->nregs)
1844 {
1845 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1846 param != 0; param = next_param)
1847 {
1848 next_param = TREE_CHAIN (param);
e9a25f70 1849 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1850 {
1851 if (!TARGET_64BIT)
1852 {
1853 cum->nregs = 0;
1854 cum->fastcall = 0;
1855 }
1856 cum->maybe_vaarg = true;
1857 }
1858 }
1859 }
1860 if ((!fntype && !libname)
1861 || (fntype && !TYPE_ARG_TYPES (fntype)))
1862 cum->maybe_vaarg = 1;
1863
1864 if (TARGET_DEBUG_ARG)
1865 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1866
1867 return;
1868}
1869
d1f87653 1870/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1871 of this code is to classify each 8-byte chunk of the incoming argument by the register
1872 class and assign registers accordingly. */
1873
1874/* Return the union class of CLASS1 and CLASS2.
1875 See the x86-64 PS ABI for details. */
1876
1877static enum x86_64_reg_class
1878merge_classes (class1, class2)
1879 enum x86_64_reg_class class1, class2;
1880{
1881 /* Rule #1: If both classes are equal, this is the resulting class. */
1882 if (class1 == class2)
1883 return class1;
1884
1885 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1886 the other class. */
1887 if (class1 == X86_64_NO_CLASS)
1888 return class2;
1889 if (class2 == X86_64_NO_CLASS)
1890 return class1;
1891
1892 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1893 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1894 return X86_64_MEMORY_CLASS;
1895
1896 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1897 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1898 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1899 return X86_64_INTEGERSI_CLASS;
1900 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1901 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1902 return X86_64_INTEGER_CLASS;
1903
1904 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1905 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1906 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1907 return X86_64_MEMORY_CLASS;
1908
1909 /* Rule #6: Otherwise class SSE is used. */
1910 return X86_64_SSE_CLASS;
1911}
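/* Worked example of the merge rules (annotation, not part of the
   original source): for

       struct s { int a; int b; double c; };

   the first eightbyte merges two X86_64_INTEGERSI_CLASS fields (rule #1)
   and the second eightbyte is X86_64_SSEDF_CLASS, so the structure is
   passed in one general-purpose register and one SSE register. */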
1912
1913/* Classify the argument of type TYPE and mode MODE.
1914 CLASSES will be filled by the register class used to pass each word
1915 of the operand. The number of words is returned. In case the parameter
1916 should be passed in memory, 0 is returned. As a special case for zero
1917 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1918
 1919 BIT_OFFSET is used internally for handling records; it specifies the
 1920 offset in bits modulo 256 to avoid overflow cases.
1921
1922 See the x86-64 PS ABI for details.
1923*/
1924
1925static int
1926classify_argument (mode, type, classes, bit_offset)
1927 enum machine_mode mode;
1928 tree type;
1929 enum x86_64_reg_class classes[MAX_CLASSES];
1930 int bit_offset;
1931{
1932 int bytes =
1933 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1934 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1935
1936 /* Variable sized entities are always passed/returned in memory. */
1937 if (bytes < 0)
1938 return 0;
1939
1940 if (mode != VOIDmode
1941 && MUST_PASS_IN_STACK (mode, type))
1942 return 0;
1943
1944 if (type && AGGREGATE_TYPE_P (type))
1945 {
1946 int i;
1947 tree field;
1948 enum x86_64_reg_class subclasses[MAX_CLASSES];
1949
1950 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1951 if (bytes > 16)
1952 return 0;
1953
1954 for (i = 0; i < words; i++)
1955 classes[i] = X86_64_NO_CLASS;
1956
 1957 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
 1958 signal the memory class, so handle it as a special case. */
1959 if (!words)
1960 {
1961 classes[0] = X86_64_NO_CLASS;
1962 return 1;
1963 }
1964
1965 /* Classify each field of record and merge classes. */
1966 if (TREE_CODE (type) == RECORD_TYPE)
1967 {
 1968 /* For classes, first merge in the fields of the base classes. */
1969 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1970 {
1971 tree bases = TYPE_BINFO_BASETYPES (type);
1972 int n_bases = TREE_VEC_LENGTH (bases);
1973 int i;
1974
1975 for (i = 0; i < n_bases; ++i)
1976 {
1977 tree binfo = TREE_VEC_ELT (bases, i);
1978 int num;
1979 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1980 tree type = BINFO_TYPE (binfo);
1981
1982 num = classify_argument (TYPE_MODE (type),
1983 type, subclasses,
1984 (offset + bit_offset) % 256);
1985 if (!num)
1986 return 0;
1987 for (i = 0; i < num; i++)
1988 {
db01f480 1989 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1990 classes[i + pos] =
1991 merge_classes (subclasses[i], classes[i + pos]);
1992 }
1993 }
1994 }
 1995 /* And now merge in the fields of the structure. */
1996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1997 {
1998 if (TREE_CODE (field) == FIELD_DECL)
1999 {
2000 int num;
2001
2002 /* Bitfields are always classified as integer. Handle them
2003 early, since later code would consider them to be
2004 misaligned integers. */
2005 if (DECL_BIT_FIELD (field))
2006 {
2007 for (i = int_bit_position (field) / 8 / 8;
2008 i < (int_bit_position (field)
2009 + tree_low_cst (DECL_SIZE (field), 0)
2010 + 63) / 8 / 8; i++)
2011 classes[i] =
2012 merge_classes (X86_64_INTEGER_CLASS,
2013 classes[i]);
2014 }
2015 else
2016 {
2017 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2018 TREE_TYPE (field), subclasses,
2019 (int_bit_position (field)
2020 + bit_offset) % 256);
2021 if (!num)
2022 return 0;
2023 for (i = 0; i < num; i++)
2024 {
2025 int pos =
db01f480 2026 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2027 classes[i + pos] =
2028 merge_classes (subclasses[i], classes[i + pos]);
2029 }
2030 }
2031 }
2032 }
2033 }
2034 /* Arrays are handled as small records. */
2035 else if (TREE_CODE (type) == ARRAY_TYPE)
2036 {
2037 int num;
2038 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2039 TREE_TYPE (type), subclasses, bit_offset);
2040 if (!num)
2041 return 0;
2042
2043 /* The partial classes are now full classes. */
2044 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2045 subclasses[0] = X86_64_SSE_CLASS;
2046 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2047 subclasses[0] = X86_64_INTEGER_CLASS;
2048
2049 for (i = 0; i < words; i++)
2050 classes[i] = subclasses[i % num];
2051 }
2052 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2053 else if (TREE_CODE (type) == UNION_TYPE
2054 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2055 {
 2056 /* For classes, first merge in the fields of the base classes. */
2057 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2058 {
2059 tree bases = TYPE_BINFO_BASETYPES (type);
2060 int n_bases = TREE_VEC_LENGTH (bases);
2061 int i;
2062
2063 for (i = 0; i < n_bases; ++i)
2064 {
2065 tree binfo = TREE_VEC_ELT (bases, i);
2066 int num;
2067 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2068 tree type = BINFO_TYPE (binfo);
2069
2070 num = classify_argument (TYPE_MODE (type),
2071 type, subclasses,
db01f480 2072 (offset + (bit_offset % 64)) % 256);
2073 if (!num)
2074 return 0;
2075 for (i = 0; i < num; i++)
2076 {
c16576e6 2077 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2078 classes[i + pos] =
2079 merge_classes (subclasses[i], classes[i + pos]);
2080 }
2081 }
2082 }
2083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2084 {
2085 if (TREE_CODE (field) == FIELD_DECL)
2086 {
2087 int num;
2088 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2089 TREE_TYPE (field), subclasses,
2090 bit_offset);
2091 if (!num)
2092 return 0;
2093 for (i = 0; i < num; i++)
2094 classes[i] = merge_classes (subclasses[i], classes[i]);
2095 }
2096 }
2097 }
2098 else
2099 abort ();
2100
2101 /* Final merger cleanup. */
2102 for (i = 0; i < words; i++)
2103 {
2104 /* If one class is MEMORY, everything should be passed in
2105 memory. */
2106 if (classes[i] == X86_64_MEMORY_CLASS)
2107 return 0;
2108
d6a7951f 2109 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2110 X86_64_SSE_CLASS. */
2111 if (classes[i] == X86_64_SSEUP_CLASS
2112 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2113 classes[i] = X86_64_SSE_CLASS;
2114
d6a7951f 2115 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2116 if (classes[i] == X86_64_X87UP_CLASS
2117 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2118 classes[i] = X86_64_SSE_CLASS;
2119 }
2120 return words;
2121 }
2122
 2123 /* Compute the alignment needed. We align all types to natural boundaries,
 2124 with the exception of XFmode, which is aligned to 64 bits. */
2125 if (mode != VOIDmode && mode != BLKmode)
2126 {
2127 int mode_alignment = GET_MODE_BITSIZE (mode);
2128
2129 if (mode == XFmode)
2130 mode_alignment = 128;
2131 else if (mode == XCmode)
2132 mode_alignment = 256;
f5143c46 2133 /* Misaligned fields are always returned in memory. */
2134 if (bit_offset % mode_alignment)
2135 return 0;
2136 }
2137
2138 /* Classification of atomic types. */
2139 switch (mode)
2140 {
2141 case DImode:
2142 case SImode:
2143 case HImode:
2144 case QImode:
2145 case CSImode:
2146 case CHImode:
2147 case CQImode:
2148 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2149 classes[0] = X86_64_INTEGERSI_CLASS;
2150 else
2151 classes[0] = X86_64_INTEGER_CLASS;
2152 return 1;
2153 case CDImode:
2154 case TImode:
2155 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2156 return 2;
2157 case CTImode:
2158 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2159 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2160 return 4;
2161 case SFmode:
2162 if (!(bit_offset % 64))
2163 classes[0] = X86_64_SSESF_CLASS;
2164 else
2165 classes[0] = X86_64_SSE_CLASS;
2166 return 1;
2167 case DFmode:
2168 classes[0] = X86_64_SSEDF_CLASS;
2169 return 1;
2170 case TFmode:
2171 classes[0] = X86_64_X87_CLASS;
2172 classes[1] = X86_64_X87UP_CLASS;
2173 return 2;
2174 case TCmode:
2175 classes[0] = X86_64_X87_CLASS;
2176 classes[1] = X86_64_X87UP_CLASS;
2177 classes[2] = X86_64_X87_CLASS;
2178 classes[3] = X86_64_X87UP_CLASS;
2179 return 4;
2180 case DCmode:
2181 classes[0] = X86_64_SSEDF_CLASS;
2182 classes[1] = X86_64_SSEDF_CLASS;
2183 return 2;
2184 case SCmode:
2185 classes[0] = X86_64_SSE_CLASS;
2186 return 1;
2187 case V4SFmode:
2188 case V4SImode:
2189 case V16QImode:
2190 case V8HImode:
2191 case V2DFmode:
2192 case V2DImode:
2193 classes[0] = X86_64_SSE_CLASS;
2194 classes[1] = X86_64_SSEUP_CLASS;
2195 return 2;
2196 case V2SFmode:
2197 case V2SImode:
2198 case V4HImode:
2199 case V8QImode:
1194ca05 2200 return 0;
53c17031 2201 case BLKmode:
e95d6b23 2202 case VOIDmode:
2203 return 0;
2204 default:
2205 abort ();
2206 }
2207}
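/* Illustrative classification results (annotation, not part of the
   original source):
       long        -> INTEGER                   (1 word)
       float       -> SSESF                     (1 word)
       __m128      -> SSE, SSEUP                (2 words)
       long double -> X87, X87UP (TFmode here)  (2 words)
   and any aggregate larger than 16 bytes gets 0, i.e. memory. */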
2208
 2209/* Examine the argument and return the number of registers required in each
f5143c46 2210 class. Return 0 iff the parameter should be passed in memory. */
2211static int
2212examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2213 enum machine_mode mode;
2214 tree type;
2215 int *int_nregs, *sse_nregs;
2216 int in_return;
2217{
2218 enum x86_64_reg_class class[MAX_CLASSES];
2219 int n = classify_argument (mode, type, class, 0);
2220
2221 *int_nregs = 0;
2222 *sse_nregs = 0;
2223 if (!n)
2224 return 0;
2225 for (n--; n >= 0; n--)
2226 switch (class[n])
2227 {
2228 case X86_64_INTEGER_CLASS:
2229 case X86_64_INTEGERSI_CLASS:
2230 (*int_nregs)++;
2231 break;
2232 case X86_64_SSE_CLASS:
2233 case X86_64_SSESF_CLASS:
2234 case X86_64_SSEDF_CLASS:
2235 (*sse_nregs)++;
2236 break;
2237 case X86_64_NO_CLASS:
2238 case X86_64_SSEUP_CLASS:
2239 break;
2240 case X86_64_X87_CLASS:
2241 case X86_64_X87UP_CLASS:
2242 if (!in_return)
2243 return 0;
2244 break;
2245 case X86_64_MEMORY_CLASS:
2246 abort ();
2247 }
2248 return 1;
2249}
2250/* Construct container for the argument used by GCC interface. See
2251 FUNCTION_ARG for the detailed description. */
2252static rtx
2253construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2254 enum machine_mode mode;
2255 tree type;
2256 int in_return;
2257 int nintregs, nsseregs;
2258 const int * intreg;
2259 int sse_regno;
2260{
2261 enum machine_mode tmpmode;
2262 int bytes =
2263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2264 enum x86_64_reg_class class[MAX_CLASSES];
2265 int n;
2266 int i;
2267 int nexps = 0;
2268 int needed_sseregs, needed_intregs;
2269 rtx exp[MAX_CLASSES];
2270 rtx ret;
2271
2272 n = classify_argument (mode, type, class, 0);
2273 if (TARGET_DEBUG_ARG)
2274 {
2275 if (!n)
2276 fprintf (stderr, "Memory class\n");
2277 else
2278 {
2279 fprintf (stderr, "Classes:");
2280 for (i = 0; i < n; i++)
2281 {
2282 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2283 }
2284 fprintf (stderr, "\n");
2285 }
2286 }
2287 if (!n)
2288 return NULL;
2289 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2290 return NULL;
2291 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2292 return NULL;
2293
2294 /* First construct simple cases. Avoid SCmode, since we want to use
2295 single register to pass this type. */
2296 if (n == 1 && mode != SCmode)
2297 switch (class[0])
2298 {
2299 case X86_64_INTEGER_CLASS:
2300 case X86_64_INTEGERSI_CLASS:
2301 return gen_rtx_REG (mode, intreg[0]);
2302 case X86_64_SSE_CLASS:
2303 case X86_64_SSESF_CLASS:
2304 case X86_64_SSEDF_CLASS:
2305 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2306 case X86_64_X87_CLASS:
2307 return gen_rtx_REG (mode, FIRST_STACK_REG);
2308 case X86_64_NO_CLASS:
2309 /* Zero sized array, struct or class. */
2310 return NULL;
2311 default:
2312 abort ();
2313 }
2314 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2315 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2316 if (n == 2
2317 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2318 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2319 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2320 && class[1] == X86_64_INTEGER_CLASS
2321 && (mode == CDImode || mode == TImode)
2322 && intreg[0] + 1 == intreg[1])
2323 return gen_rtx_REG (mode, intreg[0]);
2324 if (n == 4
2325 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2326 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2327 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2328
2329 /* Otherwise figure out the entries of the PARALLEL. */
2330 for (i = 0; i < n; i++)
2331 {
2332 switch (class[i])
2333 {
2334 case X86_64_NO_CLASS:
2335 break;
2336 case X86_64_INTEGER_CLASS:
2337 case X86_64_INTEGERSI_CLASS:
d1f87653 2338 /* Merge TImodes on aligned occasions here too. */
2339 if (i * 8 + 8 > bytes)
2340 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2341 else if (class[i] == X86_64_INTEGERSI_CLASS)
2342 tmpmode = SImode;
2343 else
2344 tmpmode = DImode;
 2345 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2346 if (tmpmode == BLKmode)
2347 tmpmode = DImode;
2348 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2349 gen_rtx_REG (tmpmode, *intreg),
2350 GEN_INT (i*8));
2351 intreg++;
2352 break;
2353 case X86_64_SSESF_CLASS:
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (SFmode,
2356 SSE_REGNO (sse_regno)),
2357 GEN_INT (i*8));
2358 sse_regno++;
2359 break;
2360 case X86_64_SSEDF_CLASS:
2361 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2362 gen_rtx_REG (DFmode,
2363 SSE_REGNO (sse_regno)),
2364 GEN_INT (i*8));
2365 sse_regno++;
2366 break;
2367 case X86_64_SSE_CLASS:
2368 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2369 tmpmode = TImode;
2370 else
2371 tmpmode = DImode;
2372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2373 gen_rtx_REG (tmpmode,
2374 SSE_REGNO (sse_regno)),
2375 GEN_INT (i*8));
2376 if (tmpmode == TImode)
2377 i++;
2378 sse_regno++;
2379 break;
2380 default:
2381 abort ();
2382 }
2383 }
2384 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2385 for (i = 0; i < nexps; i++)
2386 XVECEXP (ret, 0, i) = exp [i];
2387 return ret;
2388}
2389
2390/* Update the data in CUM to advance over an argument
2391 of mode MODE and data type TYPE.
2392 (TYPE is null for libcalls where that information may not be available.) */
2393
2394void
2395function_arg_advance (cum, mode, type, named)
2396 CUMULATIVE_ARGS *cum; /* current arg information */
2397 enum machine_mode mode; /* current arg mode */
2398 tree type; /* type of the argument or 0 if lib support */
2399 int named; /* whether or not the argument was named */
2400{
2401 int bytes =
2402 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2403 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2404
2405 if (TARGET_DEBUG_ARG)
2406 fprintf (stderr,
e9a25f70 2407 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2408 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2409 if (TARGET_64BIT)
b08de47e 2410 {
2411 int int_nregs, sse_nregs;
2412 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2413 cum->words += words;
2414 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2415 {
2416 cum->nregs -= int_nregs;
2417 cum->sse_nregs -= sse_nregs;
2418 cum->regno += int_nregs;
2419 cum->sse_regno += sse_nregs;
82a127a9 2420 }
2421 else
2422 cum->words += words;
b08de47e 2423 }
a4f31c00 2424 else
82a127a9 2425 {
2426 if (TARGET_SSE && mode == TImode)
2427 {
2428 cum->sse_words += words;
2429 cum->sse_nregs -= 1;
2430 cum->sse_regno += 1;
2431 if (cum->sse_nregs <= 0)
2432 {
2433 cum->sse_nregs = 0;
2434 cum->sse_regno = 0;
2435 }
2436 }
2437 else
82a127a9 2438 {
2439 cum->words += words;
2440 cum->nregs -= words;
2441 cum->regno += words;
2442
2443 if (cum->nregs <= 0)
2444 {
2445 cum->nregs = 0;
2446 cum->regno = 0;
2447 }
2448 }
2449 }
2450 return;
2451}
2452
2453/* Define where to put the arguments to a function.
2454 Value is zero to push the argument on the stack,
2455 or a hard register in which to store the argument.
2456
2457 MODE is the argument's machine mode.
2458 TYPE is the data type of the argument (as a tree).
2459 This is null for libcalls where that information may
2460 not be available.
2461 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2462 the preceding args and about the function being called.
2463 NAMED is nonzero if this argument is a named parameter
2464 (otherwise it is an extra parameter matching an ellipsis). */
2465
07933f72 2466rtx
2467function_arg (cum, mode, type, named)
2468 CUMULATIVE_ARGS *cum; /* current arg information */
2469 enum machine_mode mode; /* current arg mode */
2470 tree type; /* type of the argument or 0 if lib support */
2471 int named; /* != 0 for normal args, == 0 for ... args */
2472{
2473 rtx ret = NULL_RTX;
2474 int bytes =
2475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2477
5bdc5878 2478 /* Handle a hidden AL argument containing the number of registers for varargs
2479 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2480 any AL settings. */
32ee7d1d 2481 if (mode == VOIDmode)
b08de47e 2482 {
2483 if (TARGET_64BIT)
2484 return GEN_INT (cum->maybe_vaarg
2485 ? (cum->sse_nregs < 0
2486 ? SSE_REGPARM_MAX
2487 : cum->sse_regno)
2488 : -1);
2489 else
2490 return constm1_rtx;
b08de47e 2491 }
2492 if (TARGET_64BIT)
2493 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2494 &x86_64_int_parameter_registers [cum->regno],
2495 cum->sse_regno);
2496 else
2497 switch (mode)
2498 {
2499 /* For now, pass fp/complex values on the stack. */
2500 default:
2501 break;
2502
2503 case BLKmode:
2504 if (bytes < 0)
2505 break;
2506 /* FALLTHRU */
2507 case DImode:
2508 case SImode:
2509 case HImode:
2510 case QImode:
2511 if (words <= cum->nregs)
2512 {
2513 int regno = cum->regno;
2514
2515 /* Fastcall allocates the first two DWORD (SImode) or
2516 smaller arguments to ECX and EDX. */
2517 if (cum->fastcall)
2518 {
2519 if (mode == BLKmode || mode == DImode)
2520 break;
2521
2522 /* ECX not EAX is the first allocated register. */
2523 if (regno == 0)
2524 regno = 2;
2525 }
2526 ret = gen_rtx_REG (mode, regno);
2527 }
2528 break;
2529 case TImode:
2530 if (cum->sse_nregs)
2531 ret = gen_rtx_REG (mode, cum->sse_regno);
2532 break;
2533 }
2534
2535 if (TARGET_DEBUG_ARG)
2536 {
2537 fprintf (stderr,
91ea38f9 2538 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2539 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2540
2541 if (ret)
91ea38f9 2542 print_simple_rtl (stderr, ret);
2543 else
2544 fprintf (stderr, ", stack");
2545
2546 fprintf (stderr, " )\n");
2547 }
2548
2549 return ret;
2550}
53c17031 2551
2552/* A C expression that indicates when an argument must be passed by
2553 reference. If nonzero for an argument, a copy of that argument is
2554 made in memory and a pointer to the argument is passed instead of
2555 the argument itself. The pointer is passed in whatever way is
2556 appropriate for passing a pointer to that type. */
2557
2558int
2559function_arg_pass_by_reference (cum, mode, type, named)
2560 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2561 enum machine_mode mode ATTRIBUTE_UNUSED;
2562 tree type;
2563 int named ATTRIBUTE_UNUSED;
2564{
2565 if (!TARGET_64BIT)
2566 return 0;
2567
2568 if (type && int_size_in_bytes (type) == -1)
2569 {
2570 if (TARGET_DEBUG_ARG)
2571 fprintf (stderr, "function_arg_pass_by_reference\n");
2572 return 1;
2573 }
2574
2575 return 0;
2576}
2577
 2578/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
 2579 passing ABI. */
2580static bool
2581contains_128bit_aligned_vector_p (type)
2582 tree type;
2583{
2584 enum machine_mode mode = TYPE_MODE (type);
2585 if (SSE_REG_MODE_P (mode)
2586 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2587 return true;
2588 if (TYPE_ALIGN (type) < 128)
2589 return false;
2590
2591 if (AGGREGATE_TYPE_P (type))
2592 {
 2593 /* Walk the aggregates recursively. */
2594 if (TREE_CODE (type) == RECORD_TYPE
2595 || TREE_CODE (type) == UNION_TYPE
2596 || TREE_CODE (type) == QUAL_UNION_TYPE)
2597 {
2598 tree field;
2599
2600 if (TYPE_BINFO (type) != NULL
2601 && TYPE_BINFO_BASETYPES (type) != NULL)
2602 {
2603 tree bases = TYPE_BINFO_BASETYPES (type);
2604 int n_bases = TREE_VEC_LENGTH (bases);
2605 int i;
2606
2607 for (i = 0; i < n_bases; ++i)
2608 {
2609 tree binfo = TREE_VEC_ELT (bases, i);
2610 tree type = BINFO_TYPE (binfo);
2611
2612 if (contains_128bit_aligned_vector_p (type))
2613 return true;
2614 }
2615 }
 2616 /* And now check the fields of the structure. */
2617 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2618 {
2619 if (TREE_CODE (field) == FIELD_DECL
2620 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2621 return true;
2622 }
2623 }
 2624 /* Just in case some languages pass arrays by value. */
2625 else if (TREE_CODE (type) == ARRAY_TYPE)
2626 {
2627 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2628 return true;
2629 }
2630 else
2631 abort ();
2632 }
2633 return false;
2634}
2635
2636/* Gives the alignment boundary, in bits, of an argument with the specified mode
2637 and type. */
2638
2639int
2640ix86_function_arg_boundary (mode, type)
2641 enum machine_mode mode;
2642 tree type;
2643{
2644 int align;
2645 if (type)
2646 align = TYPE_ALIGN (type);
2647 else
2648 align = GET_MODE_ALIGNMENT (mode);
2649 if (align < PARM_BOUNDARY)
2650 align = PARM_BOUNDARY;
2651 if (!TARGET_64BIT)
2652 {
2653 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2654 make an exception for SSE modes since these require 128bit
2655 alignment.
2656
2657 The handling here differs from field_alignment. ICC aligns MMX
2658 arguments to 4 byte boundaries, while structure fields are aligned
2659 to 8 byte boundaries. */
2660 if (!type)
2661 {
2662 if (!SSE_REG_MODE_P (mode))
2663 align = PARM_BOUNDARY;
2664 }
2665 else
2666 {
2667 if (!contains_128bit_aligned_vector_p (type))
2668 align = PARM_BOUNDARY;
2669 }
2670 if (align != PARM_BOUNDARY && !TARGET_SSE)
2671 abort();
2672 }
2673 if (align > 128)
2674 align = 128;
2675 return align;
2676}
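/* Illustrative results (annotation, not part of the original source):
   on ia32 a plain int argument keeps PARM_BOUNDARY (32 bits), while an
   __m128 argument, or a structure containing one, receives the full
   128-bit boundary computed above. */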
2677
2678/* Return true if N is a possible register number of function value. */
2679bool
2680ix86_function_value_regno_p (regno)
2681 int regno;
2682{
2683 if (!TARGET_64BIT)
2684 {
2685 return ((regno) == 0
2686 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2687 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2688 }
2689 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2690 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2691 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2692}
2693
2694/* Define how to find the value returned by a function.
2695 VALTYPE is the data type of the value (as a tree).
2696 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2697 otherwise, FUNC is 0. */
2698rtx
2699ix86_function_value (valtype)
2700 tree valtype;
2701{
2702 if (TARGET_64BIT)
2703 {
2704 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2705 REGPARM_MAX, SSE_REGPARM_MAX,
2706 x86_64_int_return_registers, 0);
 2707 /* For zero sized structures, construct_container returns NULL, but we need
 2708 to keep the rest of the compiler happy by returning a meaningful value. */
2709 if (!ret)
2710 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2711 return ret;
2712 }
2713 else
2714 return gen_rtx_REG (TYPE_MODE (valtype),
2715 ix86_value_regno (TYPE_MODE (valtype)));
2716}
2717
f5143c46 2718/* Return nonzero iff TYPE is returned in memory. */
2719int
2720ix86_return_in_memory (type)
2721 tree type;
2722{
2723 int needed_intregs, needed_sseregs;
2724 if (TARGET_64BIT)
2725 {
2726 return !examine_argument (TYPE_MODE (type), type, 1,
2727 &needed_intregs, &needed_sseregs);
2728 }
2729 else
2730 {
2731 if (TYPE_MODE (type) == BLKmode)
2732 return 1;
2733 else if (MS_AGGREGATE_RETURN
2734 && AGGREGATE_TYPE_P (type)
2735 && int_size_in_bytes(type) <= 8)
2736 return 0;
2737 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2738 && int_size_in_bytes (type) == 8)
2739 || (int_size_in_bytes (type) > 12
2740 && TYPE_MODE (type) != TImode
2741 && TYPE_MODE (type) != TFmode
2742 && !VECTOR_MODE_P (TYPE_MODE (type))))
2743 return 1;
2744 return 0;
2745 }
2746}
2747
2748/* Define how to find the value returned by a library function
2749 assuming the value has mode MODE. */
2750rtx
2751ix86_libcall_value (mode)
2752 enum machine_mode mode;
2753{
2754 if (TARGET_64BIT)
2755 {
2756 switch (mode)
2757 {
2758 case SFmode:
2759 case SCmode:
2760 case DFmode:
2761 case DCmode:
2762 return gen_rtx_REG (mode, FIRST_SSE_REG);
2763 case TFmode:
2764 case TCmode:
2765 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2766 default:
2767 return gen_rtx_REG (mode, 0);
2768 }
2769 }
2770 else
2771 return gen_rtx_REG (mode, ix86_value_regno (mode));
2772}
2773
2774/* Given a mode, return the register to use for a return value. */
2775
2776static int
2777ix86_value_regno (mode)
2778 enum machine_mode mode;
2779{
2780 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2781 return FIRST_FLOAT_REG;
2782 if (mode == TImode || VECTOR_MODE_P (mode))
2783 return FIRST_SSE_REG;
2784 return 0;
53c17031 2785}
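/* Illustrative mapping (annotation, not part of the original source):
   on ia32 this yields %eax for integer values, %st(0) for floating-point
   values when TARGET_FLOAT_RETURNS_IN_80387, and %xmm0 for TImode and
   vector values. */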
2786\f
2787/* Create the va_list data type. */
53c17031 2788
2789tree
2790ix86_build_va_list ()
2791{
2792 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2793
2794 /* For i386 we use plain pointer to argument area. */
2795 if (!TARGET_64BIT)
2796 return build_pointer_type (char_type_node);
2797
f1e639b1 2798 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2799 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2800
fce5a9f2 2801 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2802 unsigned_type_node);
fce5a9f2 2803 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2804 unsigned_type_node);
2805 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2806 ptr_type_node);
2807 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2808 ptr_type_node);
2809
2810 DECL_FIELD_CONTEXT (f_gpr) = record;
2811 DECL_FIELD_CONTEXT (f_fpr) = record;
2812 DECL_FIELD_CONTEXT (f_ovf) = record;
2813 DECL_FIELD_CONTEXT (f_sav) = record;
2814
2815 TREE_CHAIN (record) = type_decl;
2816 TYPE_NAME (record) = type_decl;
2817 TYPE_FIELDS (record) = f_gpr;
2818 TREE_CHAIN (f_gpr) = f_fpr;
2819 TREE_CHAIN (f_fpr) = f_ovf;
2820 TREE_CHAIN (f_ovf) = f_sav;
2821
2822 layout_type (record);
2823
2824 /* The correct type is an array type of one element. */
2825 return build_array_type (record, build_index_type (size_zero_node));
2826}
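/* For reference (annotation, not part of the original source), the record
   built above corresponds to the x86-64 ABI's va_list type:

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];
*/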
2827
 2828/* Perform any actions needed for a function that is receiving a
fce5a9f2 2829 variable number of arguments.
2830
2831 CUM is as above.
2832
2833 MODE and TYPE are the mode and type of the current parameter.
2834
2835 PRETEND_SIZE is a variable that should be set to the amount of stack
2836 that must be pushed by the prolog to pretend that our caller pushed
2837 it.
2838
2839 Normally, this macro will push all remaining incoming registers on the
2840 stack and set PRETEND_SIZE to the length of the registers pushed. */
2841
2842void
2843ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2844 CUMULATIVE_ARGS *cum;
2845 enum machine_mode mode;
2846 tree type;
2847 int *pretend_size ATTRIBUTE_UNUSED;
2848 int no_rtl;
2849
2850{
2851 CUMULATIVE_ARGS next_cum;
2852 rtx save_area = NULL_RTX, mem;
2853 rtx label;
2854 rtx label_ref;
2855 rtx tmp_reg;
2856 rtx nsse_reg;
2857 int set;
2858 tree fntype;
2859 int stdarg_p;
2860 int i;
2861
2862 if (!TARGET_64BIT)
2863 return;
2864
2865 /* Indicate to allocate space on the stack for varargs save area. */
2866 ix86_save_varrargs_registers = 1;
2867
2868 cfun->stack_alignment_needed = 128;
2869
2870 fntype = TREE_TYPE (current_function_decl);
2871 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2872 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2873 != void_type_node));
2874
2875 /* For varargs, we do not want to skip the dummy va_dcl argument.
2876 For stdargs, we do want to skip the last named argument. */
2877 next_cum = *cum;
2878 if (stdarg_p)
2879 function_arg_advance (&next_cum, mode, type, 1);
2880
2881 if (!no_rtl)
2882 save_area = frame_pointer_rtx;
2883
2884 set = get_varargs_alias_set ();
2885
2886 for (i = next_cum.regno; i < ix86_regparm; i++)
2887 {
2888 mem = gen_rtx_MEM (Pmode,
2889 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2890 set_mem_alias_set (mem, set);
2891 emit_move_insn (mem, gen_rtx_REG (Pmode,
2892 x86_64_int_parameter_registers[i]));
2893 }
2894
2895 if (next_cum.sse_nregs)
2896 {
2897 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 2898 of SSE parameter registers used to call this function. We use
2899 sse_prologue_save insn template that produces computed jump across
2900 SSE saves. We need some preparation work to get this working. */
2901
2902 label = gen_label_rtx ();
2903 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2904
2905 /* Compute address to jump to :
2906 label - 5*eax + nnamed_sse_arguments*5 */
2907 tmp_reg = gen_reg_rtx (Pmode);
2908 nsse_reg = gen_reg_rtx (Pmode);
2909 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2910 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2911 gen_rtx_MULT (Pmode, nsse_reg,
2912 GEN_INT (4))));
2913 if (next_cum.sse_regno)
2914 emit_move_insn
2915 (nsse_reg,
2916 gen_rtx_CONST (DImode,
2917 gen_rtx_PLUS (DImode,
2918 label_ref,
2919 GEN_INT (next_cum.sse_regno * 4))));
2920 else
2921 emit_move_insn (nsse_reg, label_ref);
2922 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2923
 2924 /* Compute the address of the memory block we save into. We always use a
 2925 pointer pointing 127 bytes after the first byte to store - this is needed
 2926 to keep the instruction size limited to 4 bytes. */
2927 tmp_reg = gen_reg_rtx (Pmode);
2928 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2929 plus_constant (save_area,
2930 8 * REGPARM_MAX + 127)));
ad919812 2931 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2932 set_mem_alias_set (mem, set);
8ac61af7 2933 set_mem_align (mem, BITS_PER_WORD);
2934
2935 /* And finally do the dirty job! */
2936 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2937 GEN_INT (next_cum.sse_regno), label));
2938 }
2939
2940}
2941
2942/* Implement va_start. */
2943
2944void
e5faf155 2945ix86_va_start (valist, nextarg)
2946 tree valist;
2947 rtx nextarg;
2948{
2949 HOST_WIDE_INT words, n_gpr, n_fpr;
2950 tree f_gpr, f_fpr, f_ovf, f_sav;
2951 tree gpr, fpr, ovf, sav, t;
2952
2953 /* Only 64bit target needs something special. */
2954 if (!TARGET_64BIT)
2955 {
e5faf155 2956 std_expand_builtin_va_start (valist, nextarg);
2957 return;
2958 }
2959
2960 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2961 f_fpr = TREE_CHAIN (f_gpr);
2962 f_ovf = TREE_CHAIN (f_fpr);
2963 f_sav = TREE_CHAIN (f_ovf);
2964
2965 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2966 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2967 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2968 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2969 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2970
2971 /* Count number of gp and fp argument registers used. */
2972 words = current_function_args_info.words;
2973 n_gpr = current_function_args_info.regno;
2974 n_fpr = current_function_args_info.sse_regno;
2975
2976 if (TARGET_DEBUG_ARG)
2977 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2978 (int) words, (int) n_gpr, (int) n_fpr);
2979
2980 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2981 build_int_2 (n_gpr * 8, 0));
2982 TREE_SIDE_EFFECTS (t) = 1;
2983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2984
2985 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2986 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2987 TREE_SIDE_EFFECTS (t) = 1;
2988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2989
2990 /* Find the overflow area. */
2991 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2992 if (words != 0)
2993 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2994 build_int_2 (words * UNITS_PER_WORD, 0));
2995 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2996 TREE_SIDE_EFFECTS (t) = 1;
2997 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2998
2999 /* Find the register save area.
 3000 The prologue of the function saves it right above the stack frame. */
3001 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3002 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3003 TREE_SIDE_EFFECTS (t) = 1;
3004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3005}
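/* Worked example (annotation, not part of the original source): for a
   hypothetical "int f (int a, ...)", one general-purpose register is
   consumed by the named argument, so the stores above set
       gp_offset = 1 * 8 = 8
       fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48
   with overflow_arg_area pointing at the first stack-passed vararg and
   reg_save_area at the block saved by the prologue. */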
3006
3007/* Implement va_arg. */
3008rtx
3009ix86_va_arg (valist, type)
3010 tree valist, type;
3011{
0139adca 3012 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3013 tree f_gpr, f_fpr, f_ovf, f_sav;
3014 tree gpr, fpr, ovf, sav, t;
b932f770 3015 int size, rsize;
3016 rtx lab_false, lab_over = NULL_RTX;
3017 rtx addr_rtx, r;
3018 rtx container;
09b2e78d 3019 int indirect_p = 0;
3020
3021 /* Only 64bit target needs something special. */
3022 if (!TARGET_64BIT)
3023 {
3024 return std_expand_builtin_va_arg (valist, type);
3025 }
3026
3027 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3028 f_fpr = TREE_CHAIN (f_gpr);
3029 f_ovf = TREE_CHAIN (f_fpr);
3030 f_sav = TREE_CHAIN (f_ovf);
3031
3032 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3033 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3034 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3035 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3036 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3037
3038 size = int_size_in_bytes (type);
3039 if (size == -1)
3040 {
3041 /* Passed by reference. */
3042 indirect_p = 1;
3043 type = build_pointer_type (type);
3044 size = int_size_in_bytes (type);
3045 }
3046 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3047
3048 container = construct_container (TYPE_MODE (type), type, 0,
3049 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3050 /* Pull the value out of the saved registers ...  */
3053
3054 addr_rtx = gen_reg_rtx (Pmode);
3055
3056 if (container)
3057 {
3058 rtx int_addr_rtx, sse_addr_rtx;
3059 int needed_intregs, needed_sseregs;
3060 int need_temp;
3061
3062 lab_over = gen_label_rtx ();
3063 lab_false = gen_label_rtx ();
8bad7136 3064
3065 examine_argument (TYPE_MODE (type), type, 0,
3066 &needed_intregs, &needed_sseregs);
3067
3068
3069 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3070 || TYPE_ALIGN (type) > 128);
3071
d1f87653 3072 /* In case we are passing a structure, verify that it is a consecutive
3073 block on the register save area.  If not, we need to do moves. */
3074 if (!need_temp && !REG_P (container))
3075 {
d1f87653 3076 /* Verify that all registers are strictly consecutive.  */
3077 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3078 {
3079 int i;
3080
3081 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3082 {
3083 rtx slot = XVECEXP (container, 0, i);
b531087a 3084 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3085 || INTVAL (XEXP (slot, 1)) != i * 16)
3086 need_temp = 1;
3087 }
3088 }
3089 else
3090 {
3091 int i;
3092
3093 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3094 {
3095 rtx slot = XVECEXP (container, 0, i);
b531087a 3096 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3097 || INTVAL (XEXP (slot, 1)) != i * 8)
3098 need_temp = 1;
3099 }
3100 }
3101 }
3102 if (!need_temp)
3103 {
3104 int_addr_rtx = addr_rtx;
3105 sse_addr_rtx = addr_rtx;
3106 }
3107 else
3108 {
3109 int_addr_rtx = gen_reg_rtx (Pmode);
3110 sse_addr_rtx = gen_reg_rtx (Pmode);
3111 }
3112 /* First ensure that we fit completely in registers. */
3113 if (needed_intregs)
3114 {
3115 emit_cmp_and_jump_insns (expand_expr
3116 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3117 GEN_INT ((REGPARM_MAX - needed_intregs +
3118 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3119 1, lab_false);
3120 }
3121 if (needed_sseregs)
3122 {
3123 emit_cmp_and_jump_insns (expand_expr
3124 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3125 GEN_INT ((SSE_REGPARM_MAX -
3126 needed_sseregs + 1) * 16 +
3127 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3128 SImode, 1, lab_false);
3129 }
3130
3131 /* Compute index to start of area used for integer regs. */
3132 if (needed_intregs)
3133 {
3134 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3135 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3136 if (r != int_addr_rtx)
3137 emit_move_insn (int_addr_rtx, r);
3138 }
3139 if (needed_sseregs)
3140 {
3141 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3142 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3143 if (r != sse_addr_rtx)
3144 emit_move_insn (sse_addr_rtx, r);
3145 }
3146 if (need_temp)
3147 {
3148 int i;
3149 rtx mem;
3150
3151 /* Never use the memory itself, as it has the alias set. */
3152 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3153 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 3154 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3155 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3156
3157 for (i = 0; i < XVECLEN (container, 0); i++)
3158 {
3159 rtx slot = XVECEXP (container, 0, i);
3160 rtx reg = XEXP (slot, 0);
3161 enum machine_mode mode = GET_MODE (reg);
3162 rtx src_addr;
3163 rtx src_mem;
3164 int src_offset;
3165 rtx dest_mem;
3166
3167 if (SSE_REGNO_P (REGNO (reg)))
3168 {
3169 src_addr = sse_addr_rtx;
3170 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3171 }
3172 else
3173 {
3174 src_addr = int_addr_rtx;
3175 src_offset = REGNO (reg) * 8;
3176 }
3177 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3178 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3179 src_mem = adjust_address (src_mem, mode, src_offset);
3180 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3181 emit_move_insn (dest_mem, src_mem);
3182 }
3183 }
3184
3185 if (needed_intregs)
3186 {
3187 t =
3188 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3189 build_int_2 (needed_intregs * 8, 0));
3190 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3191 TREE_SIDE_EFFECTS (t) = 1;
3192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3193 }
3194 if (needed_sseregs)
3195 {
3196 t =
3197 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3198 build_int_2 (needed_sseregs * 16, 0));
3199 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3200 TREE_SIDE_EFFECTS (t) = 1;
3201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3202 }
3203
3204 emit_jump_insn (gen_jump (lab_over));
3205 emit_barrier ();
3206 emit_label (lab_false);
3207 }
3208
3209 /* ... otherwise out of the overflow area. */
3210
3211 /* Care for on-stack alignment if needed. */
3212 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3213 t = ovf;
3214 else
3215 {
3216 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3217 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3218 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3219 }
3220 t = save_expr (t);
3221
3222 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3223 if (r != addr_rtx)
3224 emit_move_insn (addr_rtx, r);
3225
3226 t =
3227 build (PLUS_EXPR, TREE_TYPE (t), t,
3228 build_int_2 (rsize * UNITS_PER_WORD, 0));
3229 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3230 TREE_SIDE_EFFECTS (t) = 1;
3231 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3232
3233 if (container)
3234 emit_label (lab_over);
3235
3236 if (indirect_p)
3237 {
3238 r = gen_rtx_MEM (Pmode, addr_rtx);
3239 set_mem_alias_set (r, get_varargs_alias_set ());
3240 emit_move_insn (addr_rtx, r);
3241 }
3242
3243 return addr_rtx;
3244}
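
/* The register/stack split implemented above can be summarized by this
   sketch.  Illustrative only: it models just an integer argument that
   needs no temporary and no extra alignment, using the hypothetical
   __va_list_tag fields sketched after ix86_va_start.  */
#if 0
static void *
va_arg_sketch (__va_list_tag *ap, int needed_intregs)
{
  void *addr;

  if (ap->gp_offset < (REGPARM_MAX - needed_intregs + 1) * 8)
    {
      /* The argument still fits in the register save area.  */
      addr = (char *) ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += needed_intregs * 8;
    }
  else
    {
      /* ... otherwise take it from the overflow (stack) area.  */
      addr = ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) addr + needed_intregs * 8;
    }
  return addr;
}
#endif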
3245\f
3246/* Return nonzero if OP is either an i387 or an SSE fp register. */
3247int
3248any_fp_register_operand (op, mode)
3249 rtx op;
3250 enum machine_mode mode ATTRIBUTE_UNUSED;
3251{
3252 return ANY_FP_REG_P (op);
3253}
3254
3255/* Return nonzero if OP is an i387 fp register. */
3256int
3257fp_register_operand (op, mode)
3258 rtx op;
3259 enum machine_mode mode ATTRIBUTE_UNUSED;
3260{
3261 return FP_REG_P (op);
3262}
3263
3264/* Return nonzero if OP is a non-fp register_operand. */
3265int
3266register_and_not_any_fp_reg_operand (op, mode)
3267 rtx op;
3268 enum machine_mode mode;
3269{
3270 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3271}
3272
40b982a9 3273/* Return nonzero if OP is a register operand other than an
3274 i387 fp register. */
3275int
3276register_and_not_fp_reg_operand (op, mode)
3277 rtx op;
3278 enum machine_mode mode;
3279{
3280 return register_operand (op, mode) && !FP_REG_P (op);
3281}
3282
3283/* Return nonzero if OP is a general operand representable on x86_64. */
3284
3285int
3286x86_64_general_operand (op, mode)
3287 rtx op;
3288 enum machine_mode mode;
3289{
3290 if (!TARGET_64BIT)
3291 return general_operand (op, mode);
3292 if (nonimmediate_operand (op, mode))
3293 return 1;
c05dbe81 3294 return x86_64_sign_extended_value (op);
3295}
3296
3297/* Return nonzero if OP is a general operand representable on x86_64
d6a7951f 3298 as either a sign-extended or zero-extended constant. */
3299
3300int
3301x86_64_szext_general_operand (op, mode)
3302 rtx op;
3303 enum machine_mode mode;
3304{
3305 if (!TARGET_64BIT)
3306 return general_operand (op, mode);
3307 if (nonimmediate_operand (op, mode))
3308 return 1;
c05dbe81 3309 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3310}
3311
3312/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3313
3314int
3315x86_64_nonmemory_operand (op, mode)
3316 rtx op;
3317 enum machine_mode mode;
3318{
3319 if (!TARGET_64BIT)
3320 return nonmemory_operand (op, mode);
3321 if (register_operand (op, mode))
3322 return 1;
c05dbe81 3323 return x86_64_sign_extended_value (op);
3324}
3325
3326/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3327
3328int
3329x86_64_movabs_operand (op, mode)
3330 rtx op;
3331 enum machine_mode mode;
3332{
3333 if (!TARGET_64BIT || !flag_pic)
3334 return nonmemory_operand (op, mode);
c05dbe81 3335 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3336 return 1;
3337 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3338 return 1;
3339 return 0;
3340}
3341
3342/* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign-extended or zero-extended constant. */
3343
3344int
3345x86_64_szext_nonmemory_operand (op, mode)
3346 rtx op;
3347 enum machine_mode mode;
3348{
3349 if (!TARGET_64BIT)
3350 return nonmemory_operand (op, mode);
3351 if (register_operand (op, mode))
3352 return 1;
c05dbe81 3353 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3354}
3355
3356/* Return nonzero if OP is an immediate operand representable on x86_64. */
3357
3358int
3359x86_64_immediate_operand (op, mode)
3360 rtx op;
3361 enum machine_mode mode;
3362{
3363 if (!TARGET_64BIT)
3364 return immediate_operand (op, mode);
c05dbe81 3365 return x86_64_sign_extended_value (op);
3366}
3367
3368/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended 32-bit constant. */
3369
3370int
3371x86_64_zext_immediate_operand (op, mode)
3372 rtx op;
3373 enum machine_mode mode ATTRIBUTE_UNUSED;
3374{
3375 return x86_64_zero_extended_value (op);
3376}
3377
3378/* Return nonzero if OP is (const_int 1), else return zero. */
3379
3380int
3381const_int_1_operand (op, mode)
3382 rtx op;
3383 enum machine_mode mode ATTRIBUTE_UNUSED;
3384{
3385 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3386}
3387
3388/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3389 for shift & compare patterns, as shifting by 0 does not change flags),
3390 else return zero. */
3391
3392int
3393const_int_1_31_operand (op, mode)
3394 rtx op;
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3396{
3397 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3398}
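
/* Illustrative only (not compiled): how the two predicates above behave
   on concrete operands.  */
#if 0
static void
shift_count_examples (void)
{
  const_int_1_operand (const1_rtx, VOIDmode);      /* returns 1 */
  const_int_1_31_operand (GEN_INT (31), VOIDmode); /* returns 1 */
  const_int_1_31_operand (const0_rtx, VOIDmode);   /* returns 0: a shift by 0
                                                      leaves the flags alone,
                                                      so it is useless for the
                                                      shift & compare
                                                      patterns.  */
}
#endif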
3399
3400/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3401 reference and a constant. */
3402
3403int
3404symbolic_operand (op, mode)
3405 register rtx op;
3406 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3407{
e075ae69 3408 switch (GET_CODE (op))
2a2ab3f9 3409 {
3410 case SYMBOL_REF:
3411 case LABEL_REF:
3412 return 1;
3413
3414 case CONST:
3415 op = XEXP (op, 0);
3416 if (GET_CODE (op) == SYMBOL_REF
3417 || GET_CODE (op) == LABEL_REF
3418 || (GET_CODE (op) == UNSPEC
3419 && (XINT (op, 1) == UNSPEC_GOT
3420 || XINT (op, 1) == UNSPEC_GOTOFF
3421 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3422 return 1;
3423 if (GET_CODE (op) != PLUS
3424 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3425 return 0;
3426
3427 op = XEXP (op, 0);
3428 if (GET_CODE (op) == SYMBOL_REF
3429 || GET_CODE (op) == LABEL_REF)
3430 return 1;
3431 /* Only @GOTOFF gets offsets. */
3432 if (GET_CODE (op) != UNSPEC
8ee41eaf 3433 || XINT (op, 1) != UNSPEC_GOTOFF)
3434 return 0;
3435
3436 op = XVECEXP (op, 0, 0);
3437 if (GET_CODE (op) == SYMBOL_REF
3438 || GET_CODE (op) == LABEL_REF)
3439 return 1;
3440 return 0;
3441
3442 default:
3443 return 0;
3444 }
3445}
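
/* Illustrative only: building the operand shapes symbolic_operand
   accepts; the symbol name "foo" is hypothetical.  */
#if 0
static void
symbolic_operand_examples (void)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
  rtx sum = gen_rtx_CONST (Pmode,
                           gen_rtx_PLUS (Pmode, sym, GEN_INT (4)));

  symbolic_operand (sym, VOIDmode);         /* 1: plain symbol_ref.  */
  symbolic_operand (sum, VOIDmode);         /* 1: symbol plus constant.  */
  symbolic_operand (GEN_INT (4), VOIDmode); /* 0: no symbol at all.  */
}
#endif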
2a2ab3f9 3446
e075ae69 3447/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3448
3449int
3450pic_symbolic_operand (op, mode)
3451 register rtx op;
3452 enum machine_mode mode ATTRIBUTE_UNUSED;
3453{
3454 if (GET_CODE (op) != CONST)
3455 return 0;
3456 op = XEXP (op, 0);
3457 if (TARGET_64BIT)
3458 {
3459 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3460 return 1;
3461 }
fce5a9f2 3462 else
2a2ab3f9 3463 {
3464 if (GET_CODE (op) == UNSPEC)
3465 return 1;
3466 if (GET_CODE (op) != PLUS
3467 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3468 return 0;
3469 op = XEXP (op, 0);
3470 if (GET_CODE (op) == UNSPEC)
3471 return 1;
2a2ab3f9 3472 }
e075ae69 3473 return 0;
2a2ab3f9 3474}
2a2ab3f9 3475
3476/* Return true if OP is a symbolic operand that resolves locally. */
3477
3478static int
3479local_symbolic_operand (op, mode)
3480 rtx op;
3481 enum machine_mode mode ATTRIBUTE_UNUSED;
3482{
3483 if (GET_CODE (op) == CONST
3484 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3485 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3486 op = XEXP (XEXP (op, 0), 0);
3487
3488 if (GET_CODE (op) == LABEL_REF)
3489 return 1;
3490
3491 if (GET_CODE (op) != SYMBOL_REF)
3492 return 0;
3493
3494 /* These we've been told are local by varasm and encode_section_info
3495 respectively. */
3496 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3497 return 1;
3498
3499 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3500 the compiler that assumes it can just stick the results of
3501 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3502 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3503 always create a DECL and invoke targetm.encode_section_info. */
3504 if (strncmp (XSTR (op, 0), internal_label_prefix,
3505 internal_label_prefix_len) == 0)
3506 return 1;
3507
3508 return 0;
3509}
3510
3511/* Test for various thread-local symbols. See ix86_encode_section_info. */
3512
3513int
3514tls_symbolic_operand (op, mode)
3515 register rtx op;
3516 enum machine_mode mode ATTRIBUTE_UNUSED;
3517{
3518 const char *symbol_str;
3519
3520 if (GET_CODE (op) != SYMBOL_REF)
3521 return 0;
3522 symbol_str = XSTR (op, 0);
3523
3524 if (symbol_str[0] != '%')
3525 return 0;
755ac5d4 3526 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3527}
3528
3529static int
3530tls_symbolic_operand_1 (op, kind)
3531 rtx op;
3532 enum tls_model kind;
3533{
3534 const char *symbol_str;
3535
3536 if (GET_CODE (op) != SYMBOL_REF)
3537 return 0;
3538 symbol_str = XSTR (op, 0);
3539
3540 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3541}
3542
3543int
3544global_dynamic_symbolic_operand (op, mode)
3545 register rtx op;
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3547{
3548 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3549}
3550
3551int
3552local_dynamic_symbolic_operand (op, mode)
3553 register rtx op;
3554 enum machine_mode mode ATTRIBUTE_UNUSED;
3555{
3556 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3557}
3558
3559int
3560initial_exec_symbolic_operand (op, mode)
3561 register rtx op;
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3563{
3564 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3565}
3566
3567int
3568local_exec_symbolic_operand (op, mode)
3569 register rtx op;
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3571{
3572 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3573}
3574
3575/* Test for a valid operand for a call instruction. Don't allow the
3576 arg pointer register or virtual regs since they may decay into
3577 reg + const, which the patterns can't handle. */
2a2ab3f9 3578
3579int
3580call_insn_operand (op, mode)
3581 rtx op;
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3583{
3584 /* Disallow indirect through a virtual register. This leads to
3585 compiler aborts when trying to eliminate them. */
3586 if (GET_CODE (op) == REG
3587 && (op == arg_pointer_rtx
564d80f4 3588 || op == frame_pointer_rtx
e075ae69
RH
3589 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3590 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3591 return 0;
2a2ab3f9 3592
3593 /* Disallow `call 1234'. Due to varying assembler lameness this
3594 gets either rejected or translated to `call .+1234'. */
3595 if (GET_CODE (op) == CONST_INT)
3596 return 0;
3597
3598 /* Explicitly allow SYMBOL_REF even if pic. */
3599 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3600 return 1;
2a2ab3f9 3601
3602 /* Otherwise we can allow any general_operand in the address. */
3603 return general_operand (op, Pmode);
e075ae69 3604}
79325812 3605
3606/* Test for a valid operand for a call instruction. Don't allow the
3607 arg pointer register or virtual regs since they may decay into
3608 reg + const, which the patterns can't handle. */
3609
3610int
3611sibcall_insn_operand (op, mode)
3612 rtx op;
3613 enum machine_mode mode ATTRIBUTE_UNUSED;
3614{
3615 /* Disallow indirect through a virtual register. This leads to
3616 compiler aborts when trying to eliminate them. */
3617 if (GET_CODE (op) == REG
3618 && (op == arg_pointer_rtx
3619 || op == frame_pointer_rtx
3620 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3621 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3622 return 0;
3623
3624 /* Explicitly allow SYMBOL_REF even if pic. */
3625 if (GET_CODE (op) == SYMBOL_REF)
3626 return 1;
3627
3628 /* Otherwise we can only allow register operands. */
3629 return register_operand (op, Pmode);
3630}
3631
3632int
3633constant_call_address_operand (op, mode)
3634 rtx op;
3635 enum machine_mode mode ATTRIBUTE_UNUSED;
3636{
3637 if (GET_CODE (op) == CONST
3638 && GET_CODE (XEXP (op, 0)) == PLUS
3639 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3640 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3641 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3642}
2a2ab3f9 3643
e075ae69 3644/* Match exactly zero and one. */
e9a25f70 3645
0f290768 3646int
3647const0_operand (op, mode)
3648 register rtx op;
3649 enum machine_mode mode;
3650{
3651 return op == CONST0_RTX (mode);
3652}
e9a25f70 3653
0f290768 3654int
3655const1_operand (op, mode)
3656 register rtx op;
3657 enum machine_mode mode ATTRIBUTE_UNUSED;
3658{
3659 return op == const1_rtx;
3660}
2a2ab3f9 3661
e075ae69 3662/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3663
3664int
3665const248_operand (op, mode)
3666 register rtx op;
3667 enum machine_mode mode ATTRIBUTE_UNUSED;
3668{
3669 return (GET_CODE (op) == CONST_INT
3670 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3671}
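
/* For reference: 2, 4 and 8 are exactly the index scales an i386
   effective address can encode, as in the (illustrative) instruction

       leal    12(%ebx,%ecx,4), %eax   # eax = ebx + ecx*4 + 12

   which reaches the backend as
   (plus (plus (mult (reg ecx) (const_int 4)) (reg ebx)) (const_int 12)).  */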
e9a25f70 3672
d1f87653 3673/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3674
3675int
3676incdec_operand (op, mode)
3677 register rtx op;
0631e0bf 3678 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3679{
f5143c46 3680 /* On Pentium 4, the inc and dec operations cause an extra dependency
3681 on the flags register, since the carry flag is not set. */
3682 if (TARGET_PENTIUM4 && !optimize_size)
3683 return 0;
2b1c08f5 3684 return op == const1_rtx || op == constm1_rtx;
e075ae69 3685}
2a2ab3f9 3686
3687/* Return nonzero if OP is acceptable as an operand of the DImode shift
3688 expander. */
3689
3690int
3691shiftdi_operand (op, mode)
3692 rtx op;
3693 enum machine_mode mode ATTRIBUTE_UNUSED;
3694{
3695 if (TARGET_64BIT)
3696 return nonimmediate_operand (op, mode);
3697 else
3698 return register_operand (op, mode);
3699}
3700
0f290768 3701/* Return false if this is the stack pointer, or any other fake
3702 register eliminable to the stack pointer. Otherwise, this is
3703 a register operand.
2a2ab3f9 3704
3705 This is used to prevent esp from being used as an index reg,
3706 which would only happen in pathological cases. */
5f1ec3e6 3707
3708int
3709reg_no_sp_operand (op, mode)
3710 register rtx op;
3711 enum machine_mode mode;
3712{
3713 rtx t = op;
3714 if (GET_CODE (t) == SUBREG)
3715 t = SUBREG_REG (t);
564d80f4 3716 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3717 return 0;
2a2ab3f9 3718
e075ae69 3719 return register_operand (op, mode);
2a2ab3f9 3720}
b840bfb0 3721
3722int
3723mmx_reg_operand (op, mode)
3724 register rtx op;
bd793c65 3725 enum machine_mode mode ATTRIBUTE_UNUSED;
3726{
3727 return MMX_REG_P (op);
3728}
3729
3730/* Return false if this is any eliminable register. Otherwise
3731 general_operand. */
3732
3733int
3734general_no_elim_operand (op, mode)
3735 register rtx op;
3736 enum machine_mode mode;
3737{
3738 rtx t = op;
3739 if (GET_CODE (t) == SUBREG)
3740 t = SUBREG_REG (t);
3741 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3742 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3743 || t == virtual_stack_dynamic_rtx)
3744 return 0;
3745 if (REG_P (t)
3746 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3747 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3748 return 0;
3749
3750 return general_operand (op, mode);
3751}
3752
3753/* Return false if this is any eliminable register. Otherwise
3754 register_operand or const_int. */
3755
3756int
3757nonmemory_no_elim_operand (op, mode)
3758 register rtx op;
3759 enum machine_mode mode;
3760{
3761 rtx t = op;
3762 if (GET_CODE (t) == SUBREG)
3763 t = SUBREG_REG (t);
3764 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3765 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3766 || t == virtual_stack_dynamic_rtx)
3767 return 0;
3768
3769 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3770}
3771
3772/* Return false if this is any eliminable register or stack register,
3773 otherwise work like register_operand. */
3774
3775int
3776index_register_operand (op, mode)
3777 register rtx op;
3778 enum machine_mode mode;
3779{
3780 rtx t = op;
3781 if (GET_CODE (t) == SUBREG)
3782 t = SUBREG_REG (t);
3783 if (!REG_P (t))
3784 return 0;
3785 if (t == arg_pointer_rtx
3786 || t == frame_pointer_rtx
3787 || t == virtual_incoming_args_rtx
3788 || t == virtual_stack_vars_rtx
3789 || t == virtual_stack_dynamic_rtx
3790 || REGNO (t) == STACK_POINTER_REGNUM)
3791 return 0;
3792
3793 return general_operand (op, mode);
3794}
3795
e075ae69 3796/* Return true if op is a Q_REGS class register. */
b840bfb0 3797
3798int
3799q_regs_operand (op, mode)
3800 register rtx op;
3801 enum machine_mode mode;
b840bfb0 3802{
3803 if (mode != VOIDmode && GET_MODE (op) != mode)
3804 return 0;
3805 if (GET_CODE (op) == SUBREG)
3806 op = SUBREG_REG (op);
7799175f 3807 return ANY_QI_REG_P (op);
0f290768 3808}
b840bfb0 3809
3810/* Return true if op is the flags register. */
3811
3812int
3813flags_reg_operand (op, mode)
3814 register rtx op;
3815 enum machine_mode mode;
3816{
3817 if (mode != VOIDmode && GET_MODE (op) != mode)
3818 return 0;
3819 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3820}
3821
e075ae69 3822/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3823
3824int
3825non_q_regs_operand (op, mode)
3826 register rtx op;
3827 enum machine_mode mode;
3828{
3829 if (mode != VOIDmode && GET_MODE (op) != mode)
3830 return 0;
3831 if (GET_CODE (op) == SUBREG)
3832 op = SUBREG_REG (op);
3833 return NON_QI_REG_P (op);
0f290768 3834}
b840bfb0 3835
3836int
3837zero_extended_scalar_load_operand (op, mode)
3838 rtx op;
3839 enum machine_mode mode ATTRIBUTE_UNUSED;
3840{
3841 unsigned n_elts;
3842 if (GET_CODE (op) != MEM)
3843 return 0;
3844 op = maybe_get_pool_constant (op);
3845 if (!op)
3846 return 0;
3847 if (GET_CODE (op) != CONST_VECTOR)
3848 return 0;
3849 n_elts =
3850 (GET_MODE_SIZE (GET_MODE (op)) /
3851 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3852 for (n_elts--; n_elts > 0; n_elts--)
3853 {
3854 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3855 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3856 return 0;
3857 }
3858 return 1;
3859}
3860
3861/* Return 1 when OP is an operand acceptable for a standard SSE move. */
3862int
3863vector_move_operand (op, mode)
3864 rtx op;
3865 enum machine_mode mode;
3866{
3867 if (nonimmediate_operand (op, mode))
3868 return 1;
3869 if (GET_MODE (op) != mode && mode != VOIDmode)
3870 return 0;
3871 return (op == CONST0_RTX (GET_MODE (op)));
3872}
3873
3874/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3875 insns. */
3876int
3877sse_comparison_operator (op, mode)
3878 rtx op;
3879 enum machine_mode mode ATTRIBUTE_UNUSED;
3880{
3881 enum rtx_code code = GET_CODE (op);
3882 switch (code)
3883 {
3884 /* Operations supported directly. */
3885 case EQ:
3886 case LT:
3887 case LE:
3888 case UNORDERED:
3889 case NE:
3890 case UNGE:
3891 case UNGT:
3892 case ORDERED:
3893 return 1;
3894 /* These are equivalent to the ones above in non-IEEE comparisons. */
3895 case UNEQ:
3896 case UNLT:
3897 case UNLE:
3898 case LTGT:
3899 case GE:
3900 case GT:
3901 return !TARGET_IEEE_FP;
3902 default:
3903 return 0;
3904 }
915119a5 3905}
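
/* For reference (assuming the usual CMPPS/CMPSS immediate encoding), the
   codes accepted directly above correspond to:

       EQ -> 0 (cmpeq)      UNORDERED -> 3 (cmpunord)
       LT -> 1 (cmplt)      NE        -> 4 (cmpneq)
       LE -> 2 (cmple)      UNGE      -> 5 (cmpnlt)
                            UNGT      -> 6 (cmpnle)
                            ORDERED   -> 7 (cmpord)

   while GE/GT and UNEQ/UNLT/UNLE/LTGT must be mapped onto these, which is
   only safe when IEEE conformance is not required.  */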
9076b9c1 3906/* Return 1 if OP is a valid comparison operator in a valid mode. */
e075ae69 3907int
3908ix86_comparison_operator (op, mode)
3909 register rtx op;
3910 enum machine_mode mode;
e075ae69 3911{
9076b9c1 3912 enum machine_mode inmode;
9a915772 3913 enum rtx_code code = GET_CODE (op);
3914 if (mode != VOIDmode && GET_MODE (op) != mode)
3915 return 0;
3916 if (GET_RTX_CLASS (code) != '<')
3917 return 0;
3918 inmode = GET_MODE (XEXP (op, 0));
3919
3920 if (inmode == CCFPmode || inmode == CCFPUmode)
3921 {
3922 enum rtx_code second_code, bypass_code;
3923 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3924 return (bypass_code == NIL && second_code == NIL);
3925 }
3926 switch (code)
3927 {
3928 case EQ: case NE:
3a3677ff 3929 return 1;
9076b9c1 3930 case LT: case GE:
7e08e190 3931 if (inmode == CCmode || inmode == CCGCmode
3932 || inmode == CCGOCmode || inmode == CCNOmode)
3933 return 1;
3934 return 0;
7e08e190 3935 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3936 if (inmode == CCmode)
3937 return 1;
3938 return 0;
3939 case GT: case LE:
7e08e190 3940 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3941 return 1;
3942 return 0;
3943 default:
3944 return 0;
3945 }
3946}
3947
3948/* Return 1 if OP is a valid comparison operator testing whether the
3949 carry flag is set. */
3950int
3951ix86_carry_flag_operator (op, mode)
3952 register rtx op;
3953 enum machine_mode mode;
3954{
3955 enum machine_mode inmode;
3956 enum rtx_code code = GET_CODE (op);
3957
3958 if (mode != VOIDmode && GET_MODE (op) != mode)
3959 return 0;
3960 if (GET_RTX_CLASS (code) != '<')
3961 return 0;
3962 inmode = GET_MODE (XEXP (op, 0));
3963 if (GET_CODE (XEXP (op, 0)) != REG
3964 || REGNO (XEXP (op, 0)) != 17
3965 || XEXP (op, 1) != const0_rtx)
3966 return 0;
3967
3968 if (inmode == CCFPmode || inmode == CCFPUmode)
3969 {
3970 enum rtx_code second_code, bypass_code;
3971
3972 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3973 if (bypass_code != NIL || second_code != NIL)
3974 return 0;
3975 code = ix86_fp_compare_code_to_integer (code);
3976 }
3977 else if (inmode != CCmode)
3978 return 0;
3979 return code == LTU;
3980}
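
/* Illustrative only: a comparison matched by ix86_carry_flag_operator
   lets patterns consume the carry bit directly, as in

       cmpl    %edx, %eax      # CF <- (eax <u edx)
       sbbl    %ecx, %ecx      # ecx <- CF ? -1 : 0

   i.e. (ltu (reg:CC 17) (const_int 0)) without a separate setcc; hard
   register 17 is the flags register checked above.  */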
3981
9076b9c1 3982/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3983
3984int
3985fcmov_comparison_operator (op, mode)
3986 register rtx op;
3987 enum machine_mode mode;
3988{
b62d22a2 3989 enum machine_mode inmode;
9a915772 3990 enum rtx_code code = GET_CODE (op);
e6e81735 3991
3992 if (mode != VOIDmode && GET_MODE (op) != mode)
3993 return 0;
3994 if (GET_RTX_CLASS (code) != '<')
3995 return 0;
3996 inmode = GET_MODE (XEXP (op, 0));
3997 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3998 {
9a915772 3999 enum rtx_code second_code, bypass_code;
e6e81735 4000
4001 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4002 if (bypass_code != NIL || second_code != NIL)
4003 return 0;
4004 code = ix86_fp_compare_code_to_integer (code);
4005 }
4006 /* The i387 supports just a limited set of condition codes. */
4007 switch (code)
4008 {
4009 case LTU: case GTU: case LEU: case GEU:
4010 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4011 return 1;
4012 return 0;
4013 case ORDERED: case UNORDERED:
4014 case EQ: case NE:
4015 return 1;
4016 default:
4017 return 0;
4018 }
e075ae69 4019}
b840bfb0 4020
4021/* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
4022
4023int
4024promotable_binary_operator (op, mode)
4025 register rtx op;
4026 enum machine_mode mode ATTRIBUTE_UNUSED;
4027{
4028 switch (GET_CODE (op))
4029 {
4030 case MULT:
4031 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4032 but the 386 and 486 do HImode multiplies faster. */
9e555526 4033 return ix86_tune > PROCESSOR_I486;
4034 case PLUS:
4035 case AND:
4036 case IOR:
4037 case XOR:
4038 case ASHIFT:
4039 return 1;
4040 default:
4041 return 0;
4042 }
4043}
4044
4045/* Nearly general operand, but accept any const_double, since we wish
4046 to be able to drop them into memory rather than have them get pulled
4047 into registers. */
b840bfb0 4048
2a2ab3f9 4049int
4050cmp_fp_expander_operand (op, mode)
4051 register rtx op;
4052 enum machine_mode mode;
2a2ab3f9 4053{
e075ae69 4054 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4055 return 0;
e075ae69 4056 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4057 return 1;
e075ae69 4058 return general_operand (op, mode);
4059}
4060
e075ae69 4061/* Match an SI or HImode register for a zero_extract. */
4062
4063int
e075ae69 4064ext_register_operand (op, mode)
2a2ab3f9 4065 register rtx op;
bb5177ac 4066 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 4067{
3522082b 4068 int regno;
4069 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4070 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 4071 return 0;
4072
4073 if (!register_operand (op, VOIDmode))
4074 return 0;
4075
d1f87653 4076 /* Be careful to accept only registers having upper parts. */
4077 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4078 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4079}
4080
4081/* Return 1 if this is a valid binary floating-point operation.
0f290768 4082 OP is the expression matched, and MODE is its mode. */
4083
4084int
4085binary_fp_operator (op, mode)
4086 register rtx op;
4087 enum machine_mode mode;
4088{
4089 if (mode != VOIDmode && mode != GET_MODE (op))
4090 return 0;
4091
4092 switch (GET_CODE (op))
4093 {
4094 case PLUS:
4095 case MINUS:
4096 case MULT:
4097 case DIV:
4098 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4099
4100 default:
4101 return 0;
4102 }
4103}
fee2770d 4104
e075ae69 4105int
b531087a 4106mult_operator (op, mode)
4107 register rtx op;
4108 enum machine_mode mode ATTRIBUTE_UNUSED;
4109{
4110 return GET_CODE (op) == MULT;
4111}
4112
4113int
b531087a 4114div_operator (op, mode)
4115 register rtx op;
4116 enum machine_mode mode ATTRIBUTE_UNUSED;
4117{
4118 return GET_CODE (op) == DIV;
4119}
4120
4121int
4122arith_or_logical_operator (op, mode)
4123 rtx op;
4124 enum machine_mode mode;
0a726ef1 4125{
4126 return ((mode == VOIDmode || GET_MODE (op) == mode)
4127 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4128 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4129}
4130
e075ae69 4131/* Returns 1 if OP is a memory operand with a displacement. */
4132
4133int
4134memory_displacement_operand (op, mode)
4135 register rtx op;
4136 enum machine_mode mode;
4f2c8ebb 4137{
e075ae69 4138 struct ix86_address parts;
e9a25f70 4139
4140 if (! memory_operand (op, mode))
4141 return 0;
4142
4143 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4144 abort ();
4145
4146 return parts.disp != NULL_RTX;
4147}
4148
16189740 4149/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4150 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4151
4152 ??? It seems likely that this will only work because cmpsi is an
4153 expander, and no actual insns use this. */
4154
4155int
4156cmpsi_operand (op, mode)
4157 rtx op;
4158 enum machine_mode mode;
fee2770d 4159{
b9b2c339 4160 if (nonimmediate_operand (op, mode))
4161 return 1;
4162
4163 if (GET_CODE (op) == AND
4164 && GET_MODE (op) == SImode
4165 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4166 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4167 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4168 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4169 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4170 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 4171 return 1;
e9a25f70 4172
4173 return 0;
4174}
d784886d 4175
4176/* Returns 1 if OP is a memory operand that cannot be represented by
4177 the modRM array. */
4178
4179int
e075ae69 4180long_memory_operand (op, mode)
4181 register rtx op;
4182 enum machine_mode mode;
4183{
e075ae69 4184 if (! memory_operand (op, mode))
4185 return 0;
4186
e075ae69 4187 return memory_address_length (op) != 0;
d784886d 4188}
4189
4190/* Return nonzero if the rtx is known aligned. */
4191
4192int
4193aligned_operand (op, mode)
4194 rtx op;
4195 enum machine_mode mode;
4196{
4197 struct ix86_address parts;
4198
4199 if (!general_operand (op, mode))
4200 return 0;
4201
0f290768 4202 /* Registers and immediate operands are always "aligned". */
4203 if (GET_CODE (op) != MEM)
4204 return 1;
4205
0f290768 4206 /* Don't even try to do any aligned optimizations with volatiles. */
4207 if (MEM_VOLATILE_P (op))
4208 return 0;
4209
4210 op = XEXP (op, 0);
4211
4212 /* Pushes and pops are only valid on the stack pointer. */
4213 if (GET_CODE (op) == PRE_DEC
4214 || GET_CODE (op) == POST_INC)
4215 return 1;
4216
4217 /* Decode the address. */
4218 if (! ix86_decompose_address (op, &parts))
4219 abort ();
4220
4221 if (parts.base && GET_CODE (parts.base) == SUBREG)
4222 parts.base = SUBREG_REG (parts.base);
4223 if (parts.index && GET_CODE (parts.index) == SUBREG)
4224 parts.index = SUBREG_REG (parts.index);
4225
4226 /* Look for some component that isn't known to be aligned. */
4227 if (parts.index)
4228 {
4229 if (parts.scale < 4
bdb429a5 4230 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4231 return 0;
4232 }
4233 if (parts.base)
4234 {
bdb429a5 4235 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4236 return 0;
4237 }
4238 if (parts.disp)
4239 {
4240 if (GET_CODE (parts.disp) != CONST_INT
4241 || (INTVAL (parts.disp) & 3) != 0)
4242 return 0;
4243 }
4244
4245 /* Didn't find one -- this must be an aligned address. */
4246 return 1;
4247}
e075ae69 4248\f
4249/* Initialize the table of extra 80387 mathematical constants. */
4250
4251static void
4252init_ext_80387_constants ()
4253{
4254 static const char * cst[5] =
4255 {
4256 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4257 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4258 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4259 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4260 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4261 };
4262 int i;
4263
4264 for (i = 0; i < 5; i++)
4265 {
4266 real_from_string (&ext_80387_constants_table[i], cst[i]);
4267 /* Ensure each constant is rounded to XFmode precision. */
4268 real_convert (&ext_80387_constants_table[i], XFmode,
4269 &ext_80387_constants_table[i]);
4270 }
4271
4272 ext_80387_constants_init = 1;
4273}
4274
e075ae69 4275/* Return true if the constant is something that can be loaded with
881b2a96 4276 a special instruction. */
4277
4278int
4279standard_80387_constant_p (x)
4280 rtx x;
57dbca5e 4281{
2b04e52b 4282 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4283 return -1;
881b2a96 4284
4285 if (x == CONST0_RTX (GET_MODE (x)))
4286 return 1;
4287 if (x == CONST1_RTX (GET_MODE (x)))
4288 return 2;
4289
4290 /* For XFmode constants, try to find a special 80387 instruction on
4291 those CPUs that benefit from them. */
4292 if (GET_MODE (x) == XFmode
9e555526 4293 && x86_ext_80387_constants & TUNEMASK)
4294 {
4295 REAL_VALUE_TYPE r;
4296 int i;
4297
4298 if (! ext_80387_constants_init)
4299 init_ext_80387_constants ();
4300
4301 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4302 for (i = 0; i < 5; i++)
4303 if (real_identical (&r, &ext_80387_constants_table[i]))
4304 return i + 3;
4305 }
4306
e075ae69 4307 return 0;
4308}
4309
4310/* Return the opcode of the special instruction to be used to load
4311 the constant X. */
4312
4313const char *
4314standard_80387_constant_opcode (x)
4315 rtx x;
4316{
4317 switch (standard_80387_constant_p (x))
4318 {
4319 case 1:
4320 return "fldz";
4321 case 2:
4322 return "fld1";
4323 case 3:
4324 return "fldlg2";
4325 case 4:
4326 return "fldln2";
4327 case 5:
4328 return "fldl2e";
4329 case 6:
4330 return "fldl2t";
4331 case 7:
4332 return "fldpi";
4333 }
4334 abort ();
4335}
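
/* For reference, the indices returned by standard_80387_constant_p pair
   up with the opcodes above and the table in init_ext_80387_constants:

       1  fldz    +0.0         5  fldl2e  log2(e)
       2  fld1    +1.0         6  fldl2t  log2(10)
       3  fldlg2  log10(2)     7  fldpi   pi
       4  fldln2  ln(2)                              */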
4336
4337/* Return the CONST_DOUBLE representing the 80387 constant that is
4338 loaded by the specified special instruction. The argument IDX
4339 matches the return value from standard_80387_constant_p. */
4340
4341rtx
4342standard_80387_constant_rtx (idx)
4343 int idx;
4344{
4345 int i;
4346
4347 if (! ext_80387_constants_init)
4348 init_ext_80387_constants ();
4349
4350 switch (idx)
4351 {
4352 case 3:
4353 case 4:
4354 case 5:
4355 case 6:
4356 case 7:
4357 i = idx - 3;
4358 break;
4359
4360 default:
4361 abort ();
4362 }
4363
4364 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4365}
4366
4367/* Return 1 if X is an FP constant we can load into an SSE register
4368 without using memory. */
4369int
4370standard_sse_constant_p (x)
4371 rtx x;
4372{
4373 if (x == const0_rtx)
4374 return 1;
4375 return (x == CONST0_RTX (GET_MODE (x)));
4376}
4377
4378/* Returns 1 if OP contains a symbol reference. */
4379
4380int
4381symbolic_reference_mentioned_p (op)
4382 rtx op;
4383{
6f7d635c 4384 register const char *fmt;
4385 register int i;
4386
4387 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4388 return 1;
4389
4390 fmt = GET_RTX_FORMAT (GET_CODE (op));
4391 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4392 {
4393 if (fmt[i] == 'E')
4394 {
4395 register int j;
4396
4397 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4398 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4399 return 1;
4400 }
e9a25f70 4401
4402 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4403 return 1;
4404 }
4405
4406 return 0;
4407}
4408
4409/* Return 1 if it is appropriate to emit `ret' instructions in the
4410 body of a function. Do this only if the epilogue is simple, needing a
4411 couple of insns. Prior to reloading, we can't tell how many registers
4412 must be saved, so return 0 then. Return 0 if there is no frame
4413 marker to de-allocate.
4414
4415 If NON_SAVING_SETJMP is defined and true, then it is not possible
4416 for the epilogue to be simple, so return 0. This is a special case
4417 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4418 until final, but jump_optimize may need to know sooner if a
4419 `return' is OK. */
4420
4421int
e075ae69 4422ix86_can_use_return_insn_p ()
32b5b1aa 4423{
4dd2ac2c 4424 struct ix86_frame frame;
9a7372d6 4425
4426#ifdef NON_SAVING_SETJMP
4427 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4428 return 0;
4429#endif
4430
4431 if (! reload_completed || frame_pointer_needed)
4432 return 0;
32b5b1aa 4433
4434 /* Don't allow more than 32768 bytes of popped arguments, since that's
4435 all we can do with one instruction. */
4436 if (current_function_pops_args
4437 && current_function_args_size >= 32768)
e075ae69 4438 return 0;
32b5b1aa 4439
4440 ix86_compute_frame_layout (&frame);
4441 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4442}
4443\f
4444/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4445int
c05dbe81 4446x86_64_sign_extended_value (value)
4447 rtx value;
4448{
4449 switch (GET_CODE (value))
4450 {
4451 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4452 to be at least 32, and thus all acceptable constants are
4453 represented as CONST_INT. */
4454 case CONST_INT:
4455 if (HOST_BITS_PER_WIDE_INT == 32)
4456 return 1;
4457 else
4458 {
4459 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4460 return trunc_int_for_mode (val, SImode) == val;
4461 }
4462 break;
4463
4464 /* For certain code models, the symbolic references are known to fit.
4465 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4466 library. Don't count TLS SYMBOL_REFs here, since they should fit
4467 only when inside of an UNSPEC, handled below. */
6189a572 4468 case SYMBOL_REF:
c05dbe81 4469 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4470
4471 /* For certain code models, the code is near as well. */
4472 case LABEL_REF:
4473 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4474 || ix86_cmodel == CM_KERNEL);
4475
4476 /* We also may accept the offsetted memory references in certain special
4477 cases. */
4478 case CONST:
4479 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4480 switch (XINT (XEXP (value, 0), 1))
4481 {
4482 case UNSPEC_GOTPCREL:
4483 case UNSPEC_DTPOFF:
4484 case UNSPEC_GOTNTPOFF:
4485 case UNSPEC_NTPOFF:
4486 return 1;
4487 default:
4488 break;
4489 }
4490 if (GET_CODE (XEXP (value, 0)) == PLUS)
4491 {
4492 rtx op1 = XEXP (XEXP (value, 0), 0);
4493 rtx op2 = XEXP (XEXP (value, 0), 1);
4494 HOST_WIDE_INT offset;
4495
4496 if (ix86_cmodel == CM_LARGE)
4497 return 0;
4498 if (GET_CODE (op2) != CONST_INT)
4499 return 0;
4500 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4501 switch (GET_CODE (op1))
4502 {
4503 case SYMBOL_REF:
75d38379 4504 /* For CM_SMALL assume that the latest object is 16MB below the
4505 end of the 31-bit boundary.  We may also accept pretty
4506 large negative constants knowing that all objects are
4507 in the positive half of the address space. */
4508 if (ix86_cmodel == CM_SMALL
75d38379 4509 && offset < 16*1024*1024
4510 && trunc_int_for_mode (offset, SImode) == offset)
4511 return 1;
4512 /* For CM_KERNEL we know that all objects reside in the
4513 negative half of the 32-bit address space.  We may not
4514 accept negative offsets, since they may be just off,
d6a7951f 4515 and we may accept pretty large positive ones. */
4516 if (ix86_cmodel == CM_KERNEL
4517 && offset > 0
4518 && trunc_int_for_mode (offset, SImode) == offset)
4519 return 1;
4520 break;
4521 case LABEL_REF:
4522 /* These conditions are similar to SYMBOL_REF ones, just the
4523 constraints for code models differ. */
c05dbe81 4524 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4525 && offset < 16*1024*1024
4526 && trunc_int_for_mode (offset, SImode) == offset)
4527 return 1;
4528 if (ix86_cmodel == CM_KERNEL
4529 && offset > 0
4530 && trunc_int_for_mode (offset, SImode) == offset)
4531 return 1;
4532 break;
4533 case UNSPEC:
4534 switch (XINT (op1, 1))
4535 {
4536 case UNSPEC_DTPOFF:
4537 case UNSPEC_NTPOFF:
4538 if (offset > 0
4539 && trunc_int_for_mode (offset, SImode) == offset)
4540 return 1;
4541 }
4542 break;
4543 default:
4544 return 0;
4545 }
4546 }
4547 return 0;
4548 default:
4549 return 0;
4550 }
4551}
4552
4553/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4554int
4555x86_64_zero_extended_value (value)
4556 rtx value;
4557{
4558 switch (GET_CODE (value))
4559 {
4560 case CONST_DOUBLE:
4561 if (HOST_BITS_PER_WIDE_INT == 32)
4562 return (GET_MODE (value) == VOIDmode
4563 && !CONST_DOUBLE_HIGH (value));
4564 else
4565 return 0;
4566 case CONST_INT:
4567 if (HOST_BITS_PER_WIDE_INT == 32)
4568 return INTVAL (value) >= 0;
4569 else
b531087a 4570 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4571 break;
4572
4573 /* For certain code models, the symbolic references are known to fit. */
4574 case SYMBOL_REF:
4575 return ix86_cmodel == CM_SMALL;
4576
4577 /* For certain code models, the code is near as well. */
4578 case LABEL_REF:
4579 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4580
4581 /* We also may accept the offsetted memory references in certain special
4582 cases. */
4583 case CONST:
4584 if (GET_CODE (XEXP (value, 0)) == PLUS)
4585 {
4586 rtx op1 = XEXP (XEXP (value, 0), 0);
4587 rtx op2 = XEXP (XEXP (value, 0), 1);
4588
4589 if (ix86_cmodel == CM_LARGE)
4590 return 0;
4591 switch (GET_CODE (op1))
4592 {
4593 case SYMBOL_REF:
4594 return 0;
d6a7951f 4595 /* For small code model we may accept pretty large positive
4596 offsets, since one bit is available for free. Negative
4597 offsets are limited by the size of NULL pointer area
4598 specified by the ABI. */
4599 if (ix86_cmodel == CM_SMALL
4600 && GET_CODE (op2) == CONST_INT
4601 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4602 && (trunc_int_for_mode (INTVAL (op2), SImode)
4603 == INTVAL (op2)))
4604 return 1;
4605 /* ??? For the kernel, we may accept adjustment of
4606 -0x10000000, since we know that it will just convert
d6a7951f 4607 negative address space to positive, but perhaps this
6189a572
JH
4608 is not worthwhile. */
4609 break;
4610 case LABEL_REF:
4611 /* These conditions are similar to SYMBOL_REF ones, just the
4612 constraints for code models differ. */
4613 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4614 && GET_CODE (op2) == CONST_INT
4615 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4616 && (trunc_int_for_mode (INTVAL (op2), SImode)
4617 == INTVAL (op2)))
4618 return 1;
4619 break;
4620 default:
4621 return 0;
4622 }
4623 }
4624 return 0;
4625 default:
4626 return 0;
4627 }
4628}
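
/* Illustrative only (not compiled): how a few concrete constants
   classify under the two predicates above, assuming a 64-bit
   HOST_WIDE_INT.  */
#if 0
static void
x86_64_immediate_examples (void)
{
  x86_64_sign_extended_value (GEN_INT (-1));           /* 1: imm32 sign-
                                                          extends to -1.  */
  x86_64_sign_extended_value (GEN_INT (0x7fffffff));   /* 1 */
  x86_64_zero_extended_value (GEN_INT (-1));           /* 0: upper 32 bits
                                                          are all ones.  */
  x86_64_zero_extended_value (GEN_INT (0xffffffff));   /* 1: upper 32 bits
                                                          are zero.  */
}
#endif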
4629
4630/* Value should be nonzero if functions must have frame pointers.
4631 Zero means the frame pointer need not be set up (and parms may
4632 be accessed via the stack pointer) in functions that seem suitable. */
4633
4634int
4635ix86_frame_pointer_required ()
4636{
4637 /* If we accessed previous frames, then the generated code expects
4638 to be able to access the saved ebp value in our frame. */
4639 if (cfun->machine->accesses_prev_frame)
4640 return 1;
a4f31c00 4641
4642 /* Several x86 OSes need a frame pointer for other reasons,
4643 usually pertaining to setjmp. */
4644 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4645 return 1;
4646
4647 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4648 the frame pointer by default. Turn it back on now if we've not
4649 got a leaf function. */
a7943381 4650 if (TARGET_OMIT_LEAF_FRAME_POINTER
4651 && (!current_function_is_leaf))
4652 return 1;
4653
4654 if (current_function_profile)
4655 return 1;
4656
4657 return 0;
4658}
4659
4660/* Record that the current function accesses previous call frames. */
4661
4662void
4663ix86_setup_frame_addresses ()
4664{
4665 cfun->machine->accesses_prev_frame = 1;
4666}
e075ae69 4667\f
4668#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4669# define USE_HIDDEN_LINKONCE 1
4670#else
4671# define USE_HIDDEN_LINKONCE 0
4672#endif
4673
bd09bdeb 4674static int pic_labels_used;
e9a25f70 4675
4676/* Fills in the label name that should be used for a pc thunk for
4677 the given register. */
4678
4679static void
4680get_pc_thunk_name (name, regno)
4681 char name[32];
4682 unsigned int regno;
4683{
4684 if (USE_HIDDEN_LINKONCE)
4685 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4686 else
4687 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4688}
4689
4690
4691/* This function generates code for -fpic that loads %ebx with
4692 the return address of the caller and then returns. */
4693
4694void
4cf12e7e 4695ix86_asm_file_end (file)
e075ae69 4696 FILE *file;
4697{
4698 rtx xops[2];
bd09bdeb 4699 int regno;
32b5b1aa 4700
bd09bdeb 4701 for (regno = 0; regno < 8; ++regno)
7c262518 4702 {
4703 char name[32];
4704
4705 if (! ((pic_labels_used >> regno) & 1))
4706 continue;
4707
145aacc2 4708 get_pc_thunk_name (name, regno);
bd09bdeb 4709
4710 if (USE_HIDDEN_LINKONCE)
4711 {
4712 tree decl;
4713
4714 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4715 error_mark_node);
4716 TREE_PUBLIC (decl) = 1;
4717 TREE_STATIC (decl) = 1;
4718 DECL_ONE_ONLY (decl) = 1;
4719
4720 (*targetm.asm_out.unique_section) (decl, 0);
4721 named_section (decl, NULL, 0);
4722
5eb99654 4723 (*targetm.asm_out.globalize_label) (file, name);
4724 fputs ("\t.hidden\t", file);
4725 assemble_name (file, name);
4726 fputc ('\n', file);
4727 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4728 }
4729 else
4730 {
4731 text_section ();
4732 ASM_OUTPUT_LABEL (file, name);
4733 }
4734
4735 xops[0] = gen_rtx_REG (SImode, regno);
4736 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4737 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4738 output_asm_insn ("ret", xops);
7c262518 4739 }
32b5b1aa 4740}
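
/* For reference, each thunk emitted above is just (AT&T syntax, %ebx
   shown; the other registers are analogous):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   It copies its own return address, the pc at the call site, into the
   register, keeping call/ret pairs balanced for the return-stack branch
   predictor.  */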
32b5b1aa 4741
c8c03509 4742/* Emit code for the SET_GOT patterns. */
32b5b1aa 4743
4744const char *
4745output_set_got (dest)
4746 rtx dest;
4747{
4748 rtx xops[3];
0d7d98ee 4749
c8c03509 4750 xops[0] = dest;
5fc0e5df 4751 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4752
c8c03509 4753 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4754 {
4755 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4756
4757 if (!flag_pic)
4758 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4759 else
4760 output_asm_insn ("call\t%a2", xops);
4761
4762#if TARGET_MACHO
4763 /* Output the "canonical" label name ("Lxx$pb") here too. This
4764 is what will be referred to by the Mach-O PIC subsystem. */
4765 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4766#endif
4977bab6 4767 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4768 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4769
4770 if (flag_pic)
4771 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4772 }
e075ae69 4773 else
e5cb57e8 4774 {
4775 char name[32];
4776 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4777 pic_labels_used |= 1 << REGNO (dest);
f996902d 4778
145aacc2 4779 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4780 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4781 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4782 }
e5cb57e8 4783
4784 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4785 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4786 else if (!TARGET_MACHO)
8e9fadc3 4787 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4788
c8c03509 4789 return "";
e9a25f70 4790}
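
/* For reference, the two sequences output_set_got can emit for %ebx look
   roughly like:

       call    .L2                     # without deep branch prediction
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

       call    __i686.get_pc_thunk.bx  # with deep branch prediction
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   the second form relying on the thunks emitted by ix86_asm_file_end.  */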
8dfe5673 4791
0d7d98ee 4792/* Generate a "push" pattern for input ARG. */
e9a25f70 4793
4794static rtx
4795gen_push (arg)
4796 rtx arg;
e9a25f70 4797{
c5c76735 4798 return gen_rtx_SET (VOIDmode,
4799 gen_rtx_MEM (Pmode,
4800 gen_rtx_PRE_DEC (Pmode,
4801 stack_pointer_rtx)),
4802 arg);
4803}
4804
4805/* Return >= 0 if there is an unused call-clobbered register available
4806 for the entire function. */
4807
4808static unsigned int
4809ix86_select_alt_pic_regnum ()
4810{
4811 if (current_function_is_leaf && !current_function_profile)
4812 {
4813 int i;
4814 for (i = 2; i >= 0; --i)
4815 if (!regs_ever_live[i])
4816 return i;
4817 }
4818
4819 return INVALID_REGNUM;
4820}
fce5a9f2 4821
4822/* Return 1 if we need to save REGNO. */
4823static int
1020a5ab 4824ix86_save_reg (regno, maybe_eh_return)
9b690711 4825 unsigned int regno;
37a58036 4826 int maybe_eh_return;
1020a5ab 4827{
4828 if (pic_offset_table_rtx
4829 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4830 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4831 || current_function_profile
4832 || current_function_calls_eh_return
4833 || current_function_uses_const_pool))
4834 {
4835 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4836 return 0;
4837 return 1;
4838 }
4839
4840 if (current_function_calls_eh_return && maybe_eh_return)
4841 {
4842 unsigned i;
4843 for (i = 0; ; i++)
4844 {
b531087a 4845 unsigned test = EH_RETURN_DATA_REGNO (i);
4846 if (test == INVALID_REGNUM)
4847 break;
9b690711 4848 if (test == regno)
4849 return 1;
4850 }
4851 }
4dd2ac2c 4852
4853 return (regs_ever_live[regno]
4854 && !call_used_regs[regno]
4855 && !fixed_regs[regno]
4856 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4857}
4858
4859/* Return the number of registers to be saved on the stack. */
4860
4861static int
4862ix86_nsaved_regs ()
4863{
4864 int nregs = 0;
4865 int regno;
4866
4dd2ac2c 4867 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4868 if (ix86_save_reg (regno, true))
4dd2ac2c 4869 nregs++;
4870 return nregs;
4871}
4872
4873/* Return the offset between two registers, one to be eliminated, and the other
4874 its replacement, at the start of a routine. */
4875
4876HOST_WIDE_INT
4877ix86_initial_elimination_offset (from, to)
4878 int from;
4879 int to;
4880{
4dd2ac2c
JH
4881 struct ix86_frame frame;
4882 ix86_compute_frame_layout (&frame);
4883
4884 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4885 return frame.hard_frame_pointer_offset;
4886 else if (from == FRAME_POINTER_REGNUM
4887 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4888 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4889 else
4890 {
4891 if (to != STACK_POINTER_REGNUM)
4892 abort ();
4893 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4894 return frame.stack_pointer_offset;
4895 else if (from != FRAME_POINTER_REGNUM)
4896 abort ();
0903fcab 4897 else
4dd2ac2c 4898 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4899 }
4900}
4901
4dd2ac2c 4902/* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
0f290768 4903
4904static void
4905ix86_compute_frame_layout (frame)
4906 struct ix86_frame *frame;
65954bd8 4907{
65954bd8 4908 HOST_WIDE_INT total_size;
564d80f4 4909 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4910 int offset;
4911 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4912 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4913
4dd2ac2c 4914 frame->nregs = ix86_nsaved_regs ();
564d80f4 4915 total_size = size;
65954bd8 4916
9ba81eaa 4917 /* Skip return address and saved base pointer. */
4918 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4919
4920 frame->hard_frame_pointer_offset = offset;
564d80f4 4921
4922 /* Do some sanity checking of stack_alignment_needed and
4923 preferred_alignment, since the i386 port is the only one using those
f710504c 4924 features, which may break easily. */
564d80f4 4925
4926 if (size && !stack_alignment_needed)
4927 abort ();
4928 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4929 abort ();
4930 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4931 abort ();
4932 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4933 abort ();
564d80f4 4934
4935 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4936 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4937
4938 /* Register save area */
4939 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4940
4941 /* Va-arg area */
4942 if (ix86_save_varrargs_registers)
4943 {
4944 offset += X86_64_VARARGS_SIZE;
4945 frame->va_arg_size = X86_64_VARARGS_SIZE;
4946 }
4947 else
4948 frame->va_arg_size = 0;
4949
4950 /* Align start of frame for local function. */
4951 frame->padding1 = ((offset + stack_alignment_needed - 1)
4952 & -stack_alignment_needed) - offset;
f73ad30e 4953
4dd2ac2c 4954 offset += frame->padding1;
65954bd8 4955
4956 /* Frame pointer points here. */
4957 frame->frame_pointer_offset = offset;
54ff41b7 4958
4dd2ac2c 4959 offset += size;
65954bd8 4960
4961 /* Add outgoing arguments area. Can be skipped if we eliminated
4962 all the function calls as dead code. */
4963 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4964 {
4965 offset += current_function_outgoing_args_size;
4966 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4967 }
4968 else
4969 frame->outgoing_arguments_size = 0;
564d80f4 4970
4971 /* Align stack boundary. Only needed if we're calling another function
4972 or using alloca. */
4973 if (!current_function_is_leaf || current_function_calls_alloca)
4974 frame->padding2 = ((offset + preferred_alignment - 1)
4975 & -preferred_alignment) - offset;
4976 else
4977 frame->padding2 = 0;
4978
4979 offset += frame->padding2;
4980
4981 /* We've reached end of stack frame. */
4982 frame->stack_pointer_offset = offset;
4983
4984 /* Size prologue needs to allocate. */
4985 frame->to_allocate =
4986 (size + frame->padding1 + frame->padding2
8362f420 4987 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4988
4989 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4990 && current_function_is_leaf)
4991 {
4992 frame->red_zone_size = frame->to_allocate;
4993 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4994 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4995 }
4996 else
4997 frame->red_zone_size = 0;
4998 frame->to_allocate -= frame->red_zone_size;
4999 frame->stack_pointer_offset -= frame->red_zone_size;
5000#if 0
5001 fprintf (stderr, "nregs: %i\n", frame->nregs);
5002 fprintf (stderr, "size: %i\n", size);
5003 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5004 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 5005 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5006 fprintf (stderr, "padding2: %i\n", frame->padding2);
5007 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 5008 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5009 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5010 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5011 frame->hard_frame_pointer_offset);
5012 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5013#endif
5014}
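
/* A worked example of the layout computed above (a sketch, not compiler
   output), assuming a 32-bit target with UNITS_PER_WORD == 4, a default
   stack_alignment_needed of 4, frame_pointer_needed, two saved registers,
   20 bytes of locals and a leaf function with no varargs area:

	offset = 8			return address + saved %ebp
	hard_frame_pointer_offset = 8
	offset += 2 * 4			register save area -> 16
	padding1 = 0			16 is already aligned
	frame_pointer_offset = 16
	offset += 20			locals -> 36
	padding2 = 0			leaf, no alloca
	stack_pointer_offset = 36
	to_allocate = 20  */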
5015
5016/* Emit code to save registers in the prologue. */
5017
5018static void
5019ix86_emit_save_regs ()
5020{
5021 register int regno;
0903fcab 5022 rtx insn;
0903fcab 5023
4dd2ac2c 5024 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5025 if (ix86_save_reg (regno, true))
0903fcab 5026 {
0d7d98ee 5027 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5028 RTX_FRAME_RELATED_P (insn) = 1;
5029 }
5030}
5031
c6036a37 5032/* Emit code to save registers using MOV insns. The first register
5033 is saved at POINTER + OFFSET. */
5034static void
5035ix86_emit_save_regs_using_mov (pointer, offset)
5036 rtx pointer;
5037 HOST_WIDE_INT offset;
5038{
5039 int regno;
5040 rtx insn;
5041
5042 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5043 if (ix86_save_reg (regno, true))
5044 {
5045 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5046 Pmode, offset),
5047 gen_rtx_REG (Pmode, regno));
5048 RTX_FRAME_RELATED_P (insn) = 1;
5049 offset += UNITS_PER_WORD;
5050 }
5051}
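
/* As a sketch (not compiler output): with %ebx and %esi to save,
   POINTER == %esp and OFFSET == 0, the loop above emits the equivalent of

	movl %ebx, 0(%esp)
	movl %esi, 4(%esp)

   storing each ix86_save_reg register at consecutive words.  */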
5052
0f290768 5053/* Expand the prologue into a bunch of separate insns. */
5054
5055void
5056ix86_expand_prologue ()
2a2ab3f9 5057{
564d80f4 5058 rtx insn;
bd09bdeb 5059 bool pic_reg_used;
4dd2ac2c 5060 struct ix86_frame frame;
6ab16dd9 5061 int use_mov = 0;
c6036a37 5062 HOST_WIDE_INT allocate;
4dd2ac2c 5063
4977bab6 5064 ix86_compute_frame_layout (&frame);
2ab0437e 5065 if (!optimize_size)
6ab16dd9 5066 {
5067 int count = frame.nregs;
5068
5069 /* The fast prologue uses move instead of push to save registers. This
5070 is significantly longer, but also executes faster as modern hardware
5071 can execute the moves in parallel, but can't do that for push/pop.
5072
d1f87653 5073 Be careful about choosing which prologue to emit: When the function
5074 takes many instructions to execute, we may use the slow version, as
5075 well as when the function is known to be outside a hot spot (this is
5076 known only with profile feedback). Weight the size of the function by
5077 the number of registers to save, as it is cheap to use one or two push
5078 instructions but very slow to use many of them. */
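
      /* An illustration of the weighting below (a sketch, not a measured
	 number): with three registers to save the threshold becomes
	 (3 - 1) * FAST_PROLOGUE_INSN_COUNT.  A function estimated by
	 expensive_function_p to execute more insns than that keeps the
	 short push-based prologue; a small hot function gets the
	 move-based one.  */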
5079 if (count)
5080 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5081 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5082 || (flag_branch_probabilities
5083 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5084 use_fast_prologue_epilogue = 0;
5085 else
5086 use_fast_prologue_epilogue = !expensive_function_p (count);
5087 if (TARGET_PROLOGUE_USING_MOVE)
5088 use_mov = use_fast_prologue_epilogue;
6ab16dd9 5089 }
79325812 5090
5091 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5092 slower on all targets. Also sdb doesn't like it. */
e9a25f70 5093
5094 if (frame_pointer_needed)
5095 {
564d80f4 5096 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 5097 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 5098
564d80f4 5099 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 5100 RTX_FRAME_RELATED_P (insn) = 1;
5101 }
5102
5103 allocate = frame.to_allocate;
5104 /* In case we are dealing only with a single register and an empty frame,
5105 push is equivalent to the mov+add sequence. */
5106 if (allocate == 0 && frame.nregs <= 1)
5107 use_mov = 0;
5108
5109 if (!use_mov)
5110 ix86_emit_save_regs ();
5111 else
5112 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 5113
c6036a37 5114 if (allocate == 0)
8dfe5673 5115 ;
e323735c 5116 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 5117 {
5118 insn = emit_insn (gen_pro_epilogue_adjust_stack
5119 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 5120 GEN_INT (-allocate)));
e075ae69 5121 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 5122 }
79325812 5123 else
8dfe5673 5124 {
e075ae69 5125 /* ??? Is this only valid for Win32? */
e9a25f70 5126
e075ae69 5127 rtx arg0, sym;
e9a25f70 5128
8362f420 5129 if (TARGET_64BIT)
b531087a 5130 abort ();
8362f420 5131
e075ae69 5132 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 5133 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 5134
5135 sym = gen_rtx_MEM (FUNCTION_MODE,
5136 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 5137 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5138
5139 CALL_INSN_FUNCTION_USAGE (insn)
5140 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5141 CALL_INSN_FUNCTION_USAGE (insn));
5142
5143 /* Don't allow scheduling pass to move insns across __alloca
5144 call. */
5145 emit_insn (gen_blockage (const0_rtx));
e075ae69 5146 }
5147 if (use_mov)
5148 {
5149 if (!frame_pointer_needed || !frame.to_allocate)
5150 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5151 else
5152 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5153 -frame.nregs * UNITS_PER_WORD);
5154 }
e9a25f70 5155
5156#ifdef SUBTARGET_PROLOGUE
5157 SUBTARGET_PROLOGUE;
0f290768 5158#endif
84530511 5159
5160 pic_reg_used = false;
5161 if (pic_offset_table_rtx
5162 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5163 || current_function_profile))
5164 {
5165 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5166
5167 if (alt_pic_reg_used != INVALID_REGNUM)
5168 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5169
5170 pic_reg_used = true;
5171 }
5172
e9a25f70 5173 if (pic_reg_used)
5174 {
5175 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5176
5177 /* Even with accurate pre-reload life analysis, we can wind up
5178 deleting all references to the pic register after reload.
5179 Consider if cross-jumping unifies two sides of a branch
d1f87653 5180 controlled by a comparison vs the only read from a global.
5181 In which case, allow the set_got to be deleted, though we're
5182 too late to do anything about the ebx save in the prologue. */
5183 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5184 }
77a989d1 5185
5186 /* Prevent function calls from being scheduled before the call to mcount.
5187 In the pic_reg_used case, make sure that the got load isn't deleted. */
5188 if (current_function_profile)
5189 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5190}
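
/* A sketch of a typical prologue produced by the code above for a 32-bit
   function with a frame pointer, one saved register and 16 bytes of
   locals (illustrative only, not compiler output):

	pushl %ebp
	movl %esp, %ebp
	pushl %ebx
	subl $16, %esp

   With TARGET_PROLOGUE_USING_MOVE the push of %ebx is instead folded
   into a larger stack adjustment followed by a move into the frame.  */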
5191
5192/* Emit code to restore saved registers using MOV insns. First register
5193 is restored from POINTER + OFFSET. */
5194static void
5195ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5196 rtx pointer;
5197 int offset;
37a58036 5198 int maybe_eh_return;
5199{
5200 int regno;
da2d1d3a 5201
4dd2ac2c 5202 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 5203 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 5204 {
4dd2ac2c 5205 emit_move_insn (gen_rtx_REG (Pmode, regno),
5206 adjust_address (gen_rtx_MEM (Pmode, pointer),
5207 Pmode, offset));
4dd2ac2c 5208 offset += UNITS_PER_WORD;
5209 }
5210}
5211
0f290768 5212/* Restore function stack, frame, and registers. */
e9a25f70 5213
2a2ab3f9 5214void
5215ix86_expand_epilogue (style)
5216 int style;
2a2ab3f9 5217{
1c71e60e 5218 int regno;
fdb8a883 5219 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 5220 struct ix86_frame frame;
65954bd8 5221 HOST_WIDE_INT offset;
5222
5223 ix86_compute_frame_layout (&frame);
2a2ab3f9 5224
a4f31c00 5225 /* Calculate start of saved registers relative to ebp. Special care
5226 must be taken for the normal return case of a function using
5227 eh_return: the eax and edx registers are marked as saved, but not
5228 restored along this path. */
5229 offset = frame.nregs;
5230 if (current_function_calls_eh_return && style != 2)
5231 offset -= 2;
5232 offset *= -UNITS_PER_WORD;
2a2ab3f9 5233
5234 /* If we're only restoring one register and sp is not valid, then
5235 use a move instruction to restore the register, since it's
0f290768 5236 less work than reloading sp and popping the register.
5237
5238 The default code results in a stack adjustment using an add/lea instruction,
5239 while this code results in a LEAVE instruction (or discrete equivalent),
5240 so it is profitable in some other cases as well. Especially when there
5241 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 5242 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 5243 tuning in the future. */
4dd2ac2c 5244 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 5245 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 5246 && use_fast_prologue_epilogue
c6036a37 5247 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 5248 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 5249 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 5250 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 5251 || current_function_calls_eh_return)
2a2ab3f9 5252 {
5253 /* Restore registers. We can use ebp or esp to address the memory
5254 locations. If both are available, default to ebp, since offsets
5255 are known to be small. The only exception is esp pointing directly
5256 to the end of the block of saved registers, where we may simplify
5257 the addressing mode. */
5258
4dd2ac2c 5259 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5260 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5261 frame.to_allocate, style == 2);
da2d1d3a 5262 else
5263 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5264 offset, style == 2);
5265
5266 /* eh_return epilogues need %ecx added to the stack pointer. */
5267 if (style == 2)
5268 {
5269 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 5270
5271 if (frame_pointer_needed)
5272 {
5273 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5274 tmp = plus_constant (tmp, UNITS_PER_WORD);
5275 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5276
5277 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5278 emit_move_insn (hard_frame_pointer_rtx, tmp);
5279
5280 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 5281 (stack_pointer_rtx, sa, const0_rtx));
5282 }
5283 else
5284 {
5285 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5286 tmp = plus_constant (tmp, (frame.to_allocate
5287 + frame.nregs * UNITS_PER_WORD));
5288 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5289 }
5290 }
5291 else if (!frame_pointer_needed)
5292 emit_insn (gen_pro_epilogue_adjust_stack
5293 (stack_pointer_rtx, stack_pointer_rtx,
5294 GEN_INT (frame.to_allocate
5295 + frame.nregs * UNITS_PER_WORD)));
0f290768 5296 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 5297 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 5298 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 5299 else
2a2ab3f9 5300 {
5301 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5302 hard_frame_pointer_rtx,
f2042df3 5303 const0_rtx));
5304 if (TARGET_64BIT)
5305 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5306 else
5307 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5308 }
5309 }
1c71e60e 5310 else
68f654ec 5311 {
5312 /* First step is to deallocate the stack frame so that we can
5313 pop the registers. */
5314 if (!sp_valid)
5315 {
5316 if (!frame_pointer_needed)
5317 abort ();
5318 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5319 hard_frame_pointer_rtx,
f2042df3 5320 GEN_INT (offset)));
1c71e60e 5321 }
4dd2ac2c 5322 else if (frame.to_allocate)
5323 emit_insn (gen_pro_epilogue_adjust_stack
5324 (stack_pointer_rtx, stack_pointer_rtx,
5325 GEN_INT (frame.to_allocate)));
1c71e60e 5326
4dd2ac2c 5327 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 5328 if (ix86_save_reg (regno, false))
5329 {
5330 if (TARGET_64BIT)
5331 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5332 else
5333 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5334 }
4dd2ac2c 5335 if (frame_pointer_needed)
8362f420 5336 {
f5143c46 5337 /* Leave results in shorter dependency chains on CPUs that are
5338 able to grok it fast. */
5339 if (TARGET_USE_LEAVE)
5340 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5341 else if (TARGET_64BIT)
5342 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5343 else
5344 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5345 }
68f654ec 5346 }
68f654ec 5347
cbbf65e0 5348 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 5349 if (style == 0)
5350 return;
5351
5352 if (current_function_pops_args && current_function_args_size)
5353 {
e075ae69 5354 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 5355
5356 /* i386 can only pop 64K bytes. If asked to pop more, pop
5357 return address, do explicit add, and jump indirectly to the
0f290768 5358 caller. */
2a2ab3f9 5359
b8c752c8 5360 if (current_function_pops_args >= 65536)
2a2ab3f9 5361 {
e075ae69 5362 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 5363
5364 /* There is no "pascal" calling convention in the 64bit ABI. */
5365 if (TARGET_64BIT)
b531087a 5366 abort ();
8362f420 5367
5368 emit_insn (gen_popsi1 (ecx));
5369 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 5370 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 5371 }
79325812 5372 else
5373 emit_jump_insn (gen_return_pop_internal (popc));
5374 }
5375 else
5376 emit_jump_insn (gen_return_internal ());
5377}
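
/* Two illustrative epilogues matching the main cases above (a sketch,
   not compiler output).  The move/leave form, one saved register:

	movl -4(%ebp), %ebx
	leave
	ret

   and the pop-based form:

	addl $16, %esp
	popl %ebx
	popl %ebp
	ret  */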
5378
5379/* Reset from the function's potential modifications. */
5380
5381static void
5382ix86_output_function_epilogue (file, size)
5383 FILE *file ATTRIBUTE_UNUSED;
5384 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5385{
5386 if (pic_offset_table_rtx)
5387 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5388}
5389\f
5390/* Extract the parts of an RTL expression that is a valid memory address
5391 for an instruction. Return 0 if the structure of the address is
5392 grossly off. Return -1 if the address contains ASHIFT, so it is not
5393 strictly valid, but is still used for computing the length of the lea
5394 instruction. */
5395
5396static int
5397ix86_decompose_address (addr, out)
5398 register rtx addr;
5399 struct ix86_address *out;
5400{
5401 rtx base = NULL_RTX;
5402 rtx index = NULL_RTX;
5403 rtx disp = NULL_RTX;
5404 HOST_WIDE_INT scale = 1;
5405 rtx scale_rtx = NULL_RTX;
b446e5a2 5406 int retval = 1;
e075ae69 5407
1540f9eb 5408 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5409 base = addr;
5410 else if (GET_CODE (addr) == PLUS)
5411 {
5412 rtx op0 = XEXP (addr, 0);
5413 rtx op1 = XEXP (addr, 1);
5414 enum rtx_code code0 = GET_CODE (op0);
5415 enum rtx_code code1 = GET_CODE (op1);
5416
5417 if (code0 == REG || code0 == SUBREG)
5418 {
5419 if (code1 == REG || code1 == SUBREG)
5420 index = op0, base = op1; /* index + base */
5421 else
5422 base = op0, disp = op1; /* base + displacement */
5423 }
5424 else if (code0 == MULT)
e9a25f70 5425 {
5426 index = XEXP (op0, 0);
5427 scale_rtx = XEXP (op0, 1);
5428 if (code1 == REG || code1 == SUBREG)
5429 base = op1; /* index*scale + base */
e9a25f70 5430 else
5431 disp = op1; /* index*scale + disp */
5432 }
5433 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5434 {
5435 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5436 scale_rtx = XEXP (XEXP (op0, 0), 1);
5437 base = XEXP (op0, 1);
5438 disp = op1;
2a2ab3f9 5439 }
5440 else if (code0 == PLUS)
5441 {
5442 index = XEXP (op0, 0); /* index + base + disp */
5443 base = XEXP (op0, 1);
5444 disp = op1;
5445 }
5446 else
b446e5a2 5447 return 0;
5448 }
5449 else if (GET_CODE (addr) == MULT)
5450 {
5451 index = XEXP (addr, 0); /* index*scale */
5452 scale_rtx = XEXP (addr, 1);
5453 }
5454 else if (GET_CODE (addr) == ASHIFT)
5455 {
5456 rtx tmp;
5457
5458 /* We're called for lea too, which implements ashift on occasion. */
5459 index = XEXP (addr, 0);
5460 tmp = XEXP (addr, 1);
5461 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 5462 return 0;
5463 scale = INTVAL (tmp);
5464 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 5465 return 0;
e075ae69 5466 scale = 1 << scale;
b446e5a2 5467 retval = -1;
2a2ab3f9 5468 }
2a2ab3f9 5469 else
5470 disp = addr; /* displacement */
5471
5472 /* Extract the integral value of scale. */
5473 if (scale_rtx)
e9a25f70 5474 {
e075ae69 5475 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 5476 return 0;
e075ae69 5477 scale = INTVAL (scale_rtx);
e9a25f70 5478 }
3b3c6a3f 5479
5480 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5481 if (base && index && scale == 1
5482 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5483 || index == stack_pointer_rtx))
5484 {
5485 rtx tmp = base;
5486 base = index;
5487 index = tmp;
5488 }
5489
5490 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5491 if ((base == hard_frame_pointer_rtx
5492 || base == frame_pointer_rtx
5493 || base == arg_pointer_rtx) && !disp)
5494 disp = const0_rtx;
5495
5496 /* Special case: on K6, [%esi] causes the instruction to be vector
5497 decoded. Avoid this by transforming to [%esi+0]. */
9e555526 5498 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 5499 && base && !index && !disp
329e1d01 5500 && REG_P (base)
5501 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5502 disp = const0_rtx;
5503
5504 /* Special case: encode reg+reg instead of reg*2. */
5505 if (!base && index && scale && scale == 2)
5506 base = index, scale = 1;
0f290768 5507
5508 /* Special case: scaling cannot be encoded without base or displacement. */
5509 if (!base && !disp && index && scale != 1)
5510 disp = const0_rtx;
5511
5512 out->base = base;
5513 out->index = index;
5514 out->disp = disp;
5515 out->scale = scale;
3b3c6a3f 5516
b446e5a2 5517 return retval;
e075ae69 5518}
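
/* Illustrative decompositions (a sketch):

	(reg %ebx)				base = %ebx
	(plus (reg %ebx) (const_int 8))		base = %ebx, disp = 8
	(plus (mult (reg %eax) (const_int 4))
	      (reg %ebx))			index = %eax, scale = 4,
						base = %ebx
	(ashift (reg %eax) (const_int 2))	index = %eax, scale = 4,
						retval = -1 (lea only)  */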
5519\f
5520/* Return cost of the memory address x.
5521 For i386, it is better to use a complex address than let gcc copy
5522 the address into a reg and make a new pseudo. But not if the address
5523 requires two regs - that would mean more pseudos with longer
5524 lifetimes. */
dcefdf67 5525static int
5526ix86_address_cost (x)
5527 rtx x;
5528{
5529 struct ix86_address parts;
5530 int cost = 1;
3b3c6a3f 5531
5532 if (!ix86_decompose_address (x, &parts))
5533 abort ();
5534
5535 if (parts.base && GET_CODE (parts.base) == SUBREG)
5536 parts.base = SUBREG_REG (parts.base);
5537 if (parts.index && GET_CODE (parts.index) == SUBREG)
5538 parts.index = SUBREG_REG (parts.index);
5539
5540 /* More complex memory references are better. */
5541 if (parts.disp && parts.disp != const0_rtx)
5542 cost--;
5543
5544 /* Attempt to minimize number of registers in the address. */
5545 if ((parts.base
5546 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5547 || (parts.index
5548 && (!REG_P (parts.index)
5549 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5550 cost++;
5551
5552 if (parts.base
5553 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5554 && parts.index
5555 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5556 && parts.base != parts.index)
5557 cost++;
5558
5559 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5560 since its predecode logic can't detect the length of instructions
5561 and it degenerates to vector decoding. Increase the cost of such
5562 addresses here. The penalty is at least 2 cycles. It may be worthwhile
0f290768 5563 to split such addresses or even refuse such addresses at all.
5564
5565 The following addressing modes are affected:
5566 [base+scale*index]
5567 [scale*index+disp]
5568 [base+index]
0f290768 5569
5570 The first and last cases may be avoidable by explicitly coding the zero
5571 into the memory address, but I don't have an AMD-K6 machine handy to
5572 check this theory. */
5573
5574 if (TARGET_K6
5575 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5576 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5577 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5578 cost += 10;
0f290768 5579
5580 return cost;
5581}
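
/* A sketch of the resulting costs, derived from the rules above:
   "4(%ebx)" with a hard register base costs 0 (nonzero displacement, no
   pseudo); "(%ebx,%ecx)" costs 1; the same two-register address built
   from two distinct pseudos costs 3, discouraging addresses that keep
   two pseudos live.  */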
5582\f
5583/* If X is a machine specific address (i.e. a symbol or label being
5584 referenced as a displacement from the GOT implemented using an
5585 UNSPEC), then return the base term. Otherwise return X. */
5586
5587rtx
5588ix86_find_base_term (x)
5589 rtx x;
5590{
5591 rtx term;
5592
5593 if (TARGET_64BIT)
5594 {
5595 if (GET_CODE (x) != CONST)
5596 return x;
5597 term = XEXP (x, 0);
5598 if (GET_CODE (term) == PLUS
5599 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5600 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5601 term = XEXP (term, 0);
5602 if (GET_CODE (term) != UNSPEC
8ee41eaf 5603 || XINT (term, 1) != UNSPEC_GOTPCREL)
5604 return x;
5605
5606 term = XVECEXP (term, 0, 0);
5607
5608 if (GET_CODE (term) != SYMBOL_REF
5609 && GET_CODE (term) != LABEL_REF)
5610 return x;
5611
5612 return term;
5613 }
5614
69bd9368 5615 term = ix86_delegitimize_address (x);
5616
5617 if (GET_CODE (term) != SYMBOL_REF
5618 && GET_CODE (term) != LABEL_REF)
5619 return x;
5620
5621 return term;
5622}
5623\f
5624/* Determine if a given RTX is a valid constant. We already know this
5625 satisfies CONSTANT_P. */
5626
5627bool
5628legitimate_constant_p (x)
5629 rtx x;
5630{
5631 rtx inner;
5632
5633 switch (GET_CODE (x))
5634 {
5635 case SYMBOL_REF:
5636 /* TLS symbols are not constant. */
5637 if (tls_symbolic_operand (x, Pmode))
5638 return false;
5639 break;
5640
5641 case CONST:
5642 inner = XEXP (x, 0);
5643
5644 /* Offsets of TLS symbols are never valid.
5645 Discourage CSE from creating them. */
5646 if (GET_CODE (inner) == PLUS
5647 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5648 return false;
5649
5650 /* Only some unspecs are valid as "constants". */
5651 if (GET_CODE (inner) == UNSPEC)
5652 switch (XINT (inner, 1))
5653 {
5654 case UNSPEC_TPOFF:
5655 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5656 default:
5657 return false;
5658 }
5659 break;
5660
5661 default:
5662 break;
5663 }
5664
5665 /* Otherwise we handle everything else in the move patterns. */
5666 return true;
5667}
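
/* For example (a sketch): (symbol_ref "errno") is a legitimate constant,
   while the symbol_ref of a __thread variable is not, and neither is
   (const (plus (symbol_ref <tls>) (const_int 4))); this keeps TLS
   addresses out of CSE and the constant pool.  */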
5668
5669/* Determine if it's legal to put X into the constant pool. This
5670 is not possible for the address of thread-local symbols, which
5671 is checked above. */
5672
5673static bool
5674ix86_cannot_force_const_mem (x)
5675 rtx x;
5676{
5677 return !legitimate_constant_p (x);
5678}
5679
5680/* Determine if a given RTX is a valid constant address. */
5681
5682bool
5683constant_address_p (x)
5684 rtx x;
5685{
5686 switch (GET_CODE (x))
5687 {
5688 case LABEL_REF:
5689 case CONST_INT:
5690 return true;
5691
5692 case CONST_DOUBLE:
5693 return TARGET_64BIT;
5694
5695 case CONST:
5696 /* For Mach-O, really believe the CONST. */
5697 if (TARGET_MACHO)
5698 return true;
5699 /* Otherwise fall through. */
5700 case SYMBOL_REF:
5701 return !flag_pic && legitimate_constant_p (x);
5702
5703 default:
5704 return false;
5705 }
5706}
5707
5708/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5709 when generating PIC code. It is given that flag_pic is on and
5710 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5711
5712bool
5713legitimate_pic_operand_p (x)
5714 rtx x;
5715{
5716 rtx inner;
5717
5718 switch (GET_CODE (x))
5719 {
5720 case CONST:
5721 inner = XEXP (x, 0);
5722
5723 /* Only some unspecs are valid as "constants". */
5724 if (GET_CODE (inner) == UNSPEC)
5725 switch (XINT (inner, 1))
5726 {
5727 case UNSPEC_TPOFF:
5728 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5729 default:
5730 return false;
5731 }
5732 /* FALLTHRU */
5733
5734 case SYMBOL_REF:
5735 case LABEL_REF:
5736 return legitimate_pic_address_disp_p (x);
5737
5738 default:
5739 return true;
5740 }
5741}
5742
5743/* Determine if a given CONST RTX is a valid memory displacement
5744 in PIC mode. */
0f290768 5745
59be65f6 5746int
5747legitimate_pic_address_disp_p (disp)
5748 register rtx disp;
5749{
5750 bool saw_plus;
5751
5752 /* In 64bit mode we can allow direct addresses of symbols and labels
5753 when they are not dynamic symbols. */
5754 if (TARGET_64BIT)
5755 {
5756 /* TLS references should always be enclosed in UNSPEC. */
5757 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5758 return 0;
5759 if (GET_CODE (disp) == SYMBOL_REF
5760 && ix86_cmodel == CM_SMALL_PIC
5761 && (CONSTANT_POOL_ADDRESS_P (disp)
5762 || SYMBOL_REF_FLAG (disp)))
5763 return 1;
5764 if (GET_CODE (disp) == LABEL_REF)
5765 return 1;
5766 if (GET_CODE (disp) == CONST
5767 && GET_CODE (XEXP (disp, 0)) == PLUS
5768 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5769 && ix86_cmodel == CM_SMALL_PIC
5770 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5771 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5772 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5773 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5774 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5775 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5776 return 1;
5777 }
5778 if (GET_CODE (disp) != CONST)
5779 return 0;
5780 disp = XEXP (disp, 0);
5781
5782 if (TARGET_64BIT)
5783 {
5784 /* It is unsafe to allow PLUS expressions; this limits the allowed
5785 distance of GOT references. We should not need these anyway. */
5786 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5787 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5788 return 0;
5789
5790 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5791 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5792 return 0;
5793 return 1;
5794 }
5795
f996902d 5796 saw_plus = false;
5797 if (GET_CODE (disp) == PLUS)
5798 {
5799 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5800 return 0;
5801 disp = XEXP (disp, 0);
f996902d 5802 saw_plus = true;
5803 }
5804
5805 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5806 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5807 {
5808 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5809 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5810 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5811 {
5812 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5813 if (strstr (sym_name, "$pb") != 0)
5814 return 1;
5815 }
5816 }
5817
8ee41eaf 5818 if (GET_CODE (disp) != UNSPEC)
5819 return 0;
5820
5821 switch (XINT (disp, 1))
5822 {
8ee41eaf 5823 case UNSPEC_GOT:
5824 if (saw_plus)
5825 return false;
623fe810 5826 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5827 case UNSPEC_GOTOFF:
623fe810 5828 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d 5829 case UNSPEC_GOTTPOFF:
5830 case UNSPEC_GOTNTPOFF:
5831 case UNSPEC_INDNTPOFF:
5832 if (saw_plus)
5833 return false;
5834 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5835 case UNSPEC_NTPOFF:
5836 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5837 case UNSPEC_DTPOFF:
f996902d 5838 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5839 }
fce5a9f2 5840
623fe810 5841 return 0;
5842}
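
/* For example (a sketch), on 32-bit targets the displacement

	(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   is accepted for a local "foo" (an @GOTOFF reference), while

	(const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOT) (const_int 4)))

   is rejected above: an offset cannot be folded into a @GOT load.  */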
5843
5844/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5845 memory address for an instruction. The MODE argument is the machine mode
5846 for the MEM expression that wants to use this address.
5847
5848 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5849 convert common non-canonical forms to canonical form so that they will
5850 be recognized. */
5851
5852int
5853legitimate_address_p (mode, addr, strict)
5854 enum machine_mode mode;
5855 register rtx addr;
5856 int strict;
5857{
5858 struct ix86_address parts;
5859 rtx base, index, disp;
5860 HOST_WIDE_INT scale;
5861 const char *reason = NULL;
5862 rtx reason_rtx = NULL_RTX;
5863
5864 if (TARGET_DEBUG_ADDR)
5865 {
5866 fprintf (stderr,
e9a25f70 5867 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5868 GET_MODE_NAME (mode), strict);
5869 debug_rtx (addr);
5870 }
5871
5872 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5873 {
5874 if (TARGET_DEBUG_ADDR)
5875 fprintf (stderr, "Success.\n");
5876 return TRUE;
5877 }
5878
b446e5a2 5879 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5880 {
e075ae69 5881 reason = "decomposition failed";
50e60bc3 5882 goto report_error;
5883 }
5884
5885 base = parts.base;
5886 index = parts.index;
5887 disp = parts.disp;
5888 scale = parts.scale;
91f0226f 5889
e075ae69 5890 /* Validate base register.
5891
5892 Don't allow SUBREGs here; they can lead to spill failures when the base
5893 is one word out of a two word structure, which is represented internally
5894 as a DImode int. */
e9a25f70 5895
5896 if (base)
5897 {
1540f9eb 5898 rtx reg;
5899 reason_rtx = base;
5900
5901 if (GET_CODE (base) == SUBREG)
5902 reg = SUBREG_REG (base);
5903 else
5904 reg = base;
5905
5906 if (GET_CODE (reg) != REG)
3b3c6a3f 5907 {
e075ae69 5908 reason = "base is not a register";
50e60bc3 5909 goto report_error;
5910 }
5911
5912 if (GET_MODE (base) != Pmode)
5913 {
e075ae69 5914 reason = "base is not in Pmode";
50e60bc3 5915 goto report_error;
5916 }
5917
5918 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5919 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5920 {
e075ae69 5921 reason = "base is not valid";
50e60bc3 5922 goto report_error;
5923 }
5924 }
5925
e075ae69 5926 /* Validate index register.
5927
5928 Don't allow SUBREGs here; they can lead to spill failures when the index
5929 is one word out of a two word structure, which is represented internally
5930 as a DImode int. */
5931
5932 if (index)
3b3c6a3f 5933 {
1540f9eb 5934 rtx reg;
5935 reason_rtx = index;
5936
5937 if (GET_CODE (index) == SUBREG)
5938 reg = SUBREG_REG (index);
5939 else
5940 reg = index;
5941
5942 if (GET_CODE (reg) != REG)
3b3c6a3f 5943 {
e075ae69 5944 reason = "index is not a register";
50e60bc3 5945 goto report_error;
5946 }
5947
e075ae69 5948 if (GET_MODE (index) != Pmode)
c954bd01 5949 {
e075ae69 5950 reason = "index is not in Pmode";
50e60bc3 5951 goto report_error;
5952 }
5953
5954 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5955 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5956 {
e075ae69 5957 reason = "index is not valid";
50e60bc3 5958 goto report_error;
5959 }
5960 }
3b3c6a3f 5961
5962 /* Validate scale factor. */
5963 if (scale != 1)
3b3c6a3f 5964 {
5965 reason_rtx = GEN_INT (scale);
5966 if (!index)
3b3c6a3f 5967 {
e075ae69 5968 reason = "scale without index";
50e60bc3 5969 goto report_error;
5970 }
5971
e075ae69 5972 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5973 {
e075ae69 5974 reason = "scale is not a valid multiplier";
50e60bc3 5975 goto report_error;
5976 }
5977 }
5978
91bb873f 5979 /* Validate displacement. */
5980 if (disp)
5981 {
5982 reason_rtx = disp;
5983
5984 if (GET_CODE (disp) == CONST
5985 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5986 switch (XINT (XEXP (disp, 0), 1))
5987 {
5988 case UNSPEC_GOT:
5989 case UNSPEC_GOTOFF:
5990 case UNSPEC_GOTPCREL:
5991 if (!flag_pic)
5992 abort ();
5993 goto is_legitimate_pic;
5994
5995 case UNSPEC_GOTTPOFF:
5996 case UNSPEC_GOTNTPOFF:
5997 case UNSPEC_INDNTPOFF:
5998 case UNSPEC_NTPOFF:
5999 case UNSPEC_DTPOFF:
6000 break;
6001
6002 default:
6003 reason = "invalid address unspec";
6004 goto report_error;
6005 }
6006
6007 else if (flag_pic && (SYMBOLIC_CONST (disp)
6008#if TARGET_MACHO
6009 && !machopic_operand_p (disp)
6010#endif
6011 ))
3b3c6a3f 6012 {
f996902d 6013 is_legitimate_pic:
6014 if (TARGET_64BIT && (index || base))
6015 {
6016 /* foo@dtpoff(%rX) is ok. */
6017 if (GET_CODE (disp) != CONST
6018 || GET_CODE (XEXP (disp, 0)) != PLUS
6019 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6020 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6021 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6022 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6023 {
6024 reason = "non-constant pic memory reference";
6025 goto report_error;
6026 }
0d7d98ee 6027 }
75d38379 6028 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 6029 {
e075ae69 6030 reason = "displacement is an invalid pic construct";
50e60bc3 6031 goto report_error;
6032 }
6033
4e9efe54 6034 /* This code used to verify that a symbolic pic displacement
6035 includes the pic_offset_table_rtx register.
6036
6037 While this is a good idea, unfortunately these constructs may
6038 be created by the "adds using lea" optimization for incorrect
6039 code like:
6040
6041 int a;
6042 int foo(int i)
6043 {
6044 return *(&a+i);
6045 }
6046
50e60bc3 6047 This code is nonsensical, but results in addressing the
4e9efe54 6048 GOT table with a pic_offset_table_rtx base. We can't
f710504c 6049 just refuse it easily, since it gets matched by the
6050 "addsi3" pattern, which later gets split to lea in case
6051 the output register differs from the input. While this
6052 could be handled by a separate addsi pattern for this case
6053 that never results in lea, disabling this test seems to be
6054 the easier and correct fix for the crash. */
3b3c6a3f 6055 }
6056 else if (!CONSTANT_ADDRESS_P (disp))
6057 {
6058 reason = "displacement is not constant";
6059 goto report_error;
6060 }
6061 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6062 {
6063 reason = "displacement is out of range";
6064 goto report_error;
6065 }
6066 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
6067 {
6068 reason = "displacement is a const_double";
6069 goto report_error;
6070 }
6071 }
6072
e075ae69 6073 /* Everything looks valid. */
3b3c6a3f 6074 if (TARGET_DEBUG_ADDR)
e075ae69 6075 fprintf (stderr, "Success.\n");
3b3c6a3f 6076 return TRUE;
e075ae69 6077
5bf0ebab 6078 report_error:
6079 if (TARGET_DEBUG_ADDR)
6080 {
6081 fprintf (stderr, "Error: %s\n", reason);
6082 debug_rtx (reason_rtx);
6083 }
6084 return FALSE;
3b3c6a3f 6085}
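
/* For example (a sketch): "(%ebx,%ecx,4)" decomposes and passes every
   check above, while "(%ebx,%ecx,3)" fails with "scale is not a valid
   multiplier", and a pseudo register base fails the strict test used
   after reload.  */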
3b3c6a3f 6086\f
6087/* Return a unique alias set for the GOT. */
6088
0f290768 6089static HOST_WIDE_INT
6090ix86_GOT_alias_set ()
6091{
6092 static HOST_WIDE_INT set = -1;
6093 if (set == -1)
6094 set = new_alias_set ();
6095 return set;
0f290768 6096}
55efb413 6097
6098/* Return a legitimate reference for ORIG (an address) using the
6099 register REG. If REG is 0, a new pseudo is generated.
6100
91bb873f 6101 There are two types of references that must be handled:
3b3c6a3f
MM
6102
6103 1. Global data references must load the address from the GOT, via
6104 the PIC reg. An insn is emitted to do this load, and the reg is
6105 returned.
6106
6107 2. Static data references, constant pool addresses, and code labels
6108 compute the address as an offset from the GOT, whose base is in
6109 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
6110 differentiate them from global data objects. The returned
6111 address is the PIC reg + an unspec constant.
6112
6113 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 6114 reg also appears in the address. */
6115
6116rtx
6117legitimize_pic_address (orig, reg)
6118 rtx orig;
6119 rtx reg;
6120{
6121 rtx addr = orig;
6122 rtx new = orig;
91bb873f 6123 rtx base;
3b3c6a3f 6124
b069de3b
SS
6125#if TARGET_MACHO
6126 if (reg == 0)
6127 reg = gen_reg_rtx (Pmode);
6128 /* Use the generic Mach-O PIC machinery. */
6129 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6130#endif
6131
6132 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6133 new = addr;
6134 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 6135 {
6136 /* This symbol may be referenced via a displacement from the PIC
6137 base address (@GOTOFF). */
3b3c6a3f 6138
6139 if (reload_in_progress)
6140 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6141 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6142 new = gen_rtx_CONST (Pmode, new);
6143 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 6144
6145 if (reg != 0)
6146 {
6147 emit_move_insn (reg, new);
6148 new = reg;
6149 }
3b3c6a3f 6150 }
91bb873f 6151 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 6152 {
6153 if (TARGET_64BIT)
6154 {
8ee41eaf 6155 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6156 new = gen_rtx_CONST (Pmode, new);
6157 new = gen_rtx_MEM (Pmode, new);
6158 RTX_UNCHANGING_P (new) = 1;
6159 set_mem_alias_set (new, ix86_GOT_alias_set ());
6160
6161 if (reg == 0)
6162 reg = gen_reg_rtx (Pmode);
6163 /* Use gen_movsi directly; otherwise the address is loaded
6164 into a register for CSE. We don't want to CSE these addresses;
6165 instead we CSE addresses from the GOT table, so skip this. */
6166 emit_insn (gen_movsi (reg, new));
6167 new = reg;
6168 }
6169 else
6170 {
6171 /* This symbol must be referenced via a load from the
6172 Global Offset Table (@GOT). */
3b3c6a3f 6173
6174 if (reload_in_progress)
6175 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 6176 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6177 new = gen_rtx_CONST (Pmode, new);
6178 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6179 new = gen_rtx_MEM (Pmode, new);
6180 RTX_UNCHANGING_P (new) = 1;
6181 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 6182
6183 if (reg == 0)
6184 reg = gen_reg_rtx (Pmode);
6185 emit_move_insn (reg, new);
6186 new = reg;
6187 }
0f290768 6188 }
6189 else
6190 {
6191 if (GET_CODE (addr) == CONST)
3b3c6a3f 6192 {
91bb873f 6193 addr = XEXP (addr, 0);
6194
6195 /* We must match stuff we generate before. Assume the only
6196 unspecs that can get here are ours. Not that we could do
6197 anything with them anyway... */
6198 if (GET_CODE (addr) == UNSPEC
6199 || (GET_CODE (addr) == PLUS
6200 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6201 return orig;
6202 if (GET_CODE (addr) != PLUS)
564d80f4 6203 abort ();
3b3c6a3f 6204 }
6205 if (GET_CODE (addr) == PLUS)
6206 {
6207 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 6208
6209 /* Check first to see if this is a constant offset from a @GOTOFF
6210 symbol reference. */
623fe810 6211 if (local_symbolic_operand (op0, Pmode)
6212 && GET_CODE (op1) == CONST_INT)
6213 {
6214 if (!TARGET_64BIT)
6215 {
6216 if (reload_in_progress)
6217 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6218 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6219 UNSPEC_GOTOFF);
6220 new = gen_rtx_PLUS (Pmode, new, op1);
6221 new = gen_rtx_CONST (Pmode, new);
6222 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 6223
6224 if (reg != 0)
6225 {
6226 emit_move_insn (reg, new);
6227 new = reg;
6228 }
6229 }
6230 else
91bb873f 6231 {
6232 if (INTVAL (op1) < -16*1024*1024
6233 || INTVAL (op1) >= 16*1024*1024)
6234 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6235 }
6236 }
6237 else
6238 {
6239 base = legitimize_pic_address (XEXP (addr, 0), reg);
6240 new = legitimize_pic_address (XEXP (addr, 1),
6241 base == reg ? NULL_RTX : reg);
6242
6243 if (GET_CODE (new) == CONST_INT)
6244 new = plus_constant (base, INTVAL (new));
6245 else
6246 {
6247 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6248 {
6249 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6250 new = XEXP (new, 1);
6251 }
6252 new = gen_rtx_PLUS (Pmode, base, new);
6253 }
6254 }
6255 }
6256 }
6257 return new;
6258}
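
/* Illustrative results of the routine above (a sketch), 32-bit PIC:
   a global "foo" becomes

	(mem (plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))

   i.e. the address is loaded from the GOT, while a local symbol becomes

	(plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   i.e. a direct displacement from the PIC base.  */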
fb49053f 6259
fb49053f 6260static void
f996902d 6261ix86_encode_section_info (decl, first)
6262 tree decl;
6263 int first ATTRIBUTE_UNUSED;
6264{
6265 bool local_p = (*targetm.binds_local_p) (decl);
6266 rtx rtl, symbol;
6267
6268 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6269 if (GET_CODE (rtl) != MEM)
6270 return;
6271 symbol = XEXP (rtl, 0);
6272 if (GET_CODE (symbol) != SYMBOL_REF)
6273 return;
6274
6275 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6276 symbol so that we may access it directly in the GOT. */
6277
fb49053f 6278 if (flag_pic)
6279 SYMBOL_REF_FLAG (symbol) = local_p;
6280
6281 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6282 "local dynamic", "initial exec" or "local exec" TLS models
6283 respectively. */
6284
6285 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 6286 {
6287 const char *symbol_str;
6288 char *newstr;
6289 size_t len;
dce81a1a 6290 enum tls_model kind = decl_tls_model (decl);
f996902d 6291
6292 if (TARGET_64BIT && ! flag_pic)
6293 {
6294 /* x86-64 doesn't allow non-pic code for shared libraries,
6295 so don't generate GD/LD TLS models for non-pic code. */
6296 switch (kind)
6297 {
6298 case TLS_MODEL_GLOBAL_DYNAMIC:
6299 kind = TLS_MODEL_INITIAL_EXEC; break;
6300 case TLS_MODEL_LOCAL_DYNAMIC:
6301 kind = TLS_MODEL_LOCAL_EXEC; break;
6302 default:
6303 break;
6304 }
6305 }
6306
f996902d 6307 symbol_str = XSTR (symbol, 0);
fb49053f 6308
6309 if (symbol_str[0] == '%')
6310 {
6311 if (symbol_str[1] == tls_model_chars[kind])
6312 return;
6313 symbol_str += 2;
6314 }
6315 len = strlen (symbol_str) + 1;
6316 newstr = alloca (len + 2);
6317
6318 newstr[0] = '%';
6319 newstr[1] = tls_model_chars[kind];
6320 memcpy (newstr + 2, symbol_str, len);
6321
6322 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6323 }
6324}
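
/* For example (a sketch, per the %[GLil] comment above): a global-dynamic
   TLS variable "foo" is renamed to "%Gfoo" by the code above;
   ix86_strip_name_encoding below removes the two-character prefix again
   before the name reaches the assembler.  */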
6325
6326/* Undo the above when printing symbol names. */
6327
6328static const char *
6329ix86_strip_name_encoding (str)
6330 const char *str;
6331{
6332 if (str[0] == '%')
6333 str += 2;
6334 if (str [0] == '*')
6335 str += 1;
6336 return str;
6337}
3b3c6a3f 6338\f
6339/* Load the thread pointer into a register. */
6340
6341static rtx
6342get_thread_pointer ()
6343{
6344 rtx tp;
6345
6346 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6347 tp = gen_rtx_MEM (Pmode, tp);
6348 RTX_UNCHANGING_P (tp) = 1;
6349 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6350 tp = force_reg (Pmode, tp);
6351
6352 return tp;
6353}
fce5a9f2 6354
6355/* Try machine-dependent ways of modifying an illegitimate address
6356 to be legitimate. If we find one, return the new, valid address.
6357 This macro is used in only one place: `memory_address' in explow.c.
6358
6359 OLDX is the address as it was before break_out_memory_refs was called.
6360 In some cases it is useful to look at this to decide what needs to be done.
6361
6362 MODE and WIN are passed so that this macro can use
6363 GO_IF_LEGITIMATE_ADDRESS.
6364
6365 It is always safe for this macro to do nothing. It exists to recognize
6366 opportunities to optimize the output.
6367
6368 For the 80386, we handle X+REG by loading X into a register R and
6369 using R+REG. R will go in a general reg and indexing will be used.
6370 However, if REG is a broken-out memory address or multiplication,
6371 nothing needs to be done because REG can certainly go in a general reg.
6372
6373 When -fpic is used, special handling is needed for symbolic references.
6374 See comments by legitimize_pic_address in i386.c for details. */
6375
6376rtx
6377legitimize_address (x, oldx, mode)
6378 register rtx x;
bb5177ac 6379 register rtx oldx ATTRIBUTE_UNUSED;
6380 enum machine_mode mode;
6381{
6382 int changed = 0;
6383 unsigned log;
6384
6385 if (TARGET_DEBUG_ADDR)
6386 {
6387 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6388 GET_MODE_NAME (mode));
6389 debug_rtx (x);
6390 }
6391
6392 log = tls_symbolic_operand (x, mode);
6393 if (log)
6394 {
6395 rtx dest, base, off, pic;
75d38379 6396 int type;
f996902d 6397
755ac5d4 6398 switch (log)
6399 {
6400 case TLS_MODEL_GLOBAL_DYNAMIC:
6401 dest = gen_reg_rtx (Pmode);
6402 if (TARGET_64BIT)
6403 {
6404 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6405
6406 start_sequence ();
6407 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6408 insns = get_insns ();
6409 end_sequence ();
6410
6411 emit_libcall_block (insns, dest, rax, x);
6412 }
6413 else
6414 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6415 break;
6416
6417 case TLS_MODEL_LOCAL_DYNAMIC:
6418 base = gen_reg_rtx (Pmode);
6419 if (TARGET_64BIT)
6420 {
6421 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6422
6423 start_sequence ();
6424 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6425 insns = get_insns ();
6426 end_sequence ();
6427
6428 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6429 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6430 emit_libcall_block (insns, base, rax, note);
6431 }
6432 else
6433 emit_insn (gen_tls_local_dynamic_base_32 (base));
6434
6435 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6436 off = gen_rtx_CONST (Pmode, off);
6437
6438 return gen_rtx_PLUS (Pmode, base, off);
6439
6440 case TLS_MODEL_INITIAL_EXEC:
6441 if (TARGET_64BIT)
6442 {
6443 pic = NULL;
6444 type = UNSPEC_GOTNTPOFF;
6445 }
6446 else if (flag_pic)
f996902d 6447 {
6448 if (reload_in_progress)
6449 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d 6450 pic = pic_offset_table_rtx;
75d38379 6451 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
f996902d 6452 }
dea73790 6453 else if (!TARGET_GNU_TLS)
6454 {
6455 pic = gen_reg_rtx (Pmode);
6456 emit_insn (gen_set_got (pic));
75d38379 6457 type = UNSPEC_GOTTPOFF;
f996902d 6458 }
dea73790 6459 else
6460 {
6461 pic = NULL;
6462 type = UNSPEC_INDNTPOFF;
6463 }
6464
6465 base = get_thread_pointer ();
6466
75d38379 6467 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
f996902d 6468 off = gen_rtx_CONST (Pmode, off);
75d38379 6469 if (pic)
dea73790 6470 off = gen_rtx_PLUS (Pmode, pic, off);
6471 off = gen_rtx_MEM (Pmode, off);
6472 RTX_UNCHANGING_P (off) = 1;
6473 set_mem_alias_set (off, ix86_GOT_alias_set ());
f996902d 6474 dest = gen_reg_rtx (Pmode);
dea73790 6475
75d38379 6476 if (TARGET_64BIT || TARGET_GNU_TLS)
6477 {
6478 emit_move_insn (dest, off);
6479 return gen_rtx_PLUS (Pmode, base, dest);
6480 }
6481 else
6482 emit_insn (gen_subsi3 (dest, base, off));
6483 break;
6484
6485 case TLS_MODEL_LOCAL_EXEC:
6486 base = get_thread_pointer ();
6487
6488 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6489 (TARGET_64BIT || TARGET_GNU_TLS)
6490 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6491 off = gen_rtx_CONST (Pmode, off);
6492
75d38379 6493 if (TARGET_64BIT || TARGET_GNU_TLS)
6494 return gen_rtx_PLUS (Pmode, base, off);
6495 else
6496 {
6497 dest = gen_reg_rtx (Pmode);
6498 emit_insn (gen_subsi3 (dest, base, off));
6499 }
6500 break;
6501
6502 default:
6503 abort ();
6504 }
6505
6506 return dest;
6507 }
6508
6509 if (flag_pic && SYMBOLIC_CONST (x))
6510 return legitimize_pic_address (x, 0);
6511
6512 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6513 if (GET_CODE (x) == ASHIFT
6514 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 6515 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6516 {
6517 changed = 1;
6518 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6519 GEN_INT (1 << log));
6520 }
6521
6522 if (GET_CODE (x) == PLUS)
6523 {
0f290768 6524 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 6525
6526 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6527 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 6528 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6529 {
6530 changed = 1;
6531 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6532 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6533 GEN_INT (1 << log));
6534 }
6535
6536 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6537 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 6538 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6539 {
6540 changed = 1;
6541 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6542 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6543 GEN_INT (1 << log));
6544 }
6545
0f290768 6546 /* Put multiply first if it isn't already. */
6547 if (GET_CODE (XEXP (x, 1)) == MULT)
6548 {
6549 rtx tmp = XEXP (x, 0);
6550 XEXP (x, 0) = XEXP (x, 1);
6551 XEXP (x, 1) = tmp;
6552 changed = 1;
6553 }
6554
6555 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6556 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6557 created by virtual register instantiation, register elimination, and
6558 similar optimizations. */
6559 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6560 {
6561 changed = 1;
6562 x = gen_rtx_PLUS (Pmode,
6563 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6564 XEXP (XEXP (x, 1), 0)),
6565 XEXP (XEXP (x, 1), 1));
6566 }
6567
6568 /* Canonicalize
6569 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6570 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6571 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6572 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6573 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6574 && CONSTANT_P (XEXP (x, 1)))
6575 {
6576 rtx constant;
6577 rtx other = NULL_RTX;
6578
6579 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6580 {
6581 constant = XEXP (x, 1);
6582 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6583 }
6584 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6585 {
6586 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6587 other = XEXP (x, 1);
6588 }
6589 else
6590 constant = 0;
6591
6592 if (constant)
6593 {
6594 changed = 1;
6595 x = gen_rtx_PLUS (Pmode,
6596 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6597 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6598 plus_constant (other, INTVAL (constant)));
6599 }
6600 }
6601
6602 if (changed && legitimate_address_p (mode, x, FALSE))
6603 return x;
6604
6605 if (GET_CODE (XEXP (x, 0)) == MULT)
6606 {
6607 changed = 1;
6608 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6609 }
6610
6611 if (GET_CODE (XEXP (x, 1)) == MULT)
6612 {
6613 changed = 1;
6614 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6615 }
6616
6617 if (changed
6618 && GET_CODE (XEXP (x, 1)) == REG
6619 && GET_CODE (XEXP (x, 0)) == REG)
6620 return x;
6621
6622 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6623 {
6624 changed = 1;
6625 x = legitimize_pic_address (x, 0);
6626 }
6627
6628 if (changed && legitimate_address_p (mode, x, FALSE))
6629 return x;
6630
6631 if (GET_CODE (XEXP (x, 0)) == REG)
6632 {
6633 register rtx temp = gen_reg_rtx (Pmode);
6634 register rtx val = force_operand (XEXP (x, 1), temp);
6635 if (val != temp)
6636 emit_move_insn (temp, val);
6637
6638 XEXP (x, 1) = temp;
6639 return x;
6640 }
6641
6642 else if (GET_CODE (XEXP (x, 1)) == REG)
6643 {
6644 register rtx temp = gen_reg_rtx (Pmode);
6645 register rtx val = force_operand (XEXP (x, 0), temp);
6646 if (val != temp)
6647 emit_move_insn (temp, val);
6648
6649 XEXP (x, 0) = temp;
6650 return x;
6651 }
6652 }
6653
6654 return x;
6655}
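
/* An illustrative canonicalization performed above (a sketch):

	(plus (ashift (reg X) (const_int 2)) (reg Y))

   becomes

	(plus (mult (reg X) (const_int 4)) (reg Y))

   matching the base + index*scale form that ix86_decompose_address and
   GO_IF_LEGITIMATE_ADDRESS expect.  */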
6656\f
6657/* Print an integer constant expression in assembler syntax. Addition
6658 and subtraction are the only arithmetic that may appear in these
6659 expressions. FILE is the stdio stream to write to, X is the rtx, and
6660 CODE is the operand print code from the output string. */
6661
6662static void
6663output_pic_addr_const (file, x, code)
6664 FILE *file;
6665 rtx x;
6666 int code;
6667{
6668 char buf[256];
6669
6670 switch (GET_CODE (x))
6671 {
6672 case PC:
6673 if (flag_pic)
6674 putc ('.', file);
6675 else
6676 abort ();
6677 break;
6678
6679 case SYMBOL_REF:
91bb873f 6680 assemble_name (file, XSTR (x, 0));
b069de3b 6681 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
91bb873f 6682 fputs ("@PLT", file);
6683 break;
6684
6685 case LABEL_REF:
6686 x = XEXP (x, 0);
6687 /* FALLTHRU */
6688 case CODE_LABEL:
6689 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6690 assemble_name (asm_out_file, buf);
6691 break;
6692
6693 case CONST_INT:
f64cecad 6694 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6695 break;
6696
6697 case CONST:
6698 /* This used to output parentheses around the expression,
6699 but that does not work on the 386 (either ATT or BSD assembler). */
6700 output_pic_addr_const (file, XEXP (x, 0), code);
6701 break;
6702
6703 case CONST_DOUBLE:
6704 if (GET_MODE (x) == VOIDmode)
6705 {
6706 /* We can use %d if the number is <32 bits and positive. */
6707 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6708 fprintf (file, "0x%lx%08lx",
6709 (unsigned long) CONST_DOUBLE_HIGH (x),
6710 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6711 else
f64cecad 6712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6713 }
6714 else
6715 /* We can't handle floating point constants;
6716 PRINT_OPERAND must handle them. */
6717 output_operand_lossage ("floating constant misused");
6718 break;
6719
6720 case PLUS:
e9a25f70 6721 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6722 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6723 {
2a2ab3f9 6724 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6725 putc ('+', file);
e9a25f70 6726 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6727 }
91bb873f 6728 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6729 {
2a2ab3f9 6730 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6731 putc ('+', file);
e9a25f70 6732 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6733 }
91bb873f
RH
6734 else
6735 abort ();
2a2ab3f9
JVA
6736 break;
6737
6738 case MINUS:
b069de3b
SS
6739 if (!TARGET_MACHO)
6740 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6741 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6742 putc ('-', file);
2a2ab3f9 6743 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
6744 if (!TARGET_MACHO)
6745 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
6746 break;
6747
91bb873f
RH
6748 case UNSPEC:
6749 if (XVECLEN (x, 0) != 1)
5bf0ebab 6750 abort ();
91bb873f
RH
6751 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6752 switch (XINT (x, 1))
77ebd435 6753 {
8ee41eaf 6754 case UNSPEC_GOT:
77ebd435
AJ
6755 fputs ("@GOT", file);
6756 break;
8ee41eaf 6757 case UNSPEC_GOTOFF:
77ebd435
AJ
6758 fputs ("@GOTOFF", file);
6759 break;
8ee41eaf 6760 case UNSPEC_GOTPCREL:
edfe8595 6761 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6762 break;
f996902d 6763 case UNSPEC_GOTTPOFF:
dea73790 6764 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
6765 fputs ("@GOTTPOFF", file);
6766 break;
6767 case UNSPEC_TPOFF:
6768 fputs ("@TPOFF", file);
6769 break;
6770 case UNSPEC_NTPOFF:
75d38379
JJ
6771 if (TARGET_64BIT)
6772 fputs ("@TPOFF", file);
6773 else
6774 fputs ("@NTPOFF", file);
f996902d
RH
6775 break;
6776 case UNSPEC_DTPOFF:
6777 fputs ("@DTPOFF", file);
6778 break;
dea73790 6779 case UNSPEC_GOTNTPOFF:
75d38379
JJ
6780 if (TARGET_64BIT)
6781 fputs ("@GOTTPOFF(%rip)", file);
6782 else
6783 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6784 break;
6785 case UNSPEC_INDNTPOFF:
6786 fputs ("@INDNTPOFF", file);
6787 break;
77ebd435
AJ
6788 default:
6789 output_operand_lossage ("invalid UNSPEC as operand");
6790 break;
6791 }
91bb873f
RH
6792 break;
6793
2a2ab3f9
JVA
6794 default:
6795 output_operand_lossage ("invalid expression as operand");
6796 }
6797}
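
/* A worked example (editor's illustration, not from the original
   sources): for the PIC reference
   (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF)) the UNSPEC arm
   above prints "foo@GOTOFF", while on 64-bit targets UNSPEC_GOTPCREL
   prints "foo@GOTPCREL(%rip)".  */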

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
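
/* Illustrative example (assumes the usual 32-bit PIC register %ebx):
   (mem (plus (reg %ebx) (const (unspec [(symbol_ref ("x"))] UNSPEC_GOT))))
   is turned back into the plain (symbol_ref ("x")) by the code above.  */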
\f
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
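
/* For instance (illustrative only): (gt (reg:CCGC flags) (const_int 0))
   yields the suffix "g", so an output template such as "j%C0\t%l1"
   emits "jg"; with REVERSE set, the same comparison yields "le".  */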

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
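
/* Illustrative mapping: for the AX register, code 'b' prints "al",
   'w' prints "ax", 'k' prints "eax" and 'q' prints "rax" (with the
   AT&T '%' prefix added above); for extended register r8, code 'w'
   prints "r8w".  */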

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

static int
get_some_local_dynamic_name_1 (px, data)
     rtx *px;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
	nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* A little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if the argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
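
/* Example of the size handling above (illustrative): "fld%z1\t%y1"
   with a DFmode memory operand expands to "fldl", while an SFmode
   operand gives "flds"; in Intel mode the %z suffix is omitted
   entirely.  */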
\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      if (TARGET_64BIT)
	fputs ("fs:0", file);
      else
	fputs ("gs:0", file);
      return;
    }

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (addr) == SYMBOL_REF
	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
	      || GET_CODE (addr) == LABEL_REF
	      || (GET_CODE (addr) == CONST
		  && GET_CODE (XEXP (addr, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
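
/* Illustration: base %ebx, index %esi, scale 4 and displacement 8 print
   as "8(%ebx,%esi,4)" in AT&T syntax and as "[ebx+8+esi*4]" in Intel
   syntax (example only; the particular registers are immaterial).  */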

bool
output_addr_const_extra (file, x)
     FILE *file;
     rtx x;
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
\f
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
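
/* For example (illustrative): a DImode pseudo splits into two SImode
   subregs at byte offsets 0 and 4, and (mem:DI addr) splits into
   (mem:SI addr) and (mem:SI addr+4), matching the little-endian layout
   of the low and high words.  */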
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
\f
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints, which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* We know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}

/* Output code to initialize control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word, while
   ROUND_DOWN is set to a control word that rounds toward zero
   (truncates), as the truncating conversion patterns require.  */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
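
/* Note on the magic constants above (editor's illustration): bits 10
   and 11 of the i387 control word form the rounding-control field, and
   or-ing in 0xc00 sets that field to 11, i.e. round toward zero, which
   is exactly what the truncating conversions need; the insv variant is
   assumed to write the same bits via the high byte.  */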

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
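
/* The emitted sequence therefore looks like this (illustrative, for an
   SImode destination where the stack top dies):
	fldcw	%3	// switch to the truncating control word
	fistpl	%0	// store the truncated integer, popping st(0)
	fldcw	%2	// restore the caller's control word  */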

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is
	 also a stack register that dies, then this must be an
	 `fcompp' float compare.  */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
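
/* Example of the mask encoding above (illustrative): eflags_p == 1 with
   floating-point operands, an ordered compare, and a dying stack top
   gives mask == 9, selecting "fcomip\t{%y1, %0|%0, %y1}".  */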

void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
	     machopic_function_base_name () + 1);
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
\f
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
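
/* Illustration: clearing the HImode AX register goes through the SImode
   widening above and, when xor is preferred, emits "xorl %eax, %eax"
   with an explicit flags clobber instead of "movw $0, %ax", avoiding
   the operand-size prefix.  */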
8272
f996902d
RH
8273/* X is an unchanging MEM. If it is a constant pool reference, return
8274 the constant pool rtx, else NULL. */
8275
8276static rtx
8277maybe_get_pool_constant (x)
8278 rtx x;
8279{
69bd9368 8280 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8281
8282 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8283 return get_pool_constant (x);
8284
8285 return NULL_RTX;
8286}
8287
79325812 8288void
e075ae69
RH
8289ix86_expand_move (mode, operands)
8290 enum machine_mode mode;
8291 rtx operands[];
32b5b1aa 8292{
e075ae69 8293 int strict = (reload_in_progress || reload_completed);
f996902d
RH
8294 rtx insn, op0, op1, tmp;
8295
8296 op0 = operands[0];
8297 op1 = operands[1];
8298
f996902d
RH
8299 if (tls_symbolic_operand (op1, Pmode))
8300 {
8301 op1 = legitimize_address (op1, op1, VOIDmode);
8302 if (GET_CODE (op0) == MEM)
8303 {
8304 tmp = gen_reg_rtx (mode);
8305 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8306 op1 = tmp;
8307 }
8308 }
8309 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8310 {
b069de3b
SS
8311#if TARGET_MACHO
8312 if (MACHOPIC_PURE)
8313 {
8314 rtx temp = ((reload_in_progress
8315 || ((op0 && GET_CODE (op0) == REG)
8316 && mode == Pmode))
8317 ? op0 : gen_reg_rtx (Pmode));
8318 op1 = machopic_indirect_data_reference (op1, temp);
8319 op1 = machopic_legitimize_pic_address (op1, mode,
8320 temp == op1 ? 0 : temp);
8321 }
8322 else
8323 {
8324 if (MACHOPIC_INDIRECT)
8325 op1 = machopic_indirect_data_reference (op1, 0);
8326 }
8327 if (op0 != op1)
8328 {
8329 insn = gen_rtx_SET (VOIDmode, op0, op1);
8330 emit_insn (insn);
8331 }
8332 return;
8333#endif /* TARGET_MACHO */
f996902d
RH
8334 if (GET_CODE (op0) == MEM)
8335 op1 = force_reg (Pmode, op1);
e075ae69 8336 else
32b5b1aa 8337 {
f996902d 8338 rtx temp = op0;
e075ae69
RH
8339 if (GET_CODE (temp) != REG)
8340 temp = gen_reg_rtx (Pmode);
f996902d
RH
8341 temp = legitimize_pic_address (op1, temp);
8342 if (temp == op0)
e075ae69 8343 return;
f996902d 8344 op1 = temp;
32b5b1aa 8345 }
e075ae69
RH
8346 }
8347 else
8348 {
f996902d 8349 if (GET_CODE (op0) == MEM
44cf5b6a 8350 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
8351 || !push_operand (op0, mode))
8352 && GET_CODE (op1) == MEM)
8353 op1 = force_reg (mode, op1);
e9a25f70 8354
f996902d
RH
8355 if (push_operand (op0, mode)
8356 && ! general_no_elim_operand (op1, mode))
8357 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 8358
44cf5b6a
JH
8359 /* Force large constants in 64bit compilation into register
8360 to get them CSEed. */
8361 if (TARGET_64BIT && mode == DImode
f996902d
RH
8362 && immediate_operand (op1, mode)
8363 && !x86_64_zero_extended_value (op1)
8364 && !register_operand (op0, mode)
44cf5b6a 8365 && optimize && !reload_completed && !reload_in_progress)
f996902d 8366 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 8367
e075ae69 8368 if (FLOAT_MODE_P (mode))
32b5b1aa 8369 {
d7a29404
JH
8370 /* If we are loading a floating point constant to a register,
8371 force the value to memory now, since we'll get better code
8372 out the back end. */
e075ae69
RH
8373
8374 if (strict)
8375 ;
f996902d
RH
8376 else if (GET_CODE (op1) == CONST_DOUBLE
8377 && register_operand (op0, mode))
8378 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 8379 }
32b5b1aa 8380 }
e9a25f70 8381
f996902d 8382 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 8383
e075ae69
RH
8384 emit_insn (insn);
8385}
e9a25f70 8386
e37af218
RH
8387void
8388ix86_expand_vector_move (mode, operands)
8389 enum machine_mode mode;
8390 rtx operands[];
8391{
8392 /* Force constants other than zero into memory. We do not know how
8393 the instructions used to build constants modify the upper 64 bits
8394 of the register, once we have that information we may be able
8395 to handle some of them more efficiently. */
8396 if ((reload_in_progress | reload_completed) == 0
8397 && register_operand (operands[0], mode)
fdc4b40b 8398 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
2b28d405 8399 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
e37af218
RH
8400
8401 /* Make operand1 a register if it isn't already. */
f8ca7923 8402 if (!no_new_pseudos
e37af218 8403 && !register_operand (operands[0], mode)
b105d6da 8404 && !register_operand (operands[1], mode))
e37af218 8405 {
59bef189 8406 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
8407 emit_move_insn (operands[0], temp);
8408 return;
8409 }
8410
8411 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8412}
e37af218 8413
e075ae69
RH
8414/* Attempt to expand a binary operator. Make the expansion closer to the
8415 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 8416 memory references (one output, two input) in a single insn. */
e9a25f70 8417
e075ae69
RH
8418void
8419ix86_expand_binary_operator (code, mode, operands)
8420 enum rtx_code code;
8421 enum machine_mode mode;
8422 rtx operands[];
8423{
8424 int matching_memory;
8425 rtx src1, src2, dst, op, clob;
8426
8427 dst = operands[0];
8428 src1 = operands[1];
8429 src2 = operands[2];
8430
8431 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8432 if (GET_RTX_CLASS (code) == 'c'
8433 && (rtx_equal_p (dst, src2)
8434 || immediate_operand (src1, mode)))
8435 {
8436 rtx temp = src1;
8437 src1 = src2;
8438 src2 = temp;
32b5b1aa 8439 }
e9a25f70 8440
e075ae69
RH
8441 /* If the destination is memory, and we do not have matching source
8442 operands, do things in registers. */
8443 matching_memory = 0;
8444 if (GET_CODE (dst) == MEM)
32b5b1aa 8445 {
e075ae69
RH
8446 if (rtx_equal_p (dst, src1))
8447 matching_memory = 1;
8448 else if (GET_RTX_CLASS (code) == 'c'
8449 && rtx_equal_p (dst, src2))
8450 matching_memory = 2;
8451 else
8452 dst = gen_reg_rtx (mode);
8453 }
0f290768 8454
e075ae69
RH
8455 /* Both source operands cannot be in memory. */
8456 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8457 {
8458 if (matching_memory != 2)
8459 src2 = force_reg (mode, src2);
8460 else
8461 src1 = force_reg (mode, src1);
32b5b1aa 8462 }
e9a25f70 8463
06a964de
JH
8464 /* If the operation is not commutable, source 1 cannot be a constant
8465 or non-matching memory. */
0f290768 8466 if ((CONSTANT_P (src1)
06a964de
JH
8467 || (!matching_memory && GET_CODE (src1) == MEM))
8468 && GET_RTX_CLASS (code) != 'c')
e075ae69 8469 src1 = force_reg (mode, src1);
0f290768 8470
e075ae69 8471 /* If optimizing, copy to regs to improve CSE */
fe577e58 8472 if (optimize && ! no_new_pseudos)
32b5b1aa 8473 {
e075ae69
RH
8474 if (GET_CODE (dst) == MEM)
8475 dst = gen_reg_rtx (mode);
8476 if (GET_CODE (src1) == MEM)
8477 src1 = force_reg (mode, src1);
8478 if (GET_CODE (src2) == MEM)
8479 src2 = force_reg (mode, src2);
32b5b1aa 8480 }
e9a25f70 8481
e075ae69
RH
8482 /* Emit the instruction. */
8483
8484 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8485 if (reload_in_progress)
8486 {
8487 /* Reload doesn't know about the flags register, and doesn't know that
8488 it doesn't want to clobber it. We can only do this with PLUS. */
8489 if (code != PLUS)
8490 abort ();
8491 emit_insn (op);
8492 }
8493 else
32b5b1aa 8494 {
e075ae69
RH
8495 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8496 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 8497 }
e9a25f70 8498
e075ae69
RH
8499 /* Fix up the destination if needed. */
8500 if (dst != operands[0])
8501 emit_move_insn (operands[0], dst);
8502}
8503
8504/* Return TRUE or FALSE depending on whether the binary operator meets the
8505 appropriate constraints. */
8506
8507int
8508ix86_binary_operator_ok (code, mode, operands)
8509 enum rtx_code code;
8510 enum machine_mode mode ATTRIBUTE_UNUSED;
8511 rtx operands[3];
8512{
8513 /* Both source operands cannot be in memory. */
8514 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8515 return 0;
8516 /* If the operation is not commutable, source 1 cannot be a constant. */
8517 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8518 return 0;
8519 /* If the destination is memory, we must have a matching source operand. */
8520 if (GET_CODE (operands[0]) == MEM
8521 && ! (rtx_equal_p (operands[0], operands[1])
8522 || (GET_RTX_CLASS (code) == 'c'
8523 && rtx_equal_p (operands[0], operands[2]))))
8524 return 0;
06a964de 8525 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8526 have a matching destination. */
06a964de
JH
8527 if (GET_CODE (operands[1]) == MEM
8528 && GET_RTX_CLASS (code) != 'c'
8529 && ! rtx_equal_p (operands[0], operands[1]))
8530 return 0;
e075ae69
RH
8531 return 1;
8532}
8533
8534/* Attempt to expand a unary operator. Make the expansion closer to the
8535 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 8536 memory references (one output, one input) in a single insn. */
e075ae69 8537
9d81fc27 8538void
e075ae69
RH
8539ix86_expand_unary_operator (code, mode, operands)
8540 enum rtx_code code;
8541 enum machine_mode mode;
8542 rtx operands[];
8543{
06a964de
JH
8544 int matching_memory;
8545 rtx src, dst, op, clob;
8546
8547 dst = operands[0];
8548 src = operands[1];
e075ae69 8549
06a964de
JH
8550 /* If the destination is memory, and we do not have matching source
8551 operands, do things in registers. */
8552 matching_memory = 0;
8553 if (GET_CODE (dst) == MEM)
32b5b1aa 8554 {
06a964de
JH
8555 if (rtx_equal_p (dst, src))
8556 matching_memory = 1;
e075ae69 8557 else
06a964de 8558 dst = gen_reg_rtx (mode);
32b5b1aa 8559 }
e9a25f70 8560
06a964de
JH
8561 /* When source operand is memory, destination must match. */
8562 if (!matching_memory && GET_CODE (src) == MEM)
8563 src = force_reg (mode, src);
0f290768 8564
06a964de 8565 /* If optimizing, copy to regs to improve CSE */
fe577e58 8566 if (optimize && ! no_new_pseudos)
06a964de
JH
8567 {
8568 if (GET_CODE (dst) == MEM)
8569 dst = gen_reg_rtx (mode);
8570 if (GET_CODE (src) == MEM)
8571 src = force_reg (mode, src);
8572 }
8573
8574 /* Emit the instruction. */
8575
8576 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8577 if (reload_in_progress || code == NOT)
8578 {
8579 /* Reload doesn't know about the flags register, and doesn't know that
8580 it doesn't want to clobber it. */
8581 if (code != NOT)
8582 abort ();
8583 emit_insn (op);
8584 }
8585 else
8586 {
8587 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8588 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8589 }
8590
8591 /* Fix up the destination if needed. */
8592 if (dst != operands[0])
8593 emit_move_insn (operands[0], dst);
e075ae69
RH
8594}
8595
8596/* Return TRUE or FALSE depending on whether the unary operator meets the
8597 appropriate constraints. */
8598
8599int
8600ix86_unary_operator_ok (code, mode, operands)
8601 enum rtx_code code ATTRIBUTE_UNUSED;
8602 enum machine_mode mode ATTRIBUTE_UNUSED;
8603 rtx operands[2] ATTRIBUTE_UNUSED;
8604{
8605 /* If one of the operands is memory, source and destination must match. */
8606 if ((GET_CODE (operands[0]) == MEM
8607 || GET_CODE (operands[1]) == MEM)
8608 && ! rtx_equal_p (operands[0], operands[1]))
8609 return FALSE;
8610 return TRUE;
8611}
8612
8613/* Return TRUE or FALSE depending on whether the first SET in INSN
8614 has source and destination with matching CC modes and whether the
8615 CC mode is at least as constrained as REQ_MODE. */
8616
8617int
8618ix86_match_ccmode (insn, req_mode)
8619 rtx insn;
8620 enum machine_mode req_mode;
8621{
8622 rtx set;
8623 enum machine_mode set_mode;
8624
8625 set = PATTERN (insn);
8626 if (GET_CODE (set) == PARALLEL)
8627 set = XVECEXP (set, 0, 0);
8628 if (GET_CODE (set) != SET)
8629 abort ();
8630 if (GET_CODE (SET_SRC (set)) != COMPARE)
8631 abort ();
8632
8633 set_mode = GET_MODE (SET_DEST (set));
8634 switch (set_mode)
8635 {
8636 case CCNOmode:
8637 if (req_mode != CCNOmode
8638 && (req_mode != CCmode
8639 || XEXP (SET_SRC (set), 1) != const0_rtx))
8640 return 0;
8641 break;
16189740 8642 case CCmode:
9076b9c1 8643 if (req_mode == CCGCmode)
8644 return 0;
8645 /* FALLTHRU */
8646 case CCGCmode:
8647 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8648 return 0;
8649 /* FALLTHRU */
8650 case CCGOCmode:
8651 if (req_mode == CCZmode)
8652 return 0;
8653 /* FALLTHRU */
8654 case CCZmode:
8655 break;
8656
8657 default:
8658 abort ();
8659 }
8660
8661 return (GET_MODE (SET_SRC (set)) == set_mode);
8662}
8663
8664/* Generate insn patterns to do an integer compare of OPERANDS. */
8665
8666static rtx
8667ix86_expand_int_compare (code, op0, op1)
8668 enum rtx_code code;
8669 rtx op0, op1;
8670{
8671 enum machine_mode cmpmode;
8672 rtx tmp, flags;
8673
8674 cmpmode = SELECT_CC_MODE (code, op0, op1);
8675 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8676
8677 /* This is very simple, but making the interface the same as in the
8678 FP case makes the rest of the code easier. */
8679 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8680 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8681
8682 /* Return the test that should be put into the flags user, i.e.
8683 the bcc, scc, or cmov instruction. */
8684 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8685}
8686
8687/* Figure out whether to use ordered or unordered fp comparisons.
8688 Return the appropriate mode to use. */
e075ae69 8689
b1cdafbb 8690enum machine_mode
3a3677ff 8691ix86_fp_compare_mode (code)
8752c357 8692 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 8693{
8694 /* ??? In order to make all comparisons reversible, we do all comparisons
8695 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8696 all forms of trapping and nontrapping comparisons, we can make inequality
8697 comparisons trapping again, since it results in better code when using
8698 FCOM based compares. */
8699 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8700}
8701
8702enum machine_mode
8703ix86_cc_mode (code, op0, op1)
8704 enum rtx_code code;
8705 rtx op0, op1;
8706{
8707 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8708 return ix86_fp_compare_mode (code);
8709 switch (code)
8710 {
8711 /* Only zero flag is needed. */
8712 case EQ: /* ZF=0 */
8713 case NE: /* ZF!=0 */
8714 return CCZmode;
8715 /* Codes needing carry flag. */
8716 case GEU: /* CF=0 */
8717 case GTU: /* CF=0 & ZF=0 */
8718 case LTU: /* CF=1 */
8719 case LEU: /* CF=1 | ZF=1 */
265dab10 8720 return CCmode;
8721 /* Codes possibly doable only with sign flag when
8722 comparing against zero. */
8723 case GE: /* SF=OF or SF=0 */
7e08e190 8724 case LT: /* SF<>OF or SF=1 */
8725 if (op1 == const0_rtx)
8726 return CCGOCmode;
8727 else
8728 /* For other cases the carry flag is not required. */
8729 return CCGCmode;
8730 /* Codes doable only with the sign flag when comparing
8731 against zero, but we lack a jump instruction for it,
4aae8a9a 8732 so we need to use relational tests against overflow,
8733 which therefore needs to be zero. */
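 /* (Illustration: "a > 0" needs ZF=0 && SF=OF; after a compare against
 zero OF is clear, so the test reduces to ZF=0 && SF=0, which is
 what CCNOmode expresses.) */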
8734 case GT: /* ZF=0 & SF=OF */
8735 case LE: /* ZF=1 | SF<>OF */
8736 if (op1 == const0_rtx)
8737 return CCNOmode;
8738 else
8739 return CCGCmode;
8740 /* The strcmp pattern does (use flags), and combine may ask us for
8741 the proper mode. */
8742 case USE:
8743 return CCmode;
9076b9c1 8744 default:
0f290768 8745 abort ();
8746 }
8747}
8748
8749/* Return true if we should use an FCOMI instruction for this fp comparison. */
8750
a940d8bd 8751int
3a3677ff 8752ix86_use_fcomi_compare (code)
9e7adcb3 8753 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8754{
8755 enum rtx_code swapped_code = swap_condition (code);
8756 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8757 || (ix86_fp_comparison_cost (swapped_code)
8758 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8759}
8760
0f290768 8761/* Swap, force into registers, or otherwise massage the two operands
3a3677ff 8762 to a fp comparison. The operands are updated in place; the new
d1f87653 8763 comparison code is returned. */
8764
8765static enum rtx_code
8766ix86_prepare_fp_compare_args (code, pop0, pop1)
8767 enum rtx_code code;
8768 rtx *pop0, *pop1;
8769{
8770 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8771 rtx op0 = *pop0, op1 = *pop1;
8772 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8773 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8774
e075ae69 8775 /* All of the unordered compare instructions only work on registers.
8776 The same is true of the XFmode compare instructions. The same is
8777 true of the fcomi compare instructions. */
8778
8779 if (!is_sse
8780 && (fpcmp_mode == CCFPUmode
8781 || op_mode == XFmode
8782 || op_mode == TFmode
8783 || ix86_use_fcomi_compare (code)))
e075ae69 8784 {
8785 op0 = force_reg (op_mode, op0);
8786 op1 = force_reg (op_mode, op1);
8787 }
8788 else
8789 {
8790 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8791 things around if they appear profitable, otherwise force op0
8792 into a register. */
8793
8794 if (standard_80387_constant_p (op0) == 0
8795 || (GET_CODE (op0) == MEM
8796 && ! (standard_80387_constant_p (op1) == 0
8797 || GET_CODE (op1) == MEM)))
32b5b1aa 8798 {
8799 rtx tmp;
8800 tmp = op0, op0 = op1, op1 = tmp;
8801 code = swap_condition (code);
8802 }
8803
8804 if (GET_CODE (op0) != REG)
3a3677ff 8805 op0 = force_reg (op_mode, op0);
8806
8807 if (CONSTANT_P (op1))
8808 {
8809 if (standard_80387_constant_p (op1))
3a3677ff 8810 op1 = force_reg (op_mode, op1);
e075ae69 8811 else
3a3677ff 8812 op1 = validize_mem (force_const_mem (op_mode, op1));
8813 }
8814 }
e9a25f70 8815
8816 /* Try to rearrange the comparison to make it cheaper. */
8817 if (ix86_fp_comparison_cost (code)
8818 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8819 && (GET_CODE (op1) == REG || !no_new_pseudos))
8820 {
8821 rtx tmp;
8822 tmp = op0, op0 = op1, op1 = tmp;
8823 code = swap_condition (code);
8824 if (GET_CODE (op0) != REG)
8825 op0 = force_reg (op_mode, op0);
8826 }
8827
8828 *pop0 = op0;
8829 *pop1 = op1;
8830 return code;
8831}
8832
8833/* Convert the comparison codes we use to represent FP comparisons to the
8834 integer code that will result in a proper branch. Return UNKNOWN if no
8835 such code is available. */
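/* (After fcomi or fnstsw+sahf the FP condition lands in CF/ZF/PF - see
 the flag table in ix86_fp_comparison_codes - so ordered GT/GE behave
 like the unsigned GTU/GEU tests, and the UN* forms also hold for
 unordered operands because unordered sets ZF, PF and CF.) */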
8836static enum rtx_code
8837ix86_fp_compare_code_to_integer (code)
8838 enum rtx_code code;
8839{
8840 switch (code)
8841 {
8842 case GT:
8843 return GTU;
8844 case GE:
8845 return GEU;
8846 case ORDERED:
8847 case UNORDERED:
8848 return code;
8849 break;
8850 case UNEQ:
8851 return EQ;
8852 break;
8853 case UNLT:
8854 return LTU;
8855 break;
8856 case UNLE:
8857 return LEU;
8858 break;
8859 case LTGT:
8860 return NE;
8861 break;
8862 default:
8863 return UNKNOWN;
8864 }
8865}
8866
8867/* Split comparison code CODE into comparisons we can do using branch
8868 instructions. BYPASS_CODE is the comparison code for a branch that
8869 will branch around FIRST_CODE and SECOND_CODE. If one of the
8870 branches is not required, its value is set to NIL.
8871 We never require more than two branches. */
8872static void
8873ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8874 enum rtx_code code, *bypass_code, *first_code, *second_code;
8875{
8876 *first_code = code;
8877 *bypass_code = NIL;
8878 *second_code = NIL;
8879
8880 /* The fcomi comparison sets flags as follows:
8881
8882 cmp ZF PF CF
8883 > 0 0 0
8884 < 0 0 1
8885 = 1 0 0
8886 un 1 1 1 */
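 /* (Example: IEEE "a < b" cannot be tested as plain CF=1, since CF is
 also set for unordered operands; the switch below therefore tests
 UNLT and first branches around it via UNORDERED.) */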
8887
8888 switch (code)
8889 {
8890 case GT: /* GTU - CF=0 & ZF=0 */
8891 case GE: /* GEU - CF=0 */
8892 case ORDERED: /* PF=0 */
8893 case UNORDERED: /* PF=1 */
8894 case UNEQ: /* EQ - ZF=1 */
8895 case UNLT: /* LTU - CF=1 */
8896 case UNLE: /* LEU - CF=1 | ZF=1 */
8897 case LTGT: /* NE - ZF=0 */
8898 break;
8899 case LT: /* LTU - CF=1 - fails on unordered */
8900 *first_code = UNLT;
8901 *bypass_code = UNORDERED;
8902 break;
8903 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8904 *first_code = UNLE;
8905 *bypass_code = UNORDERED;
8906 break;
8907 case EQ: /* EQ - ZF=1 - fails on unordered */
8908 *first_code = UNEQ;
8909 *bypass_code = UNORDERED;
8910 break;
8911 case NE: /* NE - ZF=0 - fails on unordered */
8912 *first_code = LTGT;
8913 *second_code = UNORDERED;
8914 break;
8915 case UNGE: /* GEU - CF=0 - fails on unordered */
8916 *first_code = GE;
8917 *second_code = UNORDERED;
8918 break;
8919 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8920 *first_code = GT;
8921 *second_code = UNORDERED;
8922 break;
8923 default:
8924 abort ();
8925 }
8926 if (!TARGET_IEEE_FP)
8927 {
8928 *second_code = NIL;
8929 *bypass_code = NIL;
8930 }
8931}
8932
9e7adcb3 8933/* Return the cost of a comparison done using fcom + arithmetic operations on AX.
5bdc5878 8934 All following functions use the number of instructions as a cost metric.
 8935 In future this should be tweaked to compute bytes for optimize_size and
 8936 take into account the performance of various instructions on various CPUs. */
8937static int
8938ix86_fp_comparison_arithmetics_cost (code)
8939 enum rtx_code code;
8940{
8941 if (!TARGET_IEEE_FP)
8942 return 4;
8943 /* The cost of code output by ix86_expand_fp_compare. */
8944 switch (code)
8945 {
8946 case UNLE:
8947 case UNLT:
8948 case LTGT:
8949 case GT:
8950 case GE:
8951 case UNORDERED:
8952 case ORDERED:
8953 case UNEQ:
8954 return 4;
8955 break;
8956 case LT:
8957 case NE:
8958 case EQ:
8959 case UNGE:
8960 return 5;
8961 break;
8962 case LE:
8963 case UNGT:
8964 return 6;
8965 break;
8966 default:
8967 abort ();
8968 }
8969}
8970
8971/* Return cost of comparison done using fcomi operation.
8972 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8973static int
8974ix86_fp_comparison_fcomi_cost (code)
8975 enum rtx_code code;
8976{
8977 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8978 /* Return an arbitrarily high cost when the instruction is not supported -
 8979 this prevents gcc from using it. */
8980 if (!TARGET_CMOVE)
8981 return 1024;
8982 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8983 return (bypass_code != NIL || second_code != NIL) + 2;
8984}
8985
8986/* Return cost of comparison done using sahf operation.
8987 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8988static int
8989ix86_fp_comparison_sahf_cost (code)
8990 enum rtx_code code;
8991{
8992 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8993 /* Return an arbitrarily high cost when the instruction is not preferred -
 8994 this keeps gcc from using it. */
8995 if (!TARGET_USE_SAHF && !optimize_size)
8996 return 1024;
8997 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8998 return (bypass_code != NIL || second_code != NIL) + 3;
8999}
9000
9001/* Compute cost of the comparison done using any method.
9002 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9003static int
9004ix86_fp_comparison_cost (code)
9005 enum rtx_code code;
9006{
9007 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9008 int min;
9009
9010 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9011 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9012
9013 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9014 if (min > sahf_cost)
9015 min = sahf_cost;
9016 if (min > fcomi_cost)
9017 min = fcomi_cost;
9018 return min;
9019}
c0c102a9 9020
9021/* Generate insn patterns to do a floating point compare of OPERANDS. */
9022
9023static rtx
9024ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9025 enum rtx_code code;
9026 rtx op0, op1, scratch;
9027 rtx *second_test;
9028 rtx *bypass_test;
9029{
9030 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 9031 rtx tmp, tmp2;
9e7adcb3 9032 int cost = ix86_fp_comparison_cost (code);
c0c102a9 9033 enum rtx_code bypass_code, first_code, second_code;
9034
9035 fpcmp_mode = ix86_fp_compare_mode (code);
9036 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9037
9038 if (second_test)
9039 *second_test = NULL_RTX;
9040 if (bypass_test)
9041 *bypass_test = NULL_RTX;
9042
9043 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9044
9045 /* Do fcomi/sahf based test when profitable. */
9046 if ((bypass_code == NIL || bypass_test)
9047 && (second_code == NIL || second_test)
9048 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 9049 {
9050 if (TARGET_CMOVE)
9051 {
9052 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9053 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9054 tmp);
9055 emit_insn (tmp);
9056 }
9057 else
9058 {
9059 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 9060 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9061 if (!scratch)
9062 scratch = gen_reg_rtx (HImode);
9063 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9064 emit_insn (gen_x86_sahf_1 (scratch));
9065 }
9066
9067 /* The FP codes work out to act like unsigned. */
9a915772 9068 intcmp_mode = fpcmp_mode;
9069 code = first_code;
9070 if (bypass_code != NIL)
9071 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9072 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9073 const0_rtx);
9074 if (second_code != NIL)
9075 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9076 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9077 const0_rtx);
9078 }
9079 else
9080 {
9081 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 9082 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 9083 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9084 if (!scratch)
9085 scratch = gen_reg_rtx (HImode);
3a3677ff 9086 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 9087
9088 /* In the unordered case, we have to check C2 for NaN's, which
9089 doesn't happen to work out to anything nice combination-wise.
9090 So do some bit twiddling on the value we've got in AH to come
9091 up with an appropriate set of condition codes. */
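 /* (fnstsw stores the FPU status word in AX, so in AH bit 0 is C0
 (0x01), bit 2 is C2 (0x04) and bit 6 is C3 (0x40); the masks 0x45,
 0x44, 0x40, 0x05, 0x04 and 0x01 below select combinations of
 these condition bits.) */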
e075ae69 9092
9093 intcmp_mode = CCNOmode;
9094 switch (code)
32b5b1aa 9095 {
9096 case GT:
9097 case UNGT:
9098 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 9099 {
3a3677ff 9100 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 9101 code = EQ;
9102 }
9103 else
9104 {
9105 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9106 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9107 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9108 intcmp_mode = CCmode;
9109 code = GEU;
9110 }
9111 break;
9112 case LT:
9113 case UNLT:
9114 if (code == LT && TARGET_IEEE_FP)
9115 {
9116 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9117 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9118 intcmp_mode = CCmode;
9119 code = EQ;
9120 }
9121 else
9122 {
9123 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9124 code = NE;
9125 }
9126 break;
9127 case GE:
9128 case UNGE:
9129 if (code == GE || !TARGET_IEEE_FP)
9130 {
3a3677ff 9131 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 9132 code = EQ;
9133 }
9134 else
9135 {
9136 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9137 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9138 GEN_INT (0x01)));
9139 code = NE;
9140 }
9141 break;
9142 case LE:
9143 case UNLE:
9144 if (code == LE && TARGET_IEEE_FP)
9145 {
9146 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9147 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9148 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9149 intcmp_mode = CCmode;
9150 code = LTU;
9151 }
9152 else
9153 {
9154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9155 code = NE;
9156 }
9157 break;
9158 case EQ:
9159 case UNEQ:
9160 if (code == EQ && TARGET_IEEE_FP)
9161 {
9162 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9163 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9164 intcmp_mode = CCmode;
9165 code = EQ;
9166 }
9167 else
9168 {
9169 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9170 code = NE;
9171 break;
9172 }
9173 break;
9174 case NE:
9175 case LTGT:
9176 if (code == NE && TARGET_IEEE_FP)
9177 {
3a3677ff 9178 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9179 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9180 GEN_INT (0x40)));
3a3677ff 9181 code = NE;
9182 }
9183 else
9184 {
9185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9186 code = EQ;
32b5b1aa 9187 }
9188 break;
9189
9190 case UNORDERED:
9191 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9192 code = NE;
9193 break;
9194 case ORDERED:
9195 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9196 code = EQ;
9197 break;
9198
9199 default:
9200 abort ();
32b5b1aa 9201 }
32b5b1aa 9202 }
9203
9204 /* Return the test that should be put into the flags user, i.e.
9205 the bcc, scc, or cmov instruction. */
9206 return gen_rtx_fmt_ee (code, VOIDmode,
9207 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9208 const0_rtx);
9209}
9210
9e3e266c 9211rtx
a1b8572c 9212ix86_expand_compare (code, second_test, bypass_test)
e075ae69 9213 enum rtx_code code;
a1b8572c 9214 rtx *second_test, *bypass_test;
9215{
9216 rtx op0, op1, ret;
9217 op0 = ix86_compare_op0;
9218 op1 = ix86_compare_op1;
9219
9220 if (second_test)
9221 *second_test = NULL_RTX;
9222 if (bypass_test)
9223 *bypass_test = NULL_RTX;
9224
e075ae69 9225 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9226 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9227 second_test, bypass_test);
32b5b1aa 9228 else
9229 ret = ix86_expand_int_compare (code, op0, op1);
9230
9231 return ret;
9232}
9233
9234/* Return true if the CODE will result in a nontrivial jump sequence. */
9235bool
9236ix86_fp_jump_nontrivial_p (code)
9237 enum rtx_code code;
9238{
9239 enum rtx_code bypass_code, first_code, second_code;
9240 if (!TARGET_CMOVE)
9241 return true;
9242 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9243 return bypass_code != NIL || second_code != NIL;
9244}
9245
e075ae69 9246void
3a3677ff 9247ix86_expand_branch (code, label)
e075ae69 9248 enum rtx_code code;
9249 rtx label;
9250{
3a3677ff 9251 rtx tmp;
e075ae69 9252
3a3677ff 9253 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 9254 {
9255 case QImode:
9256 case HImode:
9257 case SImode:
0d7d98ee 9258 simple:
a1b8572c 9259 tmp = ix86_expand_compare (code, NULL, NULL);
9260 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9261 gen_rtx_LABEL_REF (VOIDmode, label),
9262 pc_rtx);
9263 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 9264 return;
e075ae69 9265
9266 case SFmode:
9267 case DFmode:
0f290768 9268 case XFmode:
2b589241 9269 case TFmode:
9270 {
9271 rtvec vec;
9272 int use_fcomi;
03598dea 9273 enum rtx_code bypass_code, first_code, second_code;
9274
9275 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9276 &ix86_compare_op1);
fce5a9f2 9277
9278 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9279
9280 /* Check whether we will use the natural sequence with one jump. If
9281 so, we can expand the jump early. Otherwise delay expansion by
9282 creating a compound insn so as not to confuse the optimizers. */
9283 if (bypass_code == NIL && second_code == NIL
9284 && TARGET_CMOVE)
9285 {
9286 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9287 gen_rtx_LABEL_REF (VOIDmode, label),
9288 pc_rtx, NULL_RTX);
9289 }
9290 else
9291 {
9292 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9293 ix86_compare_op0, ix86_compare_op1);
9294 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9295 gen_rtx_LABEL_REF (VOIDmode, label),
9296 pc_rtx);
9297 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9298
9299 use_fcomi = ix86_use_fcomi_compare (code);
9300 vec = rtvec_alloc (3 + !use_fcomi);
9301 RTVEC_ELT (vec, 0) = tmp;
9302 RTVEC_ELT (vec, 1)
9303 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9304 RTVEC_ELT (vec, 2)
9305 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9306 if (! use_fcomi)
9307 RTVEC_ELT (vec, 3)
9308 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9309
9310 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9311 }
9312 return;
9313 }
32b5b1aa 9314
3a3677ff 9315 case DImode:
9316 if (TARGET_64BIT)
9317 goto simple;
9318 /* Expand DImode branch into multiple compare+branch. */
9319 {
9320 rtx lo[2], hi[2], label2;
9321 enum rtx_code code1, code2, code3;
32b5b1aa 9322
9323 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9324 {
9325 tmp = ix86_compare_op0;
9326 ix86_compare_op0 = ix86_compare_op1;
9327 ix86_compare_op1 = tmp;
9328 code = swap_condition (code);
9329 }
9330 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9331 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 9332
9333 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9334 avoid two branches. This costs one extra insn, so disable when
9335 optimizing for size. */
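 /* ((hi0 ^ hi1) | (lo0 ^ lo1) is zero iff both halves compare equal,
 so one compare of the combined value against zero decides EQ/NE
 for the whole DImode value.) */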
32b5b1aa 9336
9337 if ((code == EQ || code == NE)
9338 && (!optimize_size
9339 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9340 {
9341 rtx xor0, xor1;
32b5b1aa 9342
9343 xor1 = hi[0];
9344 if (hi[1] != const0_rtx)
9345 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9346 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 9347
9348 xor0 = lo[0];
9349 if (lo[1] != const0_rtx)
9350 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9351 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 9352
9353 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9354 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 9355
9356 ix86_compare_op0 = tmp;
9357 ix86_compare_op1 = const0_rtx;
9358 ix86_expand_branch (code, label);
9359 return;
9360 }
e075ae69 9361
9362 /* Otherwise, if we are doing a less-than or greater-or-equal
9363 comparison, op1 is a constant, and the low word is zero, then
9364 we can just examine the high word. */
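 /* (E.g. an unsigned DImode "a < (C << 32)" with a zero low word in
 the constant holds exactly when hi(a) < C, so only the high-word
 compare is needed.) */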
32b5b1aa 9365
9366 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9367 switch (code)
9368 {
9369 case LT: case LTU: case GE: case GEU:
9370 ix86_compare_op0 = hi[0];
9371 ix86_compare_op1 = hi[1];
9372 ix86_expand_branch (code, label);
9373 return;
9374 default:
9375 break;
9376 }
e075ae69 9377
3a3677ff 9378 /* Otherwise, we need two or three jumps. */
e075ae69 9379
3a3677ff 9380 label2 = gen_label_rtx ();
e075ae69 9381
9382 code1 = code;
9383 code2 = swap_condition (code);
9384 code3 = unsigned_condition (code);
e075ae69 9385
9386 switch (code)
9387 {
9388 case LT: case GT: case LTU: case GTU:
9389 break;
e075ae69 9390
9391 case LE: code1 = LT; code2 = GT; break;
9392 case GE: code1 = GT; code2 = LT; break;
9393 case LEU: code1 = LTU; code2 = GTU; break;
9394 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 9395
9396 case EQ: code1 = NIL; code2 = NE; break;
9397 case NE: code2 = NIL; break;
e075ae69 9398
9399 default:
9400 abort ();
9401 }
e075ae69 9402
9403 /*
9404 * a < b =>
9405 * if (hi(a) < hi(b)) goto true;
9406 * if (hi(a) > hi(b)) goto false;
9407 * if (lo(a) < lo(b)) goto true;
9408 * false:
9409 */
9410
9411 ix86_compare_op0 = hi[0];
9412 ix86_compare_op1 = hi[1];
9413
9414 if (code1 != NIL)
9415 ix86_expand_branch (code1, label);
9416 if (code2 != NIL)
9417 ix86_expand_branch (code2, label2);
9418
9419 ix86_compare_op0 = lo[0];
9420 ix86_compare_op1 = lo[1];
9421 ix86_expand_branch (code3, label);
9422
9423 if (code2 != NIL)
9424 emit_label (label2);
9425 return;
9426 }
e075ae69 9427
9428 default:
9429 abort ();
9430 }
32b5b1aa 9431}
e075ae69 9432
9433/* Split branch based on floating point condition. */
9434void
9435ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9436 enum rtx_code code;
9437 rtx op1, op2, target1, target2, tmp;
9438{
9439 rtx second, bypass;
9440 rtx label = NULL_RTX;
03598dea 9441 rtx condition;
9442 int bypass_probability = -1, second_probability = -1, probability = -1;
9443 rtx i;
9444
9445 if (target2 != pc_rtx)
9446 {
9447 rtx tmp = target2;
9448 code = reverse_condition_maybe_unordered (code);
9449 target2 = target1;
9450 target1 = tmp;
9451 }
9452
9453 condition = ix86_expand_fp_compare (code, op1, op2,
9454 tmp, &second, &bypass);
9455
9456 if (split_branch_probability >= 0)
9457 {
9458 /* Distribute the probabilities across the jumps.
9459 Assume that BYPASS and SECOND always test
9460 for UNORDERED. */
9461 probability = split_branch_probability;
9462
d6a7951f 9463 /* A value of 1 is low enough that the probability need not
 9464 be updated. Later we may run some experiments and see
 9465 if unordered values are more frequent in practice. */
9466 if (bypass)
9467 bypass_probability = 1;
9468 if (second)
9469 second_probability = 1;
9470 }
9471 if (bypass != NULL_RTX)
9472 {
9473 label = gen_label_rtx ();
9474 i = emit_jump_insn (gen_rtx_SET
9475 (VOIDmode, pc_rtx,
9476 gen_rtx_IF_THEN_ELSE (VOIDmode,
9477 bypass,
9478 gen_rtx_LABEL_REF (VOIDmode,
9479 label),
9480 pc_rtx)));
9481 if (bypass_probability >= 0)
9482 REG_NOTES (i)
9483 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9484 GEN_INT (bypass_probability),
9485 REG_NOTES (i));
9486 }
9487 i = emit_jump_insn (gen_rtx_SET
9488 (VOIDmode, pc_rtx,
9489 gen_rtx_IF_THEN_ELSE (VOIDmode,
9490 condition, target1, target2)));
9491 if (probability >= 0)
9492 REG_NOTES (i)
9493 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9494 GEN_INT (probability),
9495 REG_NOTES (i));
9496 if (second != NULL_RTX)
9e7adcb3 9497 {
9498 i = emit_jump_insn (gen_rtx_SET
9499 (VOIDmode, pc_rtx,
9500 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9501 target2)));
9502 if (second_probability >= 0)
9503 REG_NOTES (i)
9504 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9505 GEN_INT (second_probability),
9506 REG_NOTES (i));
9e7adcb3 9507 }
9508 if (label != NULL_RTX)
9509 emit_label (label);
9510}
9511
32b5b1aa 9512int
3a3677ff 9513ix86_expand_setcc (code, dest)
e075ae69 9514 enum rtx_code code;
e075ae69 9515 rtx dest;
32b5b1aa 9516{
9517 rtx ret, tmp, tmpreg;
9518 rtx second_test, bypass_test;
e075ae69 9519
9520 if (GET_MODE (ix86_compare_op0) == DImode
9521 && !TARGET_64BIT)
9522 return 0; /* FAIL */
9523
9524 if (GET_MODE (dest) != QImode)
9525 abort ();
e075ae69 9526
a1b8572c 9527 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9528 PUT_MODE (ret, QImode);
9529
9530 tmp = dest;
a1b8572c 9531 tmpreg = dest;
32b5b1aa 9532
e075ae69 9533 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9534 if (bypass_test || second_test)
9535 {
9536 rtx test = second_test;
9537 int bypass = 0;
9538 rtx tmp2 = gen_reg_rtx (QImode);
9539 if (bypass_test)
9540 {
9541 if (second_test)
b531087a 9542 abort ();
9543 test = bypass_test;
9544 bypass = 1;
9545 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9546 }
9547 PUT_MODE (test, QImode);
9548 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9549
9550 if (bypass)
9551 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9552 else
9553 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9554 }
e075ae69 9555
e075ae69 9556 return 1; /* DONE */
32b5b1aa 9557}
e075ae69 9558
d1f87653 9559/* Expand a comparison setting or clearing the carry flag. Return true
 9560 when successful, and set *POP to the comparison operation. */
9561bool
9562ix86_expand_carry_flag_compare (code, op0, op1, pop)
9563 rtx op0, op1, *pop;
9564 enum rtx_code code;
9565{
9566 enum machine_mode mode =
9567 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9568
9569 /* Do not handle DImode compares, which go through a special path.
9570 FP compares are handled below when they map to a carry flag test. */
9571 if ((mode == DImode && !TARGET_64BIT))
9572 return false;
9573 if (FLOAT_MODE_P (mode))
9574 {
9575 rtx second_test = NULL, bypass_test = NULL;
9576 rtx compare_op, compare_seq;
9577
9578 /* Shortcut: the following common codes never translate into carry flag compares. */
9579 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9580 || code == ORDERED || code == UNORDERED)
9581 return false;
9582
9583 /* These comparisons require the zero flag; swap the operands so they no longer do. */
9584 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9585 && !TARGET_IEEE_FP)
9586 {
9587 rtx tmp = op0;
9588 op0 = op1;
9589 op1 = tmp;
9590 code = swap_condition (code);
9591 }
9592
9593 /* Try to expand the comparison and verify that we end up with a carry
9594 flag based comparison. This fails to be true only when we decide to
9595 expand the comparison using arithmetic, which is not a common scenario. */
9596 start_sequence ();
9597 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9598 &second_test, &bypass_test);
9599 compare_seq = get_insns ();
9600 end_sequence ();
9601
9602 if (second_test || bypass_test)
9603 return false;
9604 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9605 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9606 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9607 else
9608 code = GET_CODE (compare_op);
9609 if (code != LTU && code != GEU)
9610 return false;
9611 emit_insn (compare_seq);
9612 *pop = compare_op;
9613 return true;
9614 }
9615 if (!INTEGRAL_MODE_P (mode))
9616 return false;
9617 switch (code)
9618 {
9619 case LTU:
9620 case GEU:
9621 break;
9622
9623 /* Convert a==0 into (unsigned)a<1. */
9624 case EQ:
9625 case NE:
9626 if (op1 != const0_rtx)
9627 return false;
9628 op1 = const1_rtx;
9629 code = (code == EQ ? LTU : GEU);
9630 break;
9631
9632 /* Convert a>b into b<a or a>=b+1. */
9633 case GTU:
9634 case LEU:
9635 if (GET_CODE (op1) == CONST_INT)
9636 {
9637 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9638 /* Bail out on overflow. We could still swap the operands, but that
9639 would force loading the constant into a register. */
9640 if (op1 == const0_rtx
9641 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9642 return false;
9643 code = (code == GTU ? GEU : LTU);
9644 }
9645 else
9646 {
9647 rtx tmp = op1;
9648 op1 = op0;
9649 op0 = tmp;
9650 code = (code == GTU ? LTU : GEU);
9651 }
9652 break;
9653
9654 /* Convert a>=0 into (unsigned)a<0x80000000. */
9655 case LT:
9656 case GE:
9657 if (mode == DImode || op1 != const0_rtx)
9658 return false;
9659 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9660 code = (code == LT ? GEU : LTU);
9661 break;
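 /* Convert a<=-1 (i.e. a<0) into (unsigned)a>=0x80000000. */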
9662 case LE:
9663 case GT:
9664 if (mode == DImode || op1 != constm1_rtx)
9665 return false;
9666 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9667 code = (code == LE ? GEU : LTU);
9668 break;
9669
9670 default:
9671 return false;
9672 }
9673 ix86_compare_op0 = op0;
9674 ix86_compare_op1 = op1;
9675 *pop = ix86_expand_compare (code, NULL, NULL);
9676 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9677 abort ();
9678 return true;
9679}
9680
32b5b1aa 9681int
9682ix86_expand_int_movcc (operands)
9683 rtx operands[];
32b5b1aa 9684{
9685 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9686 rtx compare_seq, compare_op;
a1b8572c 9687 rtx second_test, bypass_test;
635559ab 9688 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9689 bool sign_bit_compare_p = false;
3a3677ff 9690
e075ae69 9691 start_sequence ();
a1b8572c 9692 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9693 compare_seq = get_insns ();
9694 end_sequence ();
9695
9696 compare_code = GET_CODE (compare_op);
9697
9698 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9699 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9700 sign_bit_compare_p = true;
9701
9702 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9703 HImode insns, we'd be swallowed in word prefix ops. */
9704
4977bab6 9705 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9706 && (mode != DImode || TARGET_64BIT)
0f290768 9707 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9708 && GET_CODE (operands[3]) == CONST_INT)
9709 {
9710 rtx out = operands[0];
9711 HOST_WIDE_INT ct = INTVAL (operands[2]);
9712 HOST_WIDE_INT cf = INTVAL (operands[3]);
9713 HOST_WIDE_INT diff;
9714
9715 diff = ct - cf;
9716 /* Sign bit compares are better done using shifts than using
9717 sbb. */
9718 if (sign_bit_compare_p
9719 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9720 ix86_compare_op1, &compare_op))
e075ae69 9721 {
9722 /* Detect overlap between destination and compare sources. */
9723 rtx tmp = out;
9724
4977bab6 9725 if (!sign_bit_compare_p)
36583fea 9726 {
9727 bool fpcmp = false;
9728
9729 compare_code = GET_CODE (compare_op);
9730
9731 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9732 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9733 {
9734 fpcmp = true;
9735 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9736 }
9737
9738 /* To simplify the rest of the code, restrict to the GEU case. */
9739 if (compare_code == LTU)
9740 {
9741 HOST_WIDE_INT tmp = ct;
9742 ct = cf;
9743 cf = tmp;
9744 compare_code = reverse_condition (compare_code);
9745 code = reverse_condition (code);
9746 }
9747 else
9748 {
9749 if (fpcmp)
9750 PUT_CODE (compare_op,
9751 reverse_condition_maybe_unordered
9752 (GET_CODE (compare_op)));
9753 else
9754 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9755 }
4977bab6 9756 diff = ct - cf;
36583fea 9757
9758 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9759 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9760 tmp = gen_reg_rtx (mode);
e075ae69 9761
4977bab6 9762 if (mode == DImode)
e6e81735 9763 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9764 else
e6e81735 9765 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9766 }
14f73b5a 9767 else
9768 {
9769 if (code == GT || code == GE)
9770 code = reverse_condition (code);
9771 else
9772 {
9773 HOST_WIDE_INT tmp = ct;
9774 ct = cf;
9775 cf = tmp;
5fb48685 9776 diff = ct - cf;
4977bab6
ZW
9777 }
9778 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9779 ix86_compare_op1, VOIDmode, 0, -1);
9780 }
e075ae69 9781
9782 if (diff == 1)
9783 {
9784 /*
9785 * cmpl op0,op1
9786 * sbbl dest,dest
9787 * [addl dest, ct]
9788 *
9789 * Size 5 - 8.
9790 */
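 /* (After the sbb above TMP is -1 when the carry was set and 0
 otherwise, so TMP + ct yields either ct - 1, which equals cf
 since diff == 1, or ct.) */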
9791 if (ct)
9792 tmp = expand_simple_binop (mode, PLUS,
9793 tmp, GEN_INT (ct),
4977bab6 9794 copy_rtx (tmp), 1, OPTAB_DIRECT);
9795 }
9796 else if (cf == -1)
9797 {
9798 /*
9799 * cmpl op0,op1
9800 * sbbl dest,dest
9801 * orl $ct, dest
9802 *
9803 * Size 8.
9804 */
9805 tmp = expand_simple_binop (mode, IOR,
9806 tmp, GEN_INT (ct),
4977bab6 9807 copy_rtx (tmp), 1, OPTAB_DIRECT);
9808 }
9809 else if (diff == -1 && ct)
9810 {
9811 /*
9812 * cmpl op0,op1
9813 * sbbl dest,dest
06ec023f 9814 * notl dest
36583fea
JH
9815 * [addl dest, cf]
9816 *
9817 * Size 8 - 11.
9818 */
4977bab6 9819 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9820 if (cf)
9821 tmp = expand_simple_binop (mode, PLUS,
9822 copy_rtx (tmp), GEN_INT (cf),
9823 copy_rtx (tmp), 1, OPTAB_DIRECT);
9824 }
9825 else
9826 {
9827 /*
9828 * cmpl op0,op1
9829 * sbbl dest,dest
06ec023f 9830 * [notl dest]
36583fea
JH
9831 * andl cf - ct, dest
9832 * [addl dest, ct]
9833 *
9834 * Size 8 - 11.
9835 */
9836
9837 if (cf == 0)
9838 {
9839 cf = ct;
9840 ct = 0;
4977bab6 9841 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9842 }
9843
635559ab 9844 tmp = expand_simple_binop (mode, AND,
4977bab6 9845 copy_rtx (tmp),
d8bf17f9 9846 gen_int_mode (cf - ct, mode),
4977bab6 9847 copy_rtx (tmp), 1, OPTAB_DIRECT);
9848 if (ct)
9849 tmp = expand_simple_binop (mode, PLUS,
9850 copy_rtx (tmp), GEN_INT (ct),
9851 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9852 }
e075ae69 9853
9854 if (!rtx_equal_p (tmp, out))
9855 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9856
9857 return 1; /* DONE */
9858 }
9859
9860 if (diff < 0)
9861 {
9862 HOST_WIDE_INT tmp;
9863 tmp = ct, ct = cf, cf = tmp;
9864 diff = -diff;
9865 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9866 {
9867 /* We may be reversing an unordered compare to a normal compare, which
9868 is not valid in general (we may convert a non-trapping condition
9869 to a trapping one); however, on i386 we currently emit all
9870 comparisons unordered. */
9871 compare_code = reverse_condition_maybe_unordered (compare_code);
9872 code = reverse_condition_maybe_unordered (code);
9873 }
9874 else
9875 {
9876 compare_code = reverse_condition (compare_code);
9877 code = reverse_condition (code);
9878 }
e075ae69 9879 }
9880
9881 compare_code = NIL;
9882 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9883 && GET_CODE (ix86_compare_op1) == CONST_INT)
9884 {
9885 if (ix86_compare_op1 == const0_rtx
9886 && (code == LT || code == GE))
9887 compare_code = code;
9888 else if (ix86_compare_op1 == constm1_rtx)
9889 {
9890 if (code == LE)
9891 compare_code = LT;
9892 else if (code == GT)
9893 compare_code = GE;
9894 }
9895 }
9896
9897 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9898 if (compare_code != NIL
9899 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9900 && (cf == -1 || ct == -1))
9901 {
9902 /* If lea code below could be used, only optimize
9903 if it results in a 2 insn sequence. */
9904
9905 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9906 || diff == 3 || diff == 5 || diff == 9)
9907 || (compare_code == LT && ct == -1)
9908 || (compare_code == GE && cf == -1))
9909 {
9910 /*
9911 * notl op1 (if necessary)
9912 * sarl $31, op1
9913 * orl cf, op1
9914 */
9915 if (ct != -1)
9916 {
9917 cf = ct;
9918 ct = -1;
9919 code = reverse_condition (code);
9920 }
9921
9922 out = emit_store_flag (out, code, ix86_compare_op0,
9923 ix86_compare_op1, VOIDmode, 0, -1);
9924
9925 out = expand_simple_binop (mode, IOR,
9926 out, GEN_INT (cf),
9927 out, 1, OPTAB_DIRECT);
9928 if (out != operands[0])
9929 emit_move_insn (operands[0], out);
9930
9931 return 1; /* DONE */
9932 }
9933 }
9934
4977bab6 9935
9936 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9937 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9938 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 9939 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9940 {
9941 /*
9942 * xorl dest,dest
9943 * cmpl op1,op2
9944 * setcc dest
9945 * lea cf(dest*(ct-cf)),dest
9946 *
9947 * Size 14.
9948 *
9949 * This also catches the degenerate setcc-only case.
9950 */
9951
9952 rtx tmp;
9953 int nops;
9954
9955 out = emit_store_flag (out, code, ix86_compare_op0,
9956 ix86_compare_op1, VOIDmode, 0, 1);
9957
9958 nops = 0;
9959 /* On x86_64 the lea instruction operates on Pmode, so we need
9960 to get the arithmetic done in the proper mode to match. */
e075ae69 9961 if (diff == 1)
068f5dea 9962 tmp = copy_rtx (out);
9963 else
9964 {
885a70fd 9965 rtx out1;
068f5dea 9966 out1 = copy_rtx (out);
635559ab 9967 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9968 nops++;
9969 if (diff & 1)
9970 {
635559ab 9971 tmp = gen_rtx_PLUS (mode, tmp, out1);
9972 nops++;
9973 }
9974 }
9975 if (cf != 0)
9976 {
635559ab 9977 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9978 nops++;
9979 }
4977bab6 9980 if (!rtx_equal_p (tmp, out))
e075ae69 9981 {
14f73b5a 9982 if (nops == 1)
a5cf80f0 9983 out = force_operand (tmp, copy_rtx (out));
e075ae69 9984 else
4977bab6 9985 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9986 }
4977bab6 9987 if (!rtx_equal_p (out, operands[0]))
1985ef90 9988 emit_move_insn (operands[0], copy_rtx (out));
9989
9990 return 1; /* DONE */
9991 }
9992
9993 /*
9994 * General case: Jumpful:
9995 * xorl dest,dest cmpl op1, op2
9996 * cmpl op1, op2 movl ct, dest
9997 * setcc dest jcc 1f
9998 * decl dest movl cf, dest
9999 * andl (cf-ct),dest 1:
10000 * addl ct,dest
0f290768 10001 *
e075ae69
RH
10002 * Size 20. Size 14.
10003 *
10004 * This is reasonably steep, but branch mispredict costs are
10005 * high on modern cpus, so consider failing only if optimizing
10006 * for space.
10007 */
10008
10009 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10010 && BRANCH_COST >= 2)
e075ae69 10011 {
97f51ac4 10012 if (cf == 0)
e075ae69 10013 {
10014 cf = ct;
10015 ct = 0;
734dba19 10016 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10017 /* We may be reversing an unordered compare to a normal compare,
10018 which is not valid in general (we may convert a non-trapping
10019 condition to a trapping one); however, on i386 we currently
10020 emit all comparisons unordered. */
10021 code = reverse_condition_maybe_unordered (code);
10022 else
10023 {
10024 code = reverse_condition (code);
10025 if (compare_code != NIL)
10026 compare_code = reverse_condition (compare_code);
10027 }
10028 }
10029
10030 if (compare_code != NIL)
10031 {
10032 /* notl op1 (if needed)
10033 sarl $31, op1
10034 andl (cf-ct), op1
10035 addl ct, op1
10036
10037 For x < 0 (resp. x <= -1) there will be no notl,
10038 so if possible swap the constants to get rid of the
10039 complement.
10040 True/false will be -1/0 while code below (store flag
10041 followed by decrement) is 0/-1, so the constants need
10042 to be exchanged once more. */
10043
10044 if (compare_code == GE || !cf)
734dba19 10045 {
10046 code = reverse_condition (code);
10047 compare_code = LT;
10048 }
10049 else
10050 {
10051 HOST_WIDE_INT tmp = cf;
10052 cf = ct;
10053 ct = tmp;
734dba19 10054 }
10055
10056 out = emit_store_flag (out, code, ix86_compare_op0,
10057 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10058 }
10059 else
10060 {
10061 out = emit_store_flag (out, code, ix86_compare_op0,
10062 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10063
10064 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10065 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10066 }
e075ae69 10067
4977bab6 10068 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10069 gen_int_mode (cf - ct, mode),
4977bab6 10070 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10071 if (ct)
10072 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10073 copy_rtx (out), 1, OPTAB_DIRECT);
10074 if (!rtx_equal_p (out, operands[0]))
10075 emit_move_insn (operands[0], copy_rtx (out));
10076
10077 return 1; /* DONE */
10078 }
10079 }
10080
4977bab6 10081 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10082 {
10083 /* Try a few things more with specific constants and a variable. */
10084
78a0d70c 10085 optab op;
10086 rtx var, orig_out, out, tmp;
10087
4977bab6 10088 if (BRANCH_COST <= 2)
10089 return 0; /* FAIL */
10090
0f290768 10091 /* If one of the two operands is an interesting constant, load a
e075ae69 10092 constant with the code above and mask the variable in with a logical operation. */
0f290768 10093
e075ae69
RH
10094 if (GET_CODE (operands[2]) == CONST_INT)
10095 {
10096 var = operands[3];
4977bab6 10097 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10098 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10099 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10100 operands[3] = const0_rtx, op = ior_optab;
10101 else
10102 return 0; /* FAIL */
10103 }
10104 else if (GET_CODE (operands[3]) == CONST_INT)
10105 {
10106 var = operands[2];
4977bab6 10107 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10108 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10109 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
e075ae69 10110 operands[2] = const0_rtx, op = ior_optab;
10111 else
10112 return 0; /* FAIL */
e075ae69 10113 }
78a0d70c 10114 else
10115 return 0; /* FAIL */
10116
10117 orig_out = operands[0];
635559ab 10118 tmp = gen_reg_rtx (mode);
10119 operands[0] = tmp;
10120
10121 /* Recurse to get the constant loaded. */
10122 if (ix86_expand_int_movcc (operands) == 0)
10123 return 0; /* FAIL */
10124
10125 /* Mask in the interesting variable. */
635559ab 10126 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10127 OPTAB_WIDEN);
10128 if (!rtx_equal_p (out, orig_out))
10129 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10130
10131 return 1; /* DONE */
10132 }
10133
10134 /*
10135 * For comparison with above,
10136 *
10137 * movl cf,dest
10138 * movl ct,tmp
10139 * cmpl op1,op2
10140 * cmovcc tmp,dest
10141 *
10142 * Size 15.
10143 */
10144
10145 if (! nonimmediate_operand (operands[2], mode))
10146 operands[2] = force_reg (mode, operands[2]);
10147 if (! nonimmediate_operand (operands[3], mode))
10148 operands[3] = force_reg (mode, operands[3]);
e075ae69 10149
10150 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10151 {
635559ab 10152 rtx tmp = gen_reg_rtx (mode);
10153 emit_move_insn (tmp, operands[3]);
10154 operands[3] = tmp;
10155 }
10156 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10157 {
635559ab 10158 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10159 emit_move_insn (tmp, operands[2]);
10160 operands[2] = tmp;
10161 }
4977bab6 10162
c9682caf 10163 if (! register_operand (operands[2], VOIDmode)
10164 && (mode == QImode
10165 || ! register_operand (operands[3], VOIDmode)))
635559ab 10166 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10167
10168 if (mode == QImode
10169 && ! register_operand (operands[3], VOIDmode))
10170 operands[3] = force_reg (mode, operands[3]);
10171
10172 emit_insn (compare_seq);
10173 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10174 gen_rtx_IF_THEN_ELSE (mode,
10175 compare_op, operands[2],
10176 operands[3])));
a1b8572c 10177 if (bypass_test)
4977bab6 10178 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10179 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10180 bypass_test,
4977bab6
ZW
10181 copy_rtx (operands[3]),
10182 copy_rtx (operands[0]))));
a1b8572c 10183 if (second_test)
4977bab6 10184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10185 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10186 second_test,
10187 copy_rtx (operands[2]),
10188 copy_rtx (operands[0]))));
10189
10190 return 1; /* DONE */
e9a25f70 10191}
e075ae69 10192
32b5b1aa 10193int
10194ix86_expand_fp_movcc (operands)
10195 rtx operands[];
32b5b1aa 10196{
e075ae69 10197 enum rtx_code code;
e075ae69 10198 rtx tmp;
a1b8572c 10199 rtx compare_op, second_test, bypass_test;
32b5b1aa 10200
10201 /* For SF/DFmode conditional moves based on comparisons
10202 in the same mode, we may want to use SSE min/max instructions. */
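 /* (E.g. "a < b ? a : b" maps directly to minss/minsd and
 "a > b ? a : b" to maxss/maxsd when the operands match up as
 checked below.) */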
10203 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10204 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10205 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10206 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10207 && (!TARGET_IEEE_FP
10208 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10209 /* We may be called from the post-reload splitter. */
10210 && (!REG_P (operands[0])
10211 || SSE_REG_P (operands[0])
52a661a6 10212 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10213 {
10214 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10215 code = GET_CODE (operands[1]);
10216
10217 /* See if we have (cross) match between comparison operands and
10218 conditional move operands. */
10219 if (rtx_equal_p (operands[2], op1))
10220 {
10221 rtx tmp = op0;
10222 op0 = op1;
10223 op1 = tmp;
10224 code = reverse_condition_maybe_unordered (code);
10225 }
10226 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10227 {
10228 /* Check for min operation. */
4977bab6 10229 if (code == LT || code == UNLE)
0073023d 10230 {
10231 if (code == UNLE)
10232 {
10233 rtx tmp = op0;
10234 op0 = op1;
10235 op1 = tmp;
10236 }
10237 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10238 if (memory_operand (op0, VOIDmode))
10239 op0 = force_reg (GET_MODE (operands[0]), op0);
10240 if (GET_MODE (operands[0]) == SFmode)
10241 emit_insn (gen_minsf3 (operands[0], op0, op1));
10242 else
10243 emit_insn (gen_mindf3 (operands[0], op0, op1));
10244 return 1;
10245 }
10246 /* Check for max operation. */
4977bab6 10247 if (code == GT || code == UNGE)
0073023d 10248 {
10249 if (code == UNGE)
10250 {
10251 rtx tmp = op0;
10252 op0 = op1;
10253 op1 = tmp;
10254 }
10255 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10256 if (memory_operand (op0, VOIDmode))
10257 op0 = force_reg (GET_MODE (operands[0]), op0);
10258 if (GET_MODE (operands[0]) == SFmode)
10259 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10260 else
10261 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10262 return 1;
10263 }
10264 }
10265 /* Arrange for the condition to be an sse_comparison_operator. When
10266 in non-IEEE mode, try to canonicalize the destination operand
10267 to be first in the comparison - this helps reload to avoid extra
10268 moves. */
10269 if (!sse_comparison_operator (operands[1], VOIDmode)
10270 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10271 {
10272 rtx tmp = ix86_compare_op0;
10273 ix86_compare_op0 = ix86_compare_op1;
10274 ix86_compare_op1 = tmp;
10275 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10276 VOIDmode, ix86_compare_op0,
10277 ix86_compare_op1);
10278 }
d1f87653 10279 /* Similarly, try to make the result the first operand of the
 10280 conditional move. We also don't support the NE comparison on SSE,
 10281 so try to avoid it. */
10282 if ((rtx_equal_p (operands[0], operands[3])
10283 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10284 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10285 {
10286 rtx tmp = operands[2];
10287 operands[2] = operands[3];
92d0fb09 10288 operands[3] = tmp;
10289 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10290 (GET_CODE (operands[1])),
10291 VOIDmode, ix86_compare_op0,
10292 ix86_compare_op1);
10293 }
10294 if (GET_MODE (operands[0]) == SFmode)
10295 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10296 operands[2], operands[3],
10297 ix86_compare_op0, ix86_compare_op1));
10298 else
10299 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10300 operands[2], operands[3],
10301 ix86_compare_op0, ix86_compare_op1));
10302 return 1;
10303 }
10304
e075ae69 10305 /* The floating point conditional move instructions don't directly
0f290768 10306 support conditions resulting from a signed integer comparison. */
32b5b1aa 10307
e075ae69 10308 code = GET_CODE (operands[1]);
a1b8572c 10309 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10310
10311 /* The floating point conditional move instructions don't directly
10312 support signed integer comparisons. */
10313
a1b8572c 10314 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 10315 {
a1b8572c 10316 if (second_test != NULL || bypass_test != NULL)
b531087a 10317 abort ();
e075ae69 10318 tmp = gen_reg_rtx (QImode);
3a3677ff 10319 ix86_expand_setcc (code, tmp);
10320 code = NE;
10321 ix86_compare_op0 = tmp;
10322 ix86_compare_op1 = const0_rtx;
10323 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10324 }
10325 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10326 {
10327 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10328 emit_move_insn (tmp, operands[3]);
10329 operands[3] = tmp;
10330 }
10331 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10332 {
10333 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10334 emit_move_insn (tmp, operands[2]);
10335 operands[2] = tmp;
e075ae69 10336 }
e9a25f70 10337
10338 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10339 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 10340 compare_op,
10341 operands[2],
10342 operands[3])));
10343 if (bypass_test)
10344 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10345 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10346 bypass_test,
10347 operands[3],
10348 operands[0])));
10349 if (second_test)
10350 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10351 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10352 second_test,
10353 operands[2],
10354 operands[0])));
32b5b1aa 10355
e075ae69 10356 return 1;
10357}
10358
10359/* Expand conditional increment or decrement using adc/sbb instructions.
10360 The default case using setcc followed by the conditional move can be
10361 done by generic code. */
10362int
10363ix86_expand_int_addcc (operands)
10364 rtx operands[];
10365{
10366 enum rtx_code code = GET_CODE (operands[1]);
10367 rtx compare_op;
10368 rtx val = const0_rtx;
e6e81735 10369 bool fpcmp = false;
e6e81735 10370 enum machine_mode mode = GET_MODE (operands[0]);
10371
10372 if (operands[3] != const1_rtx
10373 && operands[3] != constm1_rtx)
10374 return 0;
10375 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10376 ix86_compare_op1, &compare_op))
10377 return 0;
10378 code = GET_CODE (compare_op);
10379
10380 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10381 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10382 {
10383 fpcmp = true;
10384 code = ix86_fp_compare_code_to_integer (code);
10385 }
10386
10387 if (code != LTU)
10388 {
10389 val = constm1_rtx;
10390 if (fpcmp)
10391 PUT_CODE (compare_op,
10392 reverse_condition_maybe_unordered
10393 (GET_CODE (compare_op)));
10394 else
10395 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10396 }
10397 PUT_MODE (compare_op, mode);
10398
10399 /* Construct either adc or sbb insn. */
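 /* (E.g. "x += (a >= b)" on unsigned operands becomes
 cmpl b, a ; sbbl $-1, x
 since x - (-1) - CF is x + 1 when the carry is clear, i.e.
 when a >= b, and x when it is set.) */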
10400 if ((code == LTU) == (operands[3] == constm1_rtx))
10401 {
10402 switch (GET_MODE (operands[0]))
10403 {
10404 case QImode:
e6e81735 10405 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10406 break;
10407 case HImode:
e6e81735 10408 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10409 break;
10410 case SImode:
e6e81735 10411 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10412 break;
10413 case DImode:
e6e81735 10414 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10415 break;
10416 default:
10417 abort ();
10418 }
10419 }
10420 else
10421 {
10422 switch (GET_MODE (operands[0]))
10423 {
10424 case QImode:
e6e81735 10425 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10426 break;
10427 case HImode:
e6e81735 10428 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10429 break;
10430 case SImode:
e6e81735 10431 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10432 break;
10433 case DImode:
e6e81735 10434 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10435 break;
10436 default:
10437 abort ();
10438 }
10439 }
10440 return 1; /* DONE */
10441}
10442
10443
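/* A minimal sketch of the transformation this expander performs,
   assuming unsigned operands A, B and C:

       if (a < b)
         c++;

   becomes "cmp a, b ; adc c, 0": the carry flag produced by the
   compare feeds adc/sbb directly, so no setcc and no conditional
   move are needed.  */
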
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                case TFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              parts[1] = gen_int_mode (l[2], SImode);
            }
          else
            abort ();
        }
    }

  return size;
}

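/* A worked example (an illustration, assuming a 32-bit target): an
   offsettable DFmode memory operand MEM is split as parts[0] = MEM and
   parts[1] = MEM+4, with XFmode/TFmode adding parts[2] = MEM+8; a
   DFmode value living in integer registers starting at %eax splits
   into parts[0] = %eax and parts[1] = %edx, since on this port the
   hard register following %eax is %edx.  */
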
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 receive the destination parts in the proper order;
   operands 5-7 receive the corresponding source parts.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move a double.  For a 64-bit
     target this is a single move; by hiding that fact here we simplify
     the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is a push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting a push, take care of source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          collisions = 1;
          emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
                                  XEXP (part[1][0], 0)));
          part[1][0] = change_address (part[1][0],
                                       TARGET_64BIT ? DImode : SImode,
                                       part[0][nparts - 1]);
          part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
          if (nparts == 3)
            part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* We use only the first 12 bytes of the TFmode value, but
                 for pushing we are required to adjust the stack as if we
                 were pushing a real 16-byte value.  */
              if (mode == TFmode && !TARGET_64BIT)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                       GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64-bit mode a 32-bit push is not available.  If this is a
             register, that is OK - we will just use the larger
             counterpart.  We also retype memory - these cases come from
             an attempt to avoid a REX prefix when moving the second half
             of a TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose the correct order so we do not overwrite the source before
     it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

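/* A sketch of the ordering logic above: for a 32-bit DImode move whose
   destination low word is also the source address register, emitting
   "movl (%eax), %eax" first would clobber %eax before
   "movl 4(%eax), %edx" could use it, so the parts are swapped and the
   high word is moved first.  */
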
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

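/* A worked example of the constant case above: for a DImode shift left
   by 40 (count >= 32), the high word becomes the low word shifted by
   40 - 32 = 8 and the low word becomes zero; for a shift by 10
   (count < 32), shld moves the top 10 bits of the low word into the
   high word and a plain shl finishes the low word.  */
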
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);

          if (! reload_completed)
            emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
          else
            {
              emit_move_insn (high[0], low[0]);
              emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            }

          if (count > 32)
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = gen_reg_rtx (SImode);
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test VARIABLE for
   alignment: if (VARIABLE & VALUE) is zero, jump to the label that is
   returned; the caller places its alignment fix-up code before that
   label.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}

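/* Typical use, as in the string expanders below (a sketch): the caller
   emits the code handling the unaligned residue between the test and
   the label, e.g.

       label = ix86_expand_aligntest (destreg, 1);
       ... emit a single byte move and adjust the counter ...
       emit_label (label);

   so the fix-up is skipped whenever the low bit is already clear.  */
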
/* Decrement COUNTREG by VALUE.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero-extend EXP, which may be SImode, to a Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out the proper mode for the counter.  For 32-bit targets it
     is always SImode; for 64-bit targets use SImode when possible,
     otherwise DImode.  Set COUNT to the number of bytes copied when it
     is known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  start_sequence ();

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size, emit a simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
        emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
                                        destreg, srcreg, countreg));
      else
        emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
                                  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
                                                destreg, srcreg, countreg));
              else
                emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
                                          destreg, srcreg, countreg));
            }
          else
            emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
                                            destreg, srcreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
        emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a library call when we know that the count is large and
         call overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        {
          end_sequence ();
          return 0;
        }

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is
         often able to predict the branches) and also it is friendlier to
         the hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
                                          destreg, srcreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
                                    destreg, srcreg, countreg2));
        }

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strmovsi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strmovhi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strmovqi (destreg, srcreg));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}

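/* A worked example of the constant-count tail handling above, assuming
   a 32-bit target, count = 11 and sufficient alignment: the expander
   emits rep movsl with a count of 11 >> 2 = 2 (8 bytes), then one
   16-bit move for the (count & 2) bytes and one byte move for the
   (count & 1) byte, copying exactly 11 bytes with no loop or branch.  */
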
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }
  /* Figure out the proper mode for the counter.  For 32-bit targets it
     is always SImode; for 64-bit targets use SImode when possible,
     otherwise DImode.  Set COUNT to the number of bytes cleared when it
     is known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size, emit a simple rep ; stosb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
        emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
                                         destreg, countreg));
      else
        emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
                                   destreg, countreg));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);
          if (size == 4)
            {
              if (TARGET_64BIT)
                emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
                                                 destreg, countreg));
              else
                emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
                                           destreg, countreg));
            }
          else
            emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
                                             destreg, countreg));
        }
      if (size == 8 && (count & 0x04))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a library call when we know that the count is large and
         call overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
                                             ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                             : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
                                           destreg, countreg2));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
          emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
                                     destreg, countreg2));
        }
      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strsetsi (destreg,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strsetsi (destreg,
                                   gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strsethi (destreg,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strsethi (destreg,
                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strsetqi (destreg,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strsetqi (destreg,
                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}

/* Expand strlen.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo (strlen (bar), strlen (bar));
         when the move and the subtraction are done here.  It does
         calculate the length just once when these instructions are done
         inside of output_strlen_unroll().  But I think since
         &bar[strlen (bar)] is often used and I use one fewer register for
         the lifetime of output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
                                         align, scratch4, scratch3));
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
                                     align, scratch4, scratch3));
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}

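/* A sketch of the arithmetic in the repnz scasb path above: the scan
   starts with the count register at -1 and decrements it once per byte
   examined, including the terminating zero, so afterwards
   length = ~count - 1.  E.g. for "ab" three bytes are scanned, the
   count ends at -4, and ~(-4) - 1 = 3 - 1 = 2.  */
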
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte boundary.  */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte boundary.  */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 0 or 2 bytes;
             check whether the pointer is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only bloats the code and does not help to
     speed it up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
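
  /* A worked instance of the formula above (an illustration, assuming
     the loaded word x is 0x41410041, i.e. its second-lowest byte is
     zero):

         (x - 0x01010101) & ~x & 0x80808080
       = 0x403fff40 & 0xbebeffbe & 0x80808080
       = 0x00008000

     which is nonzero exactly because one byte of x was zero; the set
     bit 0x8000 sits in that byte's position.  */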

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
        emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid a branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
     rtx retval, fnaddr, callarg1, callarg2, pop;
     int sibcall;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, 40);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status ()
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr ()
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }

      /* An index requires the two-byte modrm form.  */
      if (index)
        len += 1;
    }

  return len;
}

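/* Illustrative length calculations for the function above (a sketch of
   the usual encodings): "(%ecx)" has a base only and contributes 0
   extra bytes; "8(%ecx)" takes a 1-byte displacement (the 'K' range,
   -128..127); a bare symbol is a 4-byte absolute displacement; and
   "8(%ecx,%edx,4)" needs the extra SIB byte plus the 1-byte
   displacement, i.e. 2.  */
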
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        if (len)
          abort ();
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          {
            switch (get_attr_mode (insn))
              {
              case MODE_QI:
                len += 1;
                break;
              case MODE_HI:
                len += 2;
                break;
              case MODE_SI:
                len += 4;
                break;
              /* Immediates for DImode instructions are encoded as
                 32-bit sign-extended values.  */
              case MODE_DI:
                len += 4;
                break;
              default:
                fatal_insn ("unknown insn mode", insn);
              }
          }
      }
  return len;
}

/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (insn)
     rtx insn;
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
        ;
      else if (GET_CODE (set) == PARALLEL
               && GET_CODE (XVECEXP (set, 0, 0)) == SET)
        set = XVECEXP (set, 0, 0);
      else
        {
#ifdef ENABLE_CHECKING
          abort ();
#endif
          return 0;
        }

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate ()
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and nothing else that DEP_INSN sets.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

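/* An example of the address generation interlock this detects (a sketch
   of Pentium-era behavior): in the sequence

       addl $4, %ebx
       movl (%ebx), %eax

   the load needs %ebx for address generation one stage before the add
   has produced it, costing an extra cycle; ix86_adjust_cost below adds
   that cycle for PROCESSOR_PENTIUM.  */
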
c237e94a 12065static int
e075ae69 12066ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
12067 rtx insn, link, dep_insn;
12068 int cost;
12069{
e075ae69 12070 enum attr_type insn_type, dep_insn_type;
6ad48e84 12071 enum attr_memory memory, dep_memory;
e075ae69 12072 rtx set, set2;
9b00189f 12073 int dep_insn_code_number;
a269a03c 12074
d1f87653 12075 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12076 if (REG_NOTE_KIND (link) != 0)
309ada50 12077 return 0;
a269a03c 12078
9b00189f
JH
12079 dep_insn_code_number = recog_memoized (dep_insn);
12080
e075ae69 12081 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12082 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12083 return cost;
a269a03c 12084
1c71e60e
JH
12085 insn_type = get_attr_type (insn);
12086 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12087
9e555526 12088 switch (ix86_tune)
a269a03c
JC
12089 {
12090 case PROCESSOR_PENTIUM:
e075ae69
RH
12091 /* Address Generation Interlock adds a cycle of latency. */
12092 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12093 cost += 1;
12094
12095 /* ??? Compares pair with jump/setcc. */
12096 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12097 cost = 0;
12098
d1f87653 12099 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 12100 if (insn_type == TYPE_FMOV
e075ae69
RH
12101 && get_attr_memory (insn) == MEMORY_STORE
12102 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12103 cost += 1;
12104 break;
a269a03c 12105
e075ae69 12106 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
12107 memory = get_attr_memory (insn);
12108 dep_memory = get_attr_memory (dep_insn);
12109
0f290768 12110 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
12111 increase the cost here for non-imov insns. */
12112 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
12113 && dep_insn_type != TYPE_FMOV
12114 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
12115 cost += 1;
12116
12117 /* INT->FP conversion is expensive. */
12118 if (get_attr_fp_int_src (dep_insn))
12119 cost += 5;
12120
12121 /* There is one cycle extra latency between an FP op and a store. */
12122 if (insn_type == TYPE_FMOV
12123 && (set = single_set (dep_insn)) != NULL_RTX
12124 && (set2 = single_set (insn)) != NULL_RTX
12125 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12126 && GET_CODE (SET_DEST (set2)) == MEM)
12127 cost += 1;
6ad48e84
JH
12128
12129 /* Show the ability of the reorder buffer to hide the latency of a load
12130 by executing it in parallel with the previous instruction when the
12131 previous instruction is not needed to compute the address. */
12132 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12133 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12134 {
12135 /* Claim moves to take one cycle, as the core can issue one load
12136 at a time and the next load can start a cycle later. */
12137 if (dep_insn_type == TYPE_IMOV
12138 || dep_insn_type == TYPE_FMOV)
12139 cost = 1;
12140 else if (cost > 1)
12141 cost--;
12142 }
e075ae69 12143 break;
a269a03c 12144
e075ae69 12145 case PROCESSOR_K6:
6ad48e84
JH
12146 memory = get_attr_memory (insn);
12147 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
12148 /* The esp dependency is resolved before the instruction is really
12149 finished. */
12150 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12151 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12152 return 1;
a269a03c 12153
0f290768 12154 /* Since we can't represent delayed latencies of load+operation,
e075ae69 12155 increase the cost here for non-imov insns. */
6ad48e84 12156 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
12157 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12158
12159 /* INT->FP conversion is expensive. */
12160 if (get_attr_fp_int_src (dep_insn))
12161 cost += 5;
6ad48e84
JH
12162
12163 /* Show the ability of the reorder buffer to hide the latency of a load
12164 by executing it in parallel with the previous instruction when the
12165 previous instruction is not needed to compute the address. */
12166 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12167 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12168 {
12169 /* Claim moves to take one cycle, as the core can issue one load
12170 at a time and the next load can start a cycle later. */
12171 if (dep_insn_type == TYPE_IMOV
12172 || dep_insn_type == TYPE_FMOV)
12173 cost = 1;
12174 else if (cost > 2)
12175 cost -= 2;
12176 else
12177 cost = 1;
12178 }
a14003ee 12179 break;
e075ae69 12180
309ada50 12181 case PROCESSOR_ATHLON:
4977bab6 12182 case PROCESSOR_K8:
6ad48e84
JH
12183 memory = get_attr_memory (insn);
12184 dep_memory = get_attr_memory (dep_insn);
12185
6ad48e84
JH
12186 /* Show the ability of the reorder buffer to hide the latency of a load
12187 by executing it in parallel with the previous instruction when the
12188 previous instruction is not needed to compute the address. */
12189 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12190 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12191 {
12192 /* Claim moves to take one cycle, as the core can issue one load
12193 at a time and the next load can start a cycle later. */
12194 if (dep_insn_type == TYPE_IMOV
12195 || dep_insn_type == TYPE_FMOV)
12196 cost = 0;
12197 else if (cost >= 3)
12198 cost -= 3;
12199 else
12200 cost = 0;
12201 }
309ada50 12202
a269a03c 12203 default:
a269a03c
JC
12204 break;
12205 }
12206
12207 return cost;
12208}
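/* Worked example of the K6 rules above (the starting number is
   illustrative; the real default comes from the scheduler description):
   if dep_insn both loads and computes (MEMORY_LOAD, not TYPE_IMOV), a
   default cost of 2 becomes 2 + 2 = 4; if insn is itself a load whose
   address does not depend on dep_insn, the reorder-buffer rule then
   gives 4 - 2 = 2.  */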
0a726ef1 12209
e075ae69
RH
12210static union
12211{
12212 struct ppro_sched_data
12213 {
12214 rtx decode[3];
12215 int issued_this_cycle;
12216 } ppro;
12217} ix86_sched_data;
0a726ef1 12218
e075ae69
RH
12219static enum attr_ppro_uops
12220ix86_safe_ppro_uops (insn)
12221 rtx insn;
12222{
12223 if (recog_memoized (insn) >= 0)
12224 return get_attr_ppro_uops (insn);
12225 else
12226 return PPRO_UOPS_MANY;
12227}
0a726ef1 12228
e075ae69
RH
12229static void
12230ix86_dump_ppro_packet (dump)
12231 FILE *dump;
0a726ef1 12232{
e075ae69 12233 if (ix86_sched_data.ppro.decode[0])
0a726ef1 12234 {
e075ae69
RH
12235 fprintf (dump, "PPRO packet: %d",
12236 INSN_UID (ix86_sched_data.ppro.decode[0]));
12237 if (ix86_sched_data.ppro.decode[1])
12238 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12239 if (ix86_sched_data.ppro.decode[2])
12240 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12241 fputc ('\n', dump);
12242 }
12243}
0a726ef1 12244
e075ae69 12245/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 12246
c237e94a
ZW
12247static void
12248ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
12249 FILE *dump ATTRIBUTE_UNUSED;
12250 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 12251 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
12252{
12253 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12254}
12255
12256/* Shift INSN to SLOT, and shift everything else down. */
12257
12258static void
12259ix86_reorder_insn (insnp, slot)
12260 rtx *insnp, *slot;
12261{
12262 if (insnp != slot)
12263 {
12264 rtx insn = *insnp;
0f290768 12265 do
e075ae69
RH
12266 insnp[0] = insnp[1];
12267 while (++insnp != slot);
12268 *insnp = insn;
0a726ef1 12269 }
e075ae69
RH
12270}
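/* Example: with ready[] = { A, B, C, D } and e_ready = &ready[3],
   ix86_reorder_insn (&ready[1], e_ready) rotates B into the last
   (highest-priority) slot, leaving { A, C, D, B }; the relative order
   of the remaining insns is preserved.  */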
12271
c6991660 12272static void
78a0d70c
ZW
12273ix86_sched_reorder_ppro (ready, e_ready)
12274 rtx *ready;
12275 rtx *e_ready;
12276{
12277 rtx decode[3];
12278 enum attr_ppro_uops cur_uops;
12279 int issued_this_cycle;
12280 rtx *insnp;
12281 int i;
e075ae69 12282
0f290768 12283 /* At this point .ppro.decode contains the state of the three
78a0d70c 12284 decoders from last "cycle". That is, those insns that were
0f290768 12285 actually independent. But here we're scheduling for the
78a0d70c
ZW
12286 decoder, and we may find things that are decodable in the
12287 same cycle. */
e075ae69 12288
0f290768 12289 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 12290 issued_this_cycle = 0;
e075ae69 12291
78a0d70c
ZW
12292 insnp = e_ready;
12293 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 12294
78a0d70c
ZW
12295 /* If the decoders are empty, and we've a complex insn at the
12296 head of the priority queue, let it issue without complaint. */
12297 if (decode[0] == NULL)
12298 {
12299 if (cur_uops == PPRO_UOPS_MANY)
12300 {
12301 decode[0] = *insnp;
12302 goto ppro_done;
12303 }
12304
12305 /* Otherwise, search for a 2-4 uop insn to issue. */
12306 while (cur_uops != PPRO_UOPS_FEW)
12307 {
12308 if (insnp == ready)
12309 break;
12310 cur_uops = ix86_safe_ppro_uops (*--insnp);
12311 }
12312
12313 /* If so, move it to the head of the line. */
12314 if (cur_uops == PPRO_UOPS_FEW)
12315 ix86_reorder_insn (insnp, e_ready);
0a726ef1 12316
78a0d70c
ZW
12317 /* Issue the head of the queue. */
12318 issued_this_cycle = 1;
12319 decode[0] = *e_ready--;
12320 }
fb693d44 12321
78a0d70c
ZW
12322 /* Look for simple insns to fill in the other two slots. */
12323 for (i = 1; i < 3; ++i)
12324 if (decode[i] == NULL)
12325 {
a151daf0 12326 if (ready > e_ready)
78a0d70c 12327 goto ppro_done;
fb693d44 12328
e075ae69
RH
12329 insnp = e_ready;
12330 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
12331 while (cur_uops != PPRO_UOPS_ONE)
12332 {
12333 if (insnp == ready)
12334 break;
12335 cur_uops = ix86_safe_ppro_uops (*--insnp);
12336 }
fb693d44 12337
78a0d70c
ZW
12338 /* Found one. Move it to the head of the queue and issue it. */
12339 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 12340 {
78a0d70c
ZW
12341 ix86_reorder_insn (insnp, e_ready);
12342 decode[i] = *e_ready--;
12343 issued_this_cycle++;
12344 continue;
12345 }
fb693d44 12346
78a0d70c
ZW
12347 /* ??? Didn't find one. Ideally, here we would do a lazy split
12348 of 2-uop insns, issue one and queue the other. */
12349 }
fb693d44 12350
78a0d70c
ZW
12351 ppro_done:
12352 if (issued_this_cycle == 0)
12353 issued_this_cycle = 1;
12354 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12355}
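/* This mirrors the PPro/PII "4-1-1" decode template: only decoder 0
   handles multi-uop insns, while decoders 1 and 2 take single-uop
   insns.  E.g. given a ready queue of { load-op, add, add }, the
   load-op (PPRO_UOPS_FEW) goes to slot 0 and the two one-uop adds fill
   slots 1 and 2, so all three can decode in the same cycle.  */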
fb693d44 12356
0f290768 12357/* We are about to begin issuing insns for this clock cycle.
78a0d70c 12358 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
12359static int
12360ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
12361 FILE *dump ATTRIBUTE_UNUSED;
12362 int sched_verbose ATTRIBUTE_UNUSED;
12363 rtx *ready;
c237e94a 12364 int *n_readyp;
78a0d70c
ZW
12365 int clock_var ATTRIBUTE_UNUSED;
12366{
c237e94a 12367 int n_ready = *n_readyp;
78a0d70c 12368 rtx *e_ready = ready + n_ready - 1;
fb693d44 12369
fce5a9f2 12370 /* Make sure to go ahead and initialize key items in
a151daf0
JL
12371 ix86_sched_data if we are not going to bother trying to
12372 reorder the ready queue. */
78a0d70c 12373 if (n_ready < 2)
a151daf0
JL
12374 {
12375 ix86_sched_data.ppro.issued_this_cycle = 1;
12376 goto out;
12377 }
e075ae69 12378
9e555526 12379 switch (ix86_tune)
78a0d70c
ZW
12380 {
12381 default:
12382 break;
e075ae69 12383
78a0d70c
ZW
12384 case PROCESSOR_PENTIUMPRO:
12385 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 12386 break;
fb693d44
RH
12387 }
12388
e075ae69
RH
12389out:
12390 return ix86_issue_rate ();
12391}
fb693d44 12392
e075ae69
RH
12393/* We are about to issue INSN. Return the number of insns left on the
12394 ready queue that can be issued this cycle. */
b222082e 12395
c237e94a 12396static int
e075ae69
RH
12397ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12398 FILE *dump;
12399 int sched_verbose;
12400 rtx insn;
12401 int can_issue_more;
12402{
12403 int i;
9e555526 12404 switch (ix86_tune)
fb693d44 12405 {
e075ae69
RH
12406 default:
12407 return can_issue_more - 1;
fb693d44 12408
e075ae69
RH
12409 case PROCESSOR_PENTIUMPRO:
12410 {
12411 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 12412
e075ae69
RH
12413 if (uops == PPRO_UOPS_MANY)
12414 {
12415 if (sched_verbose)
12416 ix86_dump_ppro_packet (dump);
12417 ix86_sched_data.ppro.decode[0] = insn;
12418 ix86_sched_data.ppro.decode[1] = NULL;
12419 ix86_sched_data.ppro.decode[2] = NULL;
12420 if (sched_verbose)
12421 ix86_dump_ppro_packet (dump);
12422 ix86_sched_data.ppro.decode[0] = NULL;
12423 }
12424 else if (uops == PPRO_UOPS_FEW)
12425 {
12426 if (sched_verbose)
12427 ix86_dump_ppro_packet (dump);
12428 ix86_sched_data.ppro.decode[0] = insn;
12429 ix86_sched_data.ppro.decode[1] = NULL;
12430 ix86_sched_data.ppro.decode[2] = NULL;
12431 }
12432 else
12433 {
12434 for (i = 0; i < 3; ++i)
12435 if (ix86_sched_data.ppro.decode[i] == NULL)
12436 {
12437 ix86_sched_data.ppro.decode[i] = insn;
12438 break;
12439 }
12440 if (i == 3)
12441 abort ();
12442 if (i == 2)
12443 {
12444 if (sched_verbose)
12445 ix86_dump_ppro_packet (dump);
12446 ix86_sched_data.ppro.decode[0] = NULL;
12447 ix86_sched_data.ppro.decode[1] = NULL;
12448 ix86_sched_data.ppro.decode[2] = NULL;
12449 }
12450 }
12451 }
12452 return --ix86_sched_data.ppro.issued_this_cycle;
12453 }
fb693d44 12454}
9b690711
RH
12455
12456static int
12457ia32_use_dfa_pipeline_interface ()
12458{
4977bab6 12459 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
9b690711
RH
12460 return 1;
12461 return 0;
12462}
12463
12464/* How many alternative schedules to try. This should be as wide as the
12465 scheduling freedom in the DFA, but no wider. Making this value too
12466 large results in extra work for the scheduler. */
12467
12468static int
12469ia32_multipass_dfa_lookahead ()
12470{
9e555526 12471 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711
RH
12472 return 2;
12473 else
12474 return 0;
12475}
12476
a7180f70 12477\f
0e4970d7
RK
12478/* Walk through INSNS and look for MEM references whose address is DSTREG or
12479 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12480 appropriate. */
12481
12482void
12483ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12484 rtx insns;
12485 rtx dstref, srcref, dstreg, srcreg;
12486{
12487 rtx insn;
12488
12489 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12490 if (INSN_P (insn))
12491 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12492 dstreg, srcreg);
12493}
12494
12495/* Subroutine of above to actually do the updating by recursively walking
12496 the rtx. */
12497
12498static void
12499ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12500 rtx x;
12501 rtx dstref, srcref, dstreg, srcreg;
12502{
12503 enum rtx_code code = GET_CODE (x);
12504 const char *format_ptr = GET_RTX_FORMAT (code);
12505 int i, j;
12506
12507 if (code == MEM && XEXP (x, 0) == dstreg)
12508 MEM_COPY_ATTRIBUTES (x, dstref);
12509 else if (code == MEM && XEXP (x, 0) == srcreg)
12510 MEM_COPY_ATTRIBUTES (x, srcref);
12511
12512 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12513 {
12514 if (*format_ptr == 'e')
12515 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12516 dstreg, srcreg);
12517 else if (*format_ptr == 'E')
12518 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 12519 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
12520 dstreg, srcreg);
12521 }
12522}
12523\f
a7180f70
BS
12524/* Compute the alignment given to a constant that is being placed in memory.
12525 EXP is the constant and ALIGN is the alignment that the object would
12526 ordinarily have.
12527 The value of this function is used instead of that alignment to align
12528 the object. */
12529
12530int
12531ix86_constant_alignment (exp, align)
12532 tree exp;
12533 int align;
12534{
12535 if (TREE_CODE (exp) == REAL_CST)
12536 {
12537 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12538 return 64;
12539 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12540 return 128;
12541 }
12542 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12543 && align < 256)
12544 return 256;
12545
12546 return align;
12547}
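/* Examples of the rules above: a DFmode constant that would ordinarily
   get 32-bit alignment is raised to 64 bits, and a 40-character string
   constant is raised to 256 bits (presumably so the inline string ops
   can use wide aligned accesses).  */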
12548
12549/* Compute the alignment for a static variable.
12550 TYPE is the data type, and ALIGN is the alignment that
12551 the object would ordinarily have. The value of this function is used
12552 instead of that alignment to align the object. */
12553
12554int
12555ix86_data_alignment (type, align)
12556 tree type;
12557 int align;
12558{
12559 if (AGGREGATE_TYPE_P (type)
12560 && TYPE_SIZE (type)
12561 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12562 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12563 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12564 return 256;
12565
0d7d98ee
JH
12566 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
12567 to a 16-byte boundary. */
12568 if (TARGET_64BIT)
12569 {
12570 if (AGGREGATE_TYPE_P (type)
12571 && TYPE_SIZE (type)
12572 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12573 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12574 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12575 return 128;
12576 }
12577
a7180f70
BS
12578 if (TREE_CODE (type) == ARRAY_TYPE)
12579 {
12580 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12581 return 64;
12582 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12583 return 128;
12584 }
12585 else if (TREE_CODE (type) == COMPLEX_TYPE)
12586 {
0f290768 12587
a7180f70
BS
12588 if (TYPE_MODE (type) == DCmode && align < 64)
12589 return 64;
12590 if (TYPE_MODE (type) == XCmode && align < 128)
12591 return 128;
12592 }
12593 else if ((TREE_CODE (type) == RECORD_TYPE
12594 || TREE_CODE (type) == UNION_TYPE
12595 || TREE_CODE (type) == QUAL_UNION_TYPE)
12596 && TYPE_FIELDS (type))
12597 {
12598 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12599 return 64;
12600 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12601 return 128;
12602 }
12603 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12604 || TREE_CODE (type) == INTEGER_TYPE)
12605 {
12606 if (TYPE_MODE (type) == DFmode && align < 64)
12607 return 64;
12608 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12609 return 128;
12610 }
12611
12612 return align;
12613}
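/* Worked example: "static double a[2];" has TYPE_SIZE 128 bits.  The
   256-bit rule does not fire; on x86-64 the aggregate rule raises the
   alignment to 128 bits, while on ia32 the ARRAY_TYPE/DFmode rule
   yields 64 bits.  */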
12614
12615/* Compute the alignment for a local variable.
12616 TYPE is the data type, and ALIGN is the alignment that
12617 the object would ordinarily have. The value of this function is used
12618 instead of that alignment to align the object. */
12619
12620int
12621ix86_local_alignment (type, align)
12622 tree type;
12623 int align;
12624{
0d7d98ee
JH
12625 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
12626 to a 16-byte boundary. */
12627 if (TARGET_64BIT)
12628 {
12629 if (AGGREGATE_TYPE_P (type)
12630 && TYPE_SIZE (type)
12631 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12632 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12633 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12634 return 128;
12635 }
a7180f70
BS
12636 if (TREE_CODE (type) == ARRAY_TYPE)
12637 {
12638 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12639 return 64;
12640 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12641 return 128;
12642 }
12643 else if (TREE_CODE (type) == COMPLEX_TYPE)
12644 {
12645 if (TYPE_MODE (type) == DCmode && align < 64)
12646 return 64;
12647 if (TYPE_MODE (type) == XCmode && align < 128)
12648 return 128;
12649 }
12650 else if ((TREE_CODE (type) == RECORD_TYPE
12651 || TREE_CODE (type) == UNION_TYPE
12652 || TREE_CODE (type) == QUAL_UNION_TYPE)
12653 && TYPE_FIELDS (type))
12654 {
12655 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12656 return 64;
12657 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12658 return 128;
12659 }
12660 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12661 || TREE_CODE (type) == INTEGER_TYPE)
12662 {
0f290768 12663
a7180f70
BS
12664 if (TYPE_MODE (type) == DFmode && align < 64)
12665 return 64;
12666 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12667 return 128;
12668 }
12669 return align;
12670}
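/* Note that TYPE_SIZE is measured in bits, so the ">= 16" test above
   fires for any aggregate local of two bytes or more; on x86-64 nearly
   every aggregate on the stack therefore gets 128-bit alignment, which
   is stricter than the 16-byte array rule the comment cites.  */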
0ed08620
JH
12671\f
12672/* Emit RTL insns to initialize the variable parts of a trampoline.
12673 FNADDR is an RTX for the address of the function's pure code.
12674 CXT is an RTX for the static chain value for the function. */
12675void
12676x86_initialize_trampoline (tramp, fnaddr, cxt)
12677 rtx tramp, fnaddr, cxt;
12678{
12679 if (!TARGET_64BIT)
12680 {
12681 /* Compute offset from the end of the jmp to the target function. */
12682 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12683 plus_constant (tramp, 10),
12684 NULL_RTX, 1, OPTAB_DIRECT);
12685 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 12686 gen_int_mode (0xb9, QImode));
0ed08620
JH
12687 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12688 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 12689 gen_int_mode (0xe9, QImode));
0ed08620
JH
12690 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12691 }
12692 else
12693 {
12694 int offset = 0;
12695 /* Try to load the address using the shorter movl instead of movabs.
12696 We may want to support movq for kernel mode, but the kernel does not
12697 use trampolines at the moment. */
12698 if (x86_64_zero_extended_value (fnaddr))
12699 {
12700 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12701 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12702 gen_int_mode (0xbb41, HImode));
0ed08620
JH
12703 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12704 gen_lowpart (SImode, fnaddr));
12705 offset += 6;
12706 }
12707 else
12708 {
12709 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12710 gen_int_mode (0xbb49, HImode));
0ed08620
JH
12711 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12712 fnaddr);
12713 offset += 10;
12714 }
12715 /* Load static chain using movabs to r10. */
12716 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12717 gen_int_mode (0xba49, HImode));
0ed08620
JH
12718 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12719 cxt);
12720 offset += 10;
12721 /* Jump to r11. */
12722 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12723 gen_int_mode (0xff49, HImode));
0ed08620 12724 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 12725 gen_int_mode (0xe3, QImode));
0ed08620
JH
12726 offset += 3;
12727 if (offset > TRAMPOLINE_SIZE)
b531087a 12728 abort ();
0ed08620 12729 }
5791cc29
JT
12730
12731#ifdef TRANSFER_FROM_TRAMPOLINE
12732 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12733 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12734#endif
0ed08620 12735}
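/* For reference, a sketch of the bytes the 32-bit path above emits
   (10 bytes total; disp is relative to the end of the jmp):

	offset 0:  b9 <cxt:4>	movl $CXT, %ecx
	offset 5:  e9 <disp:4>	jmp  FNADDR	; disp = fnaddr - (tramp + 10)

   The 64-bit path loads FNADDR into %r11 (41 bb imm32, or 49 bb imm64),
   CXT into %r10 (49 ba imm64), then jumps with 49 ff e3 (jmp *%r11).  */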
eeb06b1b 12736\f
6a2dd09a
RS
12737#define def_builtin(MASK, NAME, TYPE, CODE) \
12738do { \
453ee231
JH
12739 if ((MASK) & target_flags \
12740 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
6a2dd09a
RS
12741 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12742 NULL, NULL_TREE); \
eeb06b1b 12743} while (0)
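/* Typical use (a hypothetical call mirroring the real registrations
   made below):

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK_64BIT test lets 64-bit-only builtins share the same tables
   without being registered on ia32.  */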
bd793c65 12744
bd793c65
BS
12745struct builtin_description
12746{
8b60264b
KG
12747 const unsigned int mask;
12748 const enum insn_code icode;
12749 const char *const name;
12750 const enum ix86_builtins code;
12751 const enum rtx_code comparison;
12752 const unsigned int flag;
bd793c65
BS
12753};
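/* Each entry ties a target mask to an insn pattern and a builtin.  For
   example, the bdesc_comi entry below

     { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   expands __builtin_ia32_comieq through the sse_comi pattern with an
   UNEQ comparison; in bdesc_2arg a nonzero final flag marks variants
   (such as cmpgtps) implemented by swapping the operands.  */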
12754
fbe5eb6d
BS
12755/* Used for builtins that are enabled both by -msse and -msse2. */
12756#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
453ee231
JH
12757#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12758#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
fbe5eb6d 12759
8b60264b 12760static const struct builtin_description bdesc_comi[] =
bd793c65 12761{
1194ca05
JH
12762 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12763 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12764 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12765 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12766 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12767 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12768 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12769 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12770 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12771 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12772 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12773 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12774 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12775 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12776 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12777 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12778 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12779 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12780 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12781 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12782 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12783 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12784 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
12786};
12787
8b60264b 12788static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12789{
12790 /* SSE */
fbe5eb6d
BS
12791 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12792 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12793 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12794 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12796 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12797 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12798 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12799
12800 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12801 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12802 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12803 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12804 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12805 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12806 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12807 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12808 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12809 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12810 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12811 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12812 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12813 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12814 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
fbe5eb6d
BS
12815 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12816 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12817 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12818 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
fbe5eb6d
BS
12819 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12820
12821 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12822 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12823 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12824 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12825
1877be45
JH
12826 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12827 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12828 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12829 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12830
fbe5eb6d
BS
12831 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12832 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12833 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12834 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12835 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12836
12837 /* MMX */
eeb06b1b
BS
12838 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12839 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12840 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12841 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12842 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12843 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12844 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12845 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12846
12847 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12848 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12849 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12850 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12851 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12852 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12853 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12854 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12855
12856 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12857 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 12858 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12859
12860 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12861 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12862 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12863 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12864
fbe5eb6d
BS
12865 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12866 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12867
12868 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12869 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12870 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12871 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12872 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12874
fbe5eb6d
BS
12875 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12876 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12877 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12878 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12879
12880 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12883 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12886
12887 /* Special. */
eeb06b1b
BS
12888 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12889 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12890 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12891
fbe5eb6d
BS
12892 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12893 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
453ee231 12894 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12895
12896 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12897 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12898 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12899 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12900 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12901 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12902
12903 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12905 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12906 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12909
12910 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12911 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12912 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12913 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12914
fbe5eb6d
BS
12915 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12916 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12917
12918 /* SSE2 */
12919 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12927
12928 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12929 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12930 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12931 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12932 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12933 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12934 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12935 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12936 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12937 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12938 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12939 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12940 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12941 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12942 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12943 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12944 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12945 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12946 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12947 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12948
12949 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12953
1877be45
JH
12954 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12958
12959 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12962
12963 /* SSE2 MMX */
12964 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12967 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12968 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12971 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12972
12973 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12974 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12975 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12976 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12977 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12978 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12979 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12980 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12981
12982 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12986
916b60b7
BS
12987 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12991
12992 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12994
12995 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13001
13002 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13006
13007 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 13010 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
13011 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 13014 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 13015
916b60b7
BS
13016 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13019
13020 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13022
13023 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13029
13030 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13036
13037 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13041
13042 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13043
fbe5eb6d 13044 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
453ee231 13045 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d
BS
13046 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
13048};
13049
8b60264b 13050static const struct builtin_description bdesc_1arg[] =
bd793c65 13051{
fbe5eb6d
BS
13052 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13053 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13054
13055 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13056 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13057 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13058
13059 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13060 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
453ee231 13061 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
fbe5eb6d
BS
13062 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13063 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
453ee231 13064 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
13065
13066 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 13069 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
13070
13071 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13072
13073 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 13075
fbe5eb6d
BS
13076 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13079 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 13081
fbe5eb6d 13082 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 13083
fbe5eb6d
BS
13084 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
453ee231
JH
13086 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13087 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
13088
13089 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
13091 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13092
13093 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
bd793c65
BS
13094};
13095
f6155fda
SS
13096void
13097ix86_init_builtins ()
13098{
13099 if (TARGET_MMX)
13100 ix86_init_mmx_sse_builtins ();
13101}
13102
13103/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
13104 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13105 builtins. */
e37af218 13106static void
f6155fda 13107ix86_init_mmx_sse_builtins ()
bd793c65 13108{
8b60264b 13109 const struct builtin_description * d;
77ebd435 13110 size_t i;
bd793c65
BS
13111
13112 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
13113 tree pcchar_type_node = build_pointer_type (
13114 build_type_variant (char_type_node, 1, 0));
bd793c65 13115 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
13116 tree pcfloat_type_node = build_pointer_type (
13117 build_type_variant (float_type_node, 1, 0));
bd793c65 13118 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13119 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13120 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13121
13122 /* Comparisons. */
13123 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13124 = build_function_type_list (integer_type_node,
13125 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13126 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13127 = build_function_type_list (V4SI_type_node,
13128 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13129 /* MMX/SSE/integer conversions. */
bd793c65 13130 tree int_ftype_v4sf
b4de2f7d
AH
13131 = build_function_type_list (integer_type_node,
13132 V4SF_type_node, NULL_TREE);
453ee231
JH
13133 tree int64_ftype_v4sf
13134 = build_function_type_list (long_long_integer_type_node,
13135 V4SF_type_node, NULL_TREE);
bd793c65 13136 tree int_ftype_v8qi
b4de2f7d 13137 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13138 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13139 = build_function_type_list (V4SF_type_node,
13140 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13141 tree v4sf_ftype_v4sf_int64
13142 = build_function_type_list (V4SF_type_node,
13143 V4SF_type_node, long_long_integer_type_node,
13144 NULL_TREE);
bd793c65 13145 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13146 = build_function_type_list (V4SF_type_node,
13147 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13148 tree int_ftype_v4hi_int
b4de2f7d
AH
13149 = build_function_type_list (integer_type_node,
13150 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13151 tree v4hi_ftype_v4hi_int_int
e7a60f56 13152 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13153 integer_type_node, integer_type_node,
13154 NULL_TREE);
bd793c65
BS
13155 /* Miscellaneous. */
13156 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13157 = build_function_type_list (V8QI_type_node,
13158 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13159 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13160 = build_function_type_list (V4HI_type_node,
13161 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13162 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13163 = build_function_type_list (V4SF_type_node,
13164 V4SF_type_node, V4SF_type_node,
13165 integer_type_node, NULL_TREE);
bd793c65 13166 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13167 = build_function_type_list (V2SI_type_node,
13168 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13169 tree v4hi_ftype_v4hi_int
b4de2f7d 13170 = build_function_type_list (V4HI_type_node,
e7a60f56 13171 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13172 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13173 = build_function_type_list (V4HI_type_node,
13174 V4HI_type_node, long_long_unsigned_type_node,
13175 NULL_TREE);
bd793c65 13176 tree v2si_ftype_v2si_di
b4de2f7d
AH
13177 = build_function_type_list (V2SI_type_node,
13178 V2SI_type_node, long_long_unsigned_type_node,
13179 NULL_TREE);
bd793c65 13180 tree void_ftype_void
b4de2f7d 13181 = build_function_type (void_type_node, void_list_node);
bd793c65 13182 tree void_ftype_unsigned
b4de2f7d 13183 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 13184 tree unsigned_ftype_void
b4de2f7d 13185 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13186 tree di_ftype_void
b4de2f7d 13187 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13188 tree v4sf_ftype_void
b4de2f7d 13189 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13190 tree v2si_ftype_v4sf
b4de2f7d 13191 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13192 /* Loads/stores. */
bd793c65 13193 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13194 = build_function_type_list (void_type_node,
13195 V8QI_type_node, V8QI_type_node,
13196 pchar_type_node, NULL_TREE);
068f5dea
JH
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
        build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
        build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
                                V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
                                double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
                                pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

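/* Illustrative note, not from the original source: bdesc_2arg is the
   table driving the loop above.  Judging from the d->mask, d->icode,
   d->name and d->code uses here, each entry pairs a CPUID feature mask
   and an insn pattern with the builtin's name and function code.  A
   hypothetical entry might look like (exact field order is an
   assumption):

       { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
         IX86_BUILTIN_ADDPS, 0, 0 }

   The switch above then derives the builtin's C prototype purely from
   the mode of the pattern's first input operand.  */
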
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}
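
/* Illustrative note, not from the original source: once registered
   above, these builtins are directly callable from C.  A minimal usage
   sketch, assuming an SSE-enabled target (-msse):

       typedef float __v4sf __attribute__ ((vector_size (16)));

       __v4sf
       sqrt_unaligned (const float *p)
       {
         __v4sf x = __builtin_ia32_loadups (p);
         return __builtin_ia32_sqrtps (x);
       }

   Each such call reaches ix86_expand_builtin below, which maps the
   function code back to the insn pattern named in its def_builtin
   call.  */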

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}
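
/* Illustrative note, not from the original source: the helper above
   only matters for erroneous input.  Given something like

       __builtin_ia32_addps (undeclared_name, y);

   error recovery can hand the expanders a const0_rtx where a vector
   value was expected; substituting a freshly cleared register lets
   expansion continue without crashing on the bad operand.  */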

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* If the operands are not already in the modes the insn pattern
     expects, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
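
/* Illustrative note, not from the original source: a typical caller is
   the 3DNow! PFADD case further down in ix86_expand_builtin, which does

       return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

   emitting, roughly, a (set target (plus:V2SF op0 op1)) style insn via
   GEN_FCN, with both operands forced into registers as needed.  */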

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
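
/* Illustrative note, not from the original source: stores produce no
   value, hence the constant 0 return.  For IX86_BUILTIN_STOREAPS the
   call above emits roughly

       (set (mem:V4SF addr) (reg:V4SF val))

   where the address argument has first been copied into a Pmode
   register.  */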

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
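
/* Illustrative note, not from the original source: the duplication of
   op0 into op1 above reflects how the vm* scalar patterns appear to be
   shaped: the operation is applied to the low element while the
   remaining elements are merged from the other vector input, so for
   the single-argument builtins (sqrtss, rsqrtss, rcpss) the same
   register is supplied for both operands.  */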

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
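
/* Illustrative note, not from the original source: d->flag marks
   comparisons with no direct hardware encoding.  SSE provides cmpltps
   but no cmpgtps, so a greater-than builtin is expanded by copying the
   second argument into a fresh register and swapping the operand
   order, turning a > b into b < a before the comparison rtx is
   built.  */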

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
13977
13978/* Expand an expression EXP that calls a built-in function,
13979 with result going to TARGET if that's convenient
13980 (and in mode MODE if that's convenient).
13981 SUBTARGET may be used as the target for computing one of EXP's operands.
13982 IGNORE is nonzero if the value is to be ignored. */
13983
13984rtx
13985ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13986 tree exp;
13987 rtx target;
13988 rtx subtarget ATTRIBUTE_UNUSED;
13989 enum machine_mode mode ATTRIBUTE_UNUSED;
13990 int ignore ATTRIBUTE_UNUSED;
13991{
8b60264b 13992 const struct builtin_description *d;
77ebd435 13993 size_t i;
bd793c65
BS
13994 enum insn_code icode;
13995 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13996 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13997 tree arg0, arg1, arg2;
bd793c65
BS
13998 rtx op0, op1, op2, pat;
13999 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 14000 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
14001
14002 switch (fcode)
14003 {
14004 case IX86_BUILTIN_EMMS:
14005 emit_insn (gen_emms ());
14006 return 0;
14007
14008 case IX86_BUILTIN_SFENCE:
14009 emit_insn (gen_sfence ());
14010 return 0;
14011
bd793c65 14012 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
14013 case IX86_BUILTIN_PEXTRW128:
14014 icode = (fcode == IX86_BUILTIN_PEXTRW
14015 ? CODE_FOR_mmx_pextrw
14016 : CODE_FOR_sse2_pextrw);
bd793c65
BS
14017 arg0 = TREE_VALUE (arglist);
14018 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14019 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14020 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14021 tmode = insn_data[icode].operand[0].mode;
14022 mode0 = insn_data[icode].operand[1].mode;
14023 mode1 = insn_data[icode].operand[2].mode;
14024
14025 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14026 op0 = copy_to_mode_reg (mode0, op0);
14027 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14028 {
14029 /* @@@ better error message */
14030 error ("selector must be an immediate");
6f1a6c5b 14031 return gen_reg_rtx (tmode);
bd793c65
BS
14032 }
14033 if (target == 0
14034 || GET_MODE (target) != tmode
14035 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14036 target = gen_reg_rtx (tmode);
14037 pat = GEN_FCN (icode) (target, op0, op1);
14038 if (! pat)
14039 return 0;
14040 emit_insn (pat);
14041 return target;
14042
14043 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
14044 case IX86_BUILTIN_PINSRW128:
14045 icode = (fcode == IX86_BUILTIN_PINSRW
14046 ? CODE_FOR_mmx_pinsrw
14047 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
14048 arg0 = TREE_VALUE (arglist);
14049 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14050 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14051 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14052 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14053 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14054 tmode = insn_data[icode].operand[0].mode;
14055 mode0 = insn_data[icode].operand[1].mode;
14056 mode1 = insn_data[icode].operand[2].mode;
14057 mode2 = insn_data[icode].operand[3].mode;
14058
14059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14060 op0 = copy_to_mode_reg (mode0, op0);
14061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14062 op1 = copy_to_mode_reg (mode1, op1);
14063 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14064 {
14065 /* @@@ better error message */
14066 error ("selector must be an immediate");
14067 return const0_rtx;
14068 }
14069 if (target == 0
14070 || GET_MODE (target) != tmode
14071 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14072 target = gen_reg_rtx (tmode);
14073 pat = GEN_FCN (icode) (target, op0, op1, op2);
14074 if (! pat)
14075 return 0;
14076 emit_insn (pat);
14077 return target;
14078
14079 case IX86_BUILTIN_MASKMOVQ:
077084dd 14080 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
14081 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14082 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
14083 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14084 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
14085 /* Note the arg order is different from the operand order. */
14086 arg1 = TREE_VALUE (arglist);
14087 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14088 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14089 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14090 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14091 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14092 mode0 = insn_data[icode].operand[0].mode;
14093 mode1 = insn_data[icode].operand[1].mode;
14094 mode2 = insn_data[icode].operand[2].mode;
14095
5c464583 14096 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14097 op0 = copy_to_mode_reg (mode0, op0);
14098 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14099 op1 = copy_to_mode_reg (mode1, op1);
14100 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14101 op2 = copy_to_mode_reg (mode2, op2);
14102 pat = GEN_FCN (icode) (op0, op1, op2);
14103 if (! pat)
14104 return 0;
14105 emit_insn (pat);
14106 return 0;
14107
14108 case IX86_BUILTIN_SQRTSS:
14109 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14110 case IX86_BUILTIN_RSQRTSS:
14111 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14112 case IX86_BUILTIN_RCPSS:
14113 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14114
14115 case IX86_BUILTIN_LOADAPS:
14116 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14117
14118 case IX86_BUILTIN_LOADUPS:
14119 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14120
14121 case IX86_BUILTIN_STOREAPS:
e37af218 14122 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14123
bd793c65 14124 case IX86_BUILTIN_STOREUPS:
e37af218 14125 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14126
14127 case IX86_BUILTIN_LOADSS:
14128 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14129
14130 case IX86_BUILTIN_STORESS:
e37af218 14131 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14132
0f290768 14133 case IX86_BUILTIN_LOADHPS:
bd793c65 14134 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14135 case IX86_BUILTIN_LOADHPD:
14136 case IX86_BUILTIN_LOADLPD:
14137 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14138 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14139 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14140 : CODE_FOR_sse2_movlpd);
bd793c65
BS
14141 arg0 = TREE_VALUE (arglist);
14142 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14143 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14144 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14145 tmode = insn_data[icode].operand[0].mode;
14146 mode0 = insn_data[icode].operand[1].mode;
14147 mode1 = insn_data[icode].operand[2].mode;
14148
14149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14150 op0 = copy_to_mode_reg (mode0, op0);
14151 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14152 if (target == 0
14153 || GET_MODE (target) != tmode
14154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14155 target = gen_reg_rtx (tmode);
14156 pat = GEN_FCN (icode) (target, op0, op1);
14157 if (! pat)
14158 return 0;
14159 emit_insn (pat);
14160 return target;
0f290768 14161
bd793c65
BS
14162 case IX86_BUILTIN_STOREHPS:
14163 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14164 case IX86_BUILTIN_STOREHPD:
14165 case IX86_BUILTIN_STORELPD:
14166 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14167 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14168 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14169 : CODE_FOR_sse2_movlpd);
bd793c65
BS
14170 arg0 = TREE_VALUE (arglist);
14171 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14172 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14173 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14174 mode0 = insn_data[icode].operand[1].mode;
14175 mode1 = insn_data[icode].operand[2].mode;
14176
14177 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14178 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14179 op1 = copy_to_mode_reg (mode1, op1);
14180
14181 pat = GEN_FCN (icode) (op0, op0, op1);
14182 if (! pat)
14183 return 0;
14184 emit_insn (pat);
14185 return 0;
14186
14187 case IX86_BUILTIN_MOVNTPS:
e37af218 14188 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14189 case IX86_BUILTIN_MOVNTQ:
e37af218 14190 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14191
14192 case IX86_BUILTIN_LDMXCSR:
14193 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14194 target = assign_386_stack_local (SImode, 0);
14195 emit_move_insn (target, op0);
14196 emit_insn (gen_ldmxcsr (target));
14197 return 0;
14198
14199 case IX86_BUILTIN_STMXCSR:
14200 target = assign_386_stack_local (SImode, 0);
14201 emit_insn (gen_stmxcsr (target));
14202 return copy_to_mode_reg (SImode, target);
14203
bd793c65 14204 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14205 case IX86_BUILTIN_SHUFPD:
14206 icode = (fcode == IX86_BUILTIN_SHUFPS
14207 ? CODE_FOR_sse_shufps
14208 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14209 arg0 = TREE_VALUE (arglist);
14210 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14211 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14212 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14213 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14214 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14215 tmode = insn_data[icode].operand[0].mode;
14216 mode0 = insn_data[icode].operand[1].mode;
14217 mode1 = insn_data[icode].operand[2].mode;
14218 mode2 = insn_data[icode].operand[3].mode;
14219
14220 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14221 op0 = copy_to_mode_reg (mode0, op0);
14222 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14223 op1 = copy_to_mode_reg (mode1, op1);
14224 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14225 {
14226 /* @@@ better error message */
14227 error ("mask must be an immediate");
6f1a6c5b 14228 return gen_reg_rtx (tmode);
bd793c65
BS
14229 }
14230 if (target == 0
14231 || GET_MODE (target) != tmode
14232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14233 target = gen_reg_rtx (tmode);
14234 pat = GEN_FCN (icode) (target, op0, op1, op2);
14235 if (! pat)
14236 return 0;
14237 emit_insn (pat);
14238 return target;
14239
14240 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14241 case IX86_BUILTIN_PSHUFD:
14242 case IX86_BUILTIN_PSHUFHW:
14243 case IX86_BUILTIN_PSHUFLW:
14244 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14245 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14246 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14247 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14248 arg0 = TREE_VALUE (arglist);
14249 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14250 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14251 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14252 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14253 mode1 = insn_data[icode].operand[1].mode;
14254 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14255
29628f27
BS
14256 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14257 op0 = copy_to_mode_reg (mode1, op0);
14258 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14259 {
14260 /* @@@ better error message */
14261 error ("mask must be an immediate");
14262 return const0_rtx;
14263 }
14264 if (target == 0
14265 || GET_MODE (target) != tmode
14266 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14267 target = gen_reg_rtx (tmode);
29628f27 14268 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14269 if (! pat)
14270 return 0;
14271 emit_insn (pat);
14272 return target;
14273
ab3146fd
ZD
14274 case IX86_BUILTIN_PSLLDQI128:
14275 case IX86_BUILTIN_PSRLDQI128:
14276 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14277 : CODE_FOR_sse2_lshrti3);
14278 arg0 = TREE_VALUE (arglist);
14279 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14280 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14281 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14282 tmode = insn_data[icode].operand[0].mode;
14283 mode1 = insn_data[icode].operand[1].mode;
14284 mode2 = insn_data[icode].operand[2].mode;
14285
14286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14287 {
14288 op0 = copy_to_reg (op0);
14289 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14290 }
14291 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14292 {
14293 error ("shift must be an immediate");
14294 return const0_rtx;
14295 }
14296 target = gen_reg_rtx (V2DImode);
14297 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14298 if (! pat)
14299 return 0;
14300 emit_insn (pat);
14301 return target;
14302
47f339cf
BS
14303 case IX86_BUILTIN_FEMMS:
14304 emit_insn (gen_femms ());
14305 return NULL_RTX;
14306
14307 case IX86_BUILTIN_PAVGUSB:
14308 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14309
14310 case IX86_BUILTIN_PF2ID:
14311 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14312
14313 case IX86_BUILTIN_PFACC:
14314 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14315
14316 case IX86_BUILTIN_PFADD:
14317 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14318
14319 case IX86_BUILTIN_PFCMPEQ:
14320 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14321
14322 case IX86_BUILTIN_PFCMPGE:
14323 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14324
14325 case IX86_BUILTIN_PFCMPGT:
14326 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14327
14328 case IX86_BUILTIN_PFMAX:
14329 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14330
14331 case IX86_BUILTIN_PFMIN:
14332 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14333
14334 case IX86_BUILTIN_PFMUL:
14335 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14336
14337 case IX86_BUILTIN_PFRCP:
14338 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14339
14340 case IX86_BUILTIN_PFRCPIT1:
14341 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14342
14343 case IX86_BUILTIN_PFRCPIT2:
14344 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14345
14346 case IX86_BUILTIN_PFRSQIT1:
14347 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14348
14349 case IX86_BUILTIN_PFRSQRT:
14350 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14351
14352 case IX86_BUILTIN_PFSUB:
14353 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14354
14355 case IX86_BUILTIN_PFSUBR:
14356 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14357
14358 case IX86_BUILTIN_PI2FD:
14359 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14360
14361 case IX86_BUILTIN_PMULHRW:
14362 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14363
47f339cf
BS
14364 case IX86_BUILTIN_PF2IW:
14365 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14366
14367 case IX86_BUILTIN_PFNACC:
14368 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14369
14370 case IX86_BUILTIN_PFPNACC:
14371 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14372
14373 case IX86_BUILTIN_PI2FW:
14374 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14375
14376 case IX86_BUILTIN_PSWAPDSI:
14377 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14378
14379 case IX86_BUILTIN_PSWAPDSF:
14380 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14381
e37af218
RH
14382 case IX86_BUILTIN_SSE_ZERO:
14383 target = gen_reg_rtx (V4SFmode);
4977bab6 14384 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14385 return target;
14386
bd793c65
BS
14387 case IX86_BUILTIN_MMX_ZERO:
14388 target = gen_reg_rtx (DImode);
14389 emit_insn (gen_mmx_clrdi (target));
14390 return target;
14391
f02e1358
JH
14392 case IX86_BUILTIN_CLRTI:
14393 target = gen_reg_rtx (V2DImode);
14394 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14395 return target;
14396
14397
fbe5eb6d
BS
14398 case IX86_BUILTIN_SQRTSD:
14399 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14400 case IX86_BUILTIN_LOADAPD:
14401 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14402 case IX86_BUILTIN_LOADUPD:
14403 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14404
14405 case IX86_BUILTIN_STOREAPD:
14406 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14407 case IX86_BUILTIN_STOREUPD:
14408 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14409
14410 case IX86_BUILTIN_LOADSD:
14411 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14412
14413 case IX86_BUILTIN_STORESD:
14414 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14415
14416 case IX86_BUILTIN_SETPD1:
14417 target = assign_386_stack_local (DFmode, 0);
14418 arg0 = TREE_VALUE (arglist);
14419 emit_move_insn (adjust_address (target, DFmode, 0),
14420 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14421 op0 = gen_reg_rtx (V2DFmode);
14422 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14423 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14424 return op0;
14425
14426 case IX86_BUILTIN_SETPD:
14427 target = assign_386_stack_local (V2DFmode, 0);
14428 arg0 = TREE_VALUE (arglist);
14429 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14430 emit_move_insn (adjust_address (target, DFmode, 0),
14431 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14432 emit_move_insn (adjust_address (target, DFmode, 8),
14433 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14434 op0 = gen_reg_rtx (V2DFmode);
14435 emit_insn (gen_sse2_movapd (op0, target));
14436 return op0;
14437
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
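
/* The three loops above are plain table-driven dispatch over the
   builtin-description arrays.  A self-contained sketch of the same
   pattern (all names here are invented for illustration):  */
#if 0
#include <stddef.h>

struct builtin_desc { int code; int icode; };

static const struct builtin_desc table[] = {
  { 10, 100 }, { 11, 101 }, { 12, 102 },
};

static int
dispatch (int fcode)
{
  size_t i;
  /* Linear scan, exactly as above; the tables are small enough that
     nothing cleverer is needed.  */
  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    if (table[i].code == fcode)
      return table[i].icode;
  return 0;   /* Like the "should do something sensible" fallback above.  */
}
#endif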

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
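
/* Illustrative pairing of the two helpers above (not actual GCC code):
   a post-reload expander that needs OP in memory might do

       rtx mem = ix86_force_to_memory (DImode, op);
       ...emit instructions that use MEM...
       ix86_free_from_memory (DImode);

   Outside the 64-bit red zone the slot is created by PRE_DEC pushes
   below the stack pointer, so the free must follow the force in strict
   LIFO order.  */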

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
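
/* Worked example (illustrative): reloading the DFmode constant 1.0 into
   an SSE class yields NO_REGS, forcing the constant to memory, while for
   an x87 class standard_80387_constant_p recognizes it and the class is
   kept, so the constant can be materialized with fld1.  */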

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
               || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
              && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
                  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
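
/* Example (illustrative): an SFmode copy between FLOAT_REGS and SSE_REGS
   returns nonzero here, so the register allocator routes the value
   through a stack slot; there is no direct x87<->XMM move instruction.  */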
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as
   TO; on some machines it is expensive to move between registers if they
   are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In the case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
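
/* Example (illustrative): in 32-bit mode a QImode value in %esi (regno 4
   and above, which have no 8-bit subregister) is only accepted during or
   after reload, or when partial register stalls are not a concern for the
   target; %eax..%ebx are always fine.  */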

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in classes
   other than Q_REGS.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * ((int) GET_MODE_SIZE (mode)
                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
}
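
/* Worked example (illustrative): for a DImode value on a 32-bit target
   the default arm applies, so an incoming load costs
   int_load[2] * (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD, i.e. the
   SImode load cost scaled for the two words being moved.  */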

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (x, code, outer_code, total)
     rtx x;
     int code, outer_code;
     int *total;
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary.  */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    default:
      return false;
    }
}
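
/* Worked example (illustrative): for (plus:SI (mult:SI (reg) (const_int 4))
   (reg)) the PLUS arm above recognizes an lea-compatible address, so the
   whole expression is costed as a single lea plus the costs of its
   operands, rather than as a shift followed by an add.  */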

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
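
/* For reference, the MACHOPIC_PURE path above emits roughly (label
   numbers and symbol names illustrative):

       stub:
               .indirect_symbol symbol
               call LPC$1
       LPC$1:  popl %eax
               movl L1$lz-LPC$1(%eax),%edx
               jmp %edx
       binder:
               lea L1$lz-LPC$1(%eax),%eax
               pushl %eax
               jmp dyld_stub_binding_helper
       L1$lz:
               .indirect_symbol symbol
               .long binder

   i.e. a classic lazy-binding stub: the first call goes through the
   binder, which patches the lazy pointer to the resolved address.  */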

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array, as some registers are never
     allocated at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
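
/* User-level view (illustrative) of what the handler above accepts; the
   second declaration would draw the "incompatible attribute" warning:

       struct __attribute__ ((ms_struct)) ok { int a : 3; char b; };
       struct __attribute__ ((ms_struct, gcc_struct)) bad { int a; };  */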

static bool
ix86_ms_bitfield_layout_p (record_type)
     tree record_type;
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
         || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
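
/* Worked example (illustrative): for a 32-bit method with the default
   ABI, `this' is the first stack argument, i.e. at 4(%esp) on entry, or
   at 8(%esp) when a hidden aggregate-return pointer occupies the first
   slot; with regparm and a fixed (non-variadic) argument list it arrives
   in %eax instead.  */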

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
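
/* For reference, the common 32-bit case (stack `this', DELTA only,
   non-PIC) reduces to just (offsets illustrative):

       addl $DELTA, 4(%esp)
       jmp  target_function

   i.e. the thunk adjusts `this' in place and tail-jumps to the real
   method without ever setting up a frame of its own.  */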

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
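
/* For reference, the plain 32-bit case above emits roughly (the exact
   names depend on the MCOUNT_NAME, LPREFIX and PROFILE_COUNT_REGISTER
   configuration macros, so this is illustrative only):

       movl $LP1, %edx
       call mcount

   at each function entry when compiling with -p/-pg.  */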

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_active_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}
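
/* Effect on the emitted code (illustrative):

       before:              after:
           jne  .L1             jne  .L1
           ret                  nop
       .L1:                     ret
                            .L1:

   The NOP separates the conditional jump from the RET so the Athlon/K8
   branch predictor does not penalize the return.  */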

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
bool
x86_extended_reg_mentioned_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (operands)
     rtx operands[2];
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
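
/* A self-contained C sketch (not part of GCC) of the algorithm emitted
   above: values with the sign bit clear convert directly as signed;
   otherwise halve with the low bit folded back in (round to odd),
   convert, and double.  The round-to-odd step keeps the final result
   correctly rounded.  */
#if 0
#include <stdio.h>

static double
u64_to_double (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;   /* Fast path: fits in a signed value.  */
  /* (x >> 1) | (x & 1) halves X, rounding to odd; doubling the converted
     result then rounds exactly once overall.  */
  double f = (double) (long long) ((x >> 1) | (x & 1));
  return f + f;
}

int
main (void)
{
  printf ("%.17g\n", u64_to_double (0xFFFFFFFFFFFFFFFFULL));
  return 0;
}
#endif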

/* Return true if we do not know how to pass TYPE solely in registers.  */
bool
ix86_must_pass_in_stack (mode, type)
     enum machine_mode mode;
     tree type;
{
  if (default_must_pass_in_stack (mode, type))
    return true;
  return (!TARGET_64BIT && type && mode == TImode);
}
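
/* Example (illustrative): a 128-bit integer (TImode) argument on a 32-bit
   target always goes on the stack here, even when enough general
   registers would nominally be available.  */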

#include "gt-i386.h"