]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
langhooks.h (builtin_function): New langhook.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
e129d93a 3 2002, 2003, 2004 Free Software Foundation, Inc.
2a2ab3f9 4
188fc5b5 5This file is part of GCC.
2a2ab3f9 6
188fc5b5 7GCC is free software; you can redistribute it and/or modify
2a2ab3f9
JVA
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
188fc5b5 12GCC is distributed in the hope that it will be useful,
2a2ab3f9
JVA
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
188fc5b5 18along with GCC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9 34#include "output.h"
8bc527af 35#include "insn-codes.h"
2a2ab3f9 36#include "insn-attr.h"
2a2ab3f9 37#include "flags.h"
a8ffcc81 38#include "except.h"
ecbc4695 39#include "function.h"
00c79232 40#include "recog.h"
ced8dd8c 41#include "expr.h"
e78d8e51 42#include "optabs.h"
f103890b 43#include "toplev.h"
e075ae69 44#include "basic-block.h"
1526a060 45#include "ggc.h"
672a6f42
NB
46#include "target.h"
47#include "target-def.h"
f1e639b1 48#include "langhooks.h"
dafc5b82 49#include "cgraph.h"
cd3ce9b4 50#include "tree-gimple.h"
2a2ab3f9 51
8dfe5673 52#ifndef CHECK_STACK_LIMIT
07933f72 53#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
54#endif
55
3c50106f
RH
56/* Return index of given mode in mult and division cost tables. */
57#define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
2ab0437e 64/* Processor costs (relative to an add) */
fce5a9f2 65static const
2ab0437e
JH
66struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 72 0, /* cost of multiply per each bit set */
4977bab6 73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
74 3, /* cost of movsx */
75 3, /* cost of movzx */
2ab0437e
JH
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
f4365627
JH
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
4977bab6 100 1, /* Branch cost */
229b303a
RS
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
2ab0437e 107};
229b303a 108
32b5b1aa 109/* Processor costs (relative to an add) */
fce5a9f2 110static const
32b5b1aa 111struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 112 1, /* cost of an add instruction */
32b5b1aa
SC
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
4977bab6 116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 117 1, /* cost of multiply per each bit set */
4977bab6 118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
119 3, /* cost of movsx */
120 2, /* cost of movzx */
96e7ae40 121 15, /* "large" insn */
e2e52e1b 122 3, /* MOVE_RATIO */
7c6b971d 123 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
0f290768 126 Relative to reg-reg move (2). */
96e7ae40
JH
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
fa79946e
JH
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
f4365627
JH
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
4977bab6 145 1, /* Branch cost */
229b303a
RS
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
152};
153
fce5a9f2 154static const
32b5b1aa
SC
155struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
4977bab6 160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 161 1, /* cost of multiply per each bit set */
4977bab6 162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
163 3, /* cost of movsx */
164 2, /* cost of movzx */
96e7ae40 165 15, /* "large" insn */
e2e52e1b 166 3, /* MOVE_RATIO */
7c6b971d 167 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
0f290768 170 Relative to reg-reg move (2). */
96e7ae40
JH
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
fa79946e
JH
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
f4365627
JH
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
4977bab6 189 1, /* Branch cost */
229b303a
RS
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
196};
197
fce5a9f2 198static const
e5cb57e8 199struct processor_costs pentium_cost = {
32b5b1aa
SC
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
856b07a1 202 4, /* variable shift costs */
e5cb57e8 203 1, /* constant shift costs */
4977bab6 204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 205 0, /* cost of multiply per each bit set */
4977bab6 206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
207 3, /* cost of movsx */
208 2, /* cost of movzx */
96e7ae40 209 8, /* "large" insn */
e2e52e1b 210 6, /* MOVE_RATIO */
7c6b971d 211 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
0f290768 214 Relative to reg-reg move (2). */
96e7ae40
JH
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
fa79946e
JH
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
f4365627
JH
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
4977bab6 233 2, /* Branch cost */
229b303a
RS
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
240};
241
fce5a9f2 242static const
856b07a1
SC
243struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
e075ae69 246 1, /* variable shift costs */
856b07a1 247 1, /* constant shift costs */
4977bab6 248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 249 0, /* cost of multiply per each bit set */
4977bab6 250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
251 1, /* cost of movsx */
252 1, /* cost of movzx */
96e7ae40 253 8, /* "large" insn */
e2e52e1b 254 6, /* MOVE_RATIO */
7c6b971d 255 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
0f290768 258 Relative to reg-reg move (2). */
96e7ae40
JH
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
fa79946e
JH
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
f4365627
JH
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
4977bab6 277 2, /* Branch cost */
229b303a
RS
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
856b07a1
SC
284};
285
fce5a9f2 286static const
a269a03c
JC
287struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
e075ae69 289 2, /* cost of a lea instruction */
a269a03c
JC
290 1, /* variable shift costs */
291 1, /* constant shift costs */
4977bab6 292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 293 0, /* cost of multiply per each bit set */
4977bab6 294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
295 2, /* cost of movsx */
296 2, /* cost of movzx */
96e7ae40 297 8, /* "large" insn */
e2e52e1b 298 4, /* MOVE_RATIO */
7c6b971d 299 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
0f290768 302 Relative to reg-reg move (2). */
96e7ae40
JH
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
fa79946e
JH
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
f4365627
JH
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
4977bab6 321 1, /* Branch cost */
229b303a
RS
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
4f770e7b
RS
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
229b303a
RS
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
a269a03c
JC
328};
329
fce5a9f2 330static const
309ada50
JH
331struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
0b5107cf 333 2, /* cost of a lea instruction */
309ada50
JH
334 1, /* variable shift costs */
335 1, /* constant shift costs */
4977bab6 336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 337 0, /* cost of multiply per each bit set */
4977bab6 338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
339 1, /* cost of movsx */
340 1, /* cost of movzx */
309ada50 341 8, /* "large" insn */
e2e52e1b 342 9, /* MOVE_RATIO */
309ada50 343 4, /* cost for loading QImode using movzbl */
b72b1c29 344 {3, 4, 3}, /* cost of loading integer registers
309ada50 345 in QImode, HImode and SImode.
0f290768 346 Relative to reg-reg move (2). */
b72b1c29 347 {3, 4, 3}, /* cost of storing integer registers */
309ada50 348 4, /* cost of reg,reg fld/fst */
b72b1c29 349 {4, 4, 12}, /* cost of loading fp registers
309ada50 350 in SFmode, DFmode and XFmode */
b72b1c29 351 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 352 2, /* cost of moving MMX register */
b72b1c29 353 {4, 4}, /* cost of loading MMX registers
fa79946e 354 in SImode and DImode */
b72b1c29 355 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
b72b1c29 358 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 361 in SImode, DImode and TImode */
b72b1c29 362 5, /* MMX or SSE register to integer */
f4365627
JH
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
4977bab6 365 2, /* Branch cost */
229b303a
RS
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
309ada50
JH
372};
373
4977bab6
ZW
374static const
375struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416};
417
fce5a9f2 418static const
b4e89e2d
JH
419struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
e0c00392 421 3, /* cost of a lea instruction */
4977bab6
ZW
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 425 0, /* cost of multiply per each bit set */
4977bab6 426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
427 1, /* cost of movsx */
428 1, /* cost of movzx */
b4e89e2d
JH
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
f4365627
JH
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
4977bab6 453 2, /* Branch cost */
229b303a
RS
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
460};
461
89c43c0a
VM
462static const
463struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504};
505
8b60264b 506const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 507
a269a03c
JC
508/* Processor feature/optimization bitmasks. */
509#define m_386 (1<<PROCESSOR_I386)
510#define m_486 (1<<PROCESSOR_I486)
511#define m_PENT (1<<PROCESSOR_PENTIUM)
512#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513#define m_K6 (1<<PROCESSOR_K6)
309ada50 514#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 515#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
516#define m_K8 (1<<PROCESSOR_K8)
517#define m_ATHLON_K8 (m_K8 | m_ATHLON)
89c43c0a 518#define m_NOCONA (1<<PROCESSOR_NOCONA)
a269a03c 519
4977bab6 520const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
89c43c0a 521const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 522const int x86_zero_extend_with_and = m_486 | m_PENT;
89c43c0a 523const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
e075ae69 524const int x86_double_with_add = ~m_386;
a269a03c 525const int x86_use_bit_test = m_386;
4977bab6 526const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
89c43c0a 527const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
4977bab6 528const int x86_3dnow_a = m_ATHLON_K8;
89c43c0a
VM
529const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530const int x86_branch_hints = m_PENT4 | m_NOCONA;
531const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
e075ae69
RH
532const int x86_partial_reg_stall = m_PPRO;
533const int x86_use_loop = m_K6;
4977bab6 534const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
535const int x86_use_mov0 = m_K6;
536const int x86_use_cltd = ~(m_PENT | m_K6);
537const int x86_read_modify_write = ~m_PENT;
538const int x86_read_modify = ~(m_PENT | m_PPRO);
539const int x86_split_long_moves = m_PPRO;
4977bab6 540const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 541const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 542const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
543const int x86_qimode_math = ~(0);
544const int x86_promote_qi_regs = 0;
545const int x86_himode_math = ~(m_PPRO);
546const int x86_promote_hi_regs = m_PPRO;
89c43c0a
VM
547const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
7b50a809
JH
555const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
89c43c0a 557const int x86_decompose_lea = m_PENT4 | m_NOCONA;
495333a6 558const int x86_shift1 = ~m_486;
89c43c0a
VM
559const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
4977bab6 561/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 562 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
563 scalar values in proper format leaving the upper part undefined. */
564const int x86_sse_partial_regs = m_ATHLON_K8;
565/* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 569const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
570const int x86_use_ffreep = m_ATHLON_K8;
571const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 572const int x86_inter_unit_moves = ~(m_ATHLON_K8);
89c43c0a 573const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
be04394b
JH
574/* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
89c43c0a 576const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 577
d1f87653 578/* In case the average insn count for single function invocation is
6ab16dd9
JH
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
4977bab6 581#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 582
5bf0ebab
RH
583/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
587
588/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 590
e075ae69 591enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
592{
593 /* ax, dx, cx, bx */
ab408a86 594 AREG, DREG, CREG, BREG,
4c0d89b5 595 /* si, di, bp, sp */
e075ae69 596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 600 /* arg pointer */
83774849 601 NON_Q_REGS,
564d80f4 602 /* flags, fpsr, dirflag, frame */
a7180f70
BS
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
4c0d89b5 612};
c572e5ba 613
3d117b30 614/* The "default" register map used in 32bit mode. */
83774849 615
0f290768 616int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
617{
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
625};
626
5bf0ebab
RH
627static int const x86_64_int_parameter_registers[6] =
628{
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
631};
632
633static int const x86_64_int_return_registers[4] =
634{
635 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
636};
53c17031 637
0f7fa3d0
JH
638/* The "default" register map used in 64bit mode. */
639int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
640{
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
648};
649
83774849
RH
650/* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
703*/
0f290768 704int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
705{
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
713};
714
c572e5ba
JVA
715/* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
717
07933f72
GS
718rtx ix86_compare_op0 = NULL_RTX;
719rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 720
7a2e09f4 721#define MAX_386_STACK_LOCALS 3
8362f420
JH
722/* Size of the register save area. */
723#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
724
/* Define the structure for the machine field in struct function.  */

/* One cached stack slot; entries are chained in a singly linked list
   hanging off the per-function machine structure.  */
struct stack_local_entry GTY(())
{
  unsigned short mode;			/* machine mode of the slot */
  unsigned short n;			/* slot index within that mode */
  rtx rtl;				/* the RTL for the stack slot itself */
  struct stack_local_entry *next;	/* next entry in the list */
};
734
4dd2ac2c
JH
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;			/* number of registers saved (see [saved regs]) */
  int padding1;			/* padding before the va_arg save area */
  int va_arg_size;		/* size of the va_arg register save area */
  HOST_WIDE_INT frame;		/* size of the local frame proper */
  int padding2;			/* padding below the frame */
  int outgoing_arguments_size;
  int red_zone_size;		/* x86-64 red zone below the stack pointer */

  HOST_WIDE_INT to_allocate;	/* bytes the prologue must actually allocate */
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
774
c93e80a5
JH
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
e075ae69 834\f
b96a374d
AJ
835static int local_symbolic_operand (rtx, enum machine_mode);
836static int tls_symbolic_operand_1 (rtx, enum tls_model);
837static void output_pic_addr_const (FILE *, rtx, int);
838static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840static const char *get_some_local_dynamic_name (void);
841static int get_some_local_dynamic_name_1 (rtx *, void *);
842static rtx maybe_get_pool_constant (rtx);
843static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
e129d93a
ILT
846static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
b96a374d
AJ
849static rtx get_thread_pointer (int);
850static rtx legitimize_tls_address (rtx, enum tls_model, int);
851static void get_pc_thunk_name (char [32], unsigned int);
852static rtx gen_push (rtx);
853static int memory_address_length (rtx addr);
854static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855static int ix86_agi_dependant (rtx, rtx, enum attr_type);
b96a374d
AJ
856static struct machine_function * ix86_init_machine_status (void);
857static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858static int ix86_nsaved_regs (void);
859static void ix86_emit_save_regs (void);
860static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 861static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 862static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
863static HOST_WIDE_INT ix86_GOT_alias_set (void);
864static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 866static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
867static int ix86_issue_rate (void);
868static int ix86_adjust_cost (rtx, rtx, rtx, int);
b96a374d
AJ
869static int ia32_multipass_dfa_lookahead (void);
870static void ix86_init_mmx_sse_builtins (void);
871static rtx x86_this_parameter (tree);
872static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875static void x86_file_start (void);
876static void ix86_reorg (void);
c35d187f
RH
877static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
879static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
23a60a04 881static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
e075ae69
RH
882
/* Decomposed form of an x86 memory address:
   base + index * scale + disp, with an optional segment override.  */
struct ix86_address
{
  rtx base, index, disp;	/* address components (filled by decompose) */
  HOST_WIDE_INT scale;		/* factor applied to INDEX */
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
b08de47e 889
b96a374d
AJ
890static int ix86_decompose_address (rtx, struct ix86_address *);
891static int ix86_address_cost (rtx);
892static bool ix86_cannot_force_const_mem (rtx);
893static rtx ix86_delegitimize_address (rtx);
bd793c65
BS
894
895struct builtin_description;
b96a374d
AJ
896static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903static rtx ix86_expand_store_builtin (enum insn_code, tree);
904static rtx safe_vector_operand (rtx, enum machine_mode);
905static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912static int ix86_fp_comparison_cost (enum rtx_code code);
913static unsigned int ix86_select_alt_pic_regnum (void);
914static int ix86_save_reg (unsigned int, int);
915static void ix86_compute_frame_layout (struct ix86_frame *);
916static int ix86_comp_type_attributes (tree, tree);
e767b5be 917static int ix86_function_regparm (tree, tree);
91d231cb 918const struct attribute_spec ix86_attribute_table[];
b96a374d
AJ
919static bool ix86_function_ok_for_sibcall (tree, tree);
920static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922static int ix86_value_regno (enum machine_mode);
923static bool contains_128bit_aligned_vector_p (tree);
0397ac35 924static rtx ix86_struct_value_rtx (tree, int);
b96a374d
AJ
925static bool ix86_ms_bitfield_layout_p (tree);
926static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927static int extended_reg_mentioned_1 (rtx *, void *);
928static bool ix86_rtx_costs (rtx, int, int, int *);
929static int min_insn_size (rtx);
67dfe110 930static tree ix86_md_asm_clobbers (tree clobbers);
fe984136 931static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
8cd5a4e0
RH
932static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
933 tree, bool);
7c262518 934
21c318ba 935#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
b96a374d 936static void ix86_svr3_asm_out_constructor (rtx, int);
2cc07db4 937#endif
e56feed6 938
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Printable names for the classes above, indexed by enum
   x86_64_reg_class (presumably for diagnostics — TODO confirm).  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
962
963#define MAX_CLASSES 4
b96a374d
AJ
964static int classify_argument (enum machine_mode, tree,
965 enum x86_64_reg_class [MAX_CLASSES], int);
966static int examine_argument (enum machine_mode, tree, int, int *, int *);
967static rtx construct_container (enum machine_mode, tree, int, int, int,
968 const int *, int);
969static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
970 enum x86_64_reg_class);
881b2a96 971
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Nonzero once the table has been filled in; presumably set by
   init_ext_80387_constants for lazy initialization — TODO confirm.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
672a6f42
NB
976\f
977/* Initialize the GCC target structure. */
91d231cb
JM
978#undef TARGET_ATTRIBUTE_TABLE
979#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 980#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
981# undef TARGET_MERGE_DECL_ATTRIBUTES
982# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
983#endif
984
8d8e52be
JM
985#undef TARGET_COMP_TYPE_ATTRIBUTES
986#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
987
f6155fda
SS
988#undef TARGET_INIT_BUILTINS
989#define TARGET_INIT_BUILTINS ix86_init_builtins
990
991#undef TARGET_EXPAND_BUILTIN
992#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
993
bd09bdeb
RH
994#undef TARGET_ASM_FUNCTION_EPILOGUE
995#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 996
17b53c33
NB
997#undef TARGET_ASM_OPEN_PAREN
998#define TARGET_ASM_OPEN_PAREN ""
999#undef TARGET_ASM_CLOSE_PAREN
1000#define TARGET_ASM_CLOSE_PAREN ""
1001
301d03af
RS
1002#undef TARGET_ASM_ALIGNED_HI_OP
1003#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1004#undef TARGET_ASM_ALIGNED_SI_OP
1005#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1006#ifdef ASM_QUAD
1007#undef TARGET_ASM_ALIGNED_DI_OP
1008#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1009#endif
1010
1011#undef TARGET_ASM_UNALIGNED_HI_OP
1012#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1013#undef TARGET_ASM_UNALIGNED_SI_OP
1014#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1015#undef TARGET_ASM_UNALIGNED_DI_OP
1016#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1017
c237e94a
ZW
1018#undef TARGET_SCHED_ADJUST_COST
1019#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1020#undef TARGET_SCHED_ISSUE_RATE
1021#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
fce5a9f2 1022#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
ca4f3d13 1023#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
9b690711
RH
1024#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1025#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1026 ia32_multipass_dfa_lookahead
c237e94a 1027
4977bab6
ZW
1028#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1029#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1030
f996902d
RH
1031#ifdef HAVE_AS_TLS
1032#undef TARGET_HAVE_TLS
1033#define TARGET_HAVE_TLS true
1034#endif
3a04ff64
RH
1035#undef TARGET_CANNOT_FORCE_CONST_MEM
1036#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 1037
7daebb7a 1038#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1039#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1040
4977bab6
ZW
1041#undef TARGET_MS_BITFIELD_LAYOUT_P
1042#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1043
c590b625
RH
1044#undef TARGET_ASM_OUTPUT_MI_THUNK
1045#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1046#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1047#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1048
1bc7c5b6
ZW
1049#undef TARGET_ASM_FILE_START
1050#define TARGET_ASM_FILE_START x86_file_start
1051
3c50106f
RH
1052#undef TARGET_RTX_COSTS
1053#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1054#undef TARGET_ADDRESS_COST
1055#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1056
e129d93a
ILT
1057#undef TARGET_FIXED_CONDITION_CODE_REGS
1058#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1059#undef TARGET_CC_MODES_COMPATIBLE
1060#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1061
18dbd950
RS
1062#undef TARGET_MACHINE_DEPENDENT_REORG
1063#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1064
c35d187f
RH
1065#undef TARGET_BUILD_BUILTIN_VA_LIST
1066#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1067
67dfe110
KH
1068#undef TARGET_MD_ASM_CLOBBERS
1069#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1070
9184f892
KH
1071#undef TARGET_PROMOTE_PROTOTYPES
1072#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
0397ac35
RH
1073#undef TARGET_STRUCT_VALUE_RTX
1074#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
a0524eb3
KH
1075#undef TARGET_SETUP_INCOMING_VARARGS
1076#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
fe984136
RH
1077#undef TARGET_MUST_PASS_IN_STACK
1078#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
8cd5a4e0
RH
1079#undef TARGET_PASS_BY_REFERENCE
1080#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
a0524eb3 1081
cd3ce9b4
JM
1082#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1083#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1084
/* The vector of target hooks for the i386 back end, built from the
   TARGET_* macro overrides defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1093
f5316dfe
MM
1094/* Sometimes certain combinations of command options do not make
1095 sense on a particular target machine. You can define a macro
1096 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1097 defined, is executed once just after all the command options have
1098 been parsed.
1099
1100 Don't use this macro to turn on various extra optimizations for
1101 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1102
1103void
b96a374d 1104override_options (void)
f5316dfe 1105{
400500c4 1106 int i;
3326f410
DJ
1107 int ix86_tune_defaulted = 0;
1108
e075ae69
RH
1109 /* Comes from final.c -- no real reason to change it. */
1110#define MAX_CODE_ALIGN 16
f5316dfe 1111
c8c5cb99
SC
1112 static struct ptt
1113 {
8b60264b
KG
1114 const struct processor_costs *cost; /* Processor costs */
1115 const int target_enable; /* Target flags to enable. */
1116 const int target_disable; /* Target flags to disable. */
1117 const int align_loop; /* Default alignments. */
2cca7283 1118 const int align_loop_max_skip;
8b60264b 1119 const int align_jump;
2cca7283 1120 const int align_jump_max_skip;
8b60264b 1121 const int align_func;
e075ae69 1122 }
0f290768 1123 const processor_target_table[PROCESSOR_max] =
e075ae69 1124 {
4977bab6
ZW
1125 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1126 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1127 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1128 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1129 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1130 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1132 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1133 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1134 };
1135
f4365627 1136 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1137 static struct pta
1138 {
8b60264b
KG
1139 const char *const name; /* processor name or nickname. */
1140 const enum processor_type processor;
0dd0e980
JH
1141 const enum pta_flags
1142 {
1143 PTA_SSE = 1,
1144 PTA_SSE2 = 2,
5bbeea44
JH
1145 PTA_SSE3 = 4,
1146 PTA_MMX = 8,
1147 PTA_PREFETCH_SSE = 16,
1148 PTA_3DNOW = 32,
4977bab6
ZW
1149 PTA_3DNOW_A = 64,
1150 PTA_64BIT = 128
0dd0e980 1151 } flags;
e075ae69 1152 }
0f290768 1153 const processor_alias_table[] =
e075ae69 1154 {
0dd0e980
JH
1155 {"i386", PROCESSOR_I386, 0},
1156 {"i486", PROCESSOR_I486, 0},
1157 {"i586", PROCESSOR_PENTIUM, 0},
1158 {"pentium", PROCESSOR_PENTIUM, 0},
1159 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1160 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1161 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1162 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1163 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1164 {"i686", PROCESSOR_PENTIUMPRO, 0},
1165 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1166 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1167 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1168 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1169 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1170 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1171 | PTA_MMX | PTA_PREFETCH_SSE},
1172 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1173 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1174 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1175 | PTA_MMX | PTA_PREFETCH_SSE},
1176 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1177 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1178 {"k6", PROCESSOR_K6, PTA_MMX},
1179 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1180 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1181 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1182 | PTA_3DNOW_A},
f4365627 1183 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1184 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1185 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1186 | PTA_3DNOW_A | PTA_SSE},
f4365627 1187 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1188 | PTA_3DNOW_A | PTA_SSE},
f4365627 1189 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1190 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1191 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1192 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1193 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1194 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1195 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1196 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1197 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1198 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1199 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1200 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1201 };
c8c5cb99 1202
ca7558fc 1203 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1204
41ed2237 1205 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1206 in case they weren't overwritten by command line options. */
55ba61f3
JH
1207 if (TARGET_64BIT)
1208 {
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 1;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 1;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = 0;
1215 }
1216 else
1217 {
1218 if (flag_omit_frame_pointer == 2)
1219 flag_omit_frame_pointer = 0;
1220 if (flag_asynchronous_unwind_tables == 2)
1221 flag_asynchronous_unwind_tables = 0;
1222 if (flag_pcc_struct_return == 2)
7c712dcc 1223 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1224 }
1225
f5316dfe
MM
1226#ifdef SUBTARGET_OVERRIDE_OPTIONS
1227 SUBTARGET_OVERRIDE_OPTIONS;
1228#endif
1229
9e555526
RH
1230 if (!ix86_tune_string && ix86_arch_string)
1231 ix86_tune_string = ix86_arch_string;
1232 if (!ix86_tune_string)
3326f410
DJ
1233 {
1234 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1235 ix86_tune_defaulted = 1;
1236 }
f4365627 1237 if (!ix86_arch_string)
3fec9fa9 1238 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1239
6189a572
JH
1240 if (ix86_cmodel_string != 0)
1241 {
1242 if (!strcmp (ix86_cmodel_string, "small"))
1243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1244 else if (flag_pic)
c725bd79 1245 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1246 else if (!strcmp (ix86_cmodel_string, "32"))
1247 ix86_cmodel = CM_32;
1248 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1249 ix86_cmodel = CM_KERNEL;
1250 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1251 ix86_cmodel = CM_MEDIUM;
1252 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1253 ix86_cmodel = CM_LARGE;
1254 else
1255 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1256 }
1257 else
1258 {
1259 ix86_cmodel = CM_32;
1260 if (TARGET_64BIT)
1261 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1262 }
c93e80a5
JH
1263 if (ix86_asm_string != 0)
1264 {
1265 if (!strcmp (ix86_asm_string, "intel"))
1266 ix86_asm_dialect = ASM_INTEL;
1267 else if (!strcmp (ix86_asm_string, "att"))
1268 ix86_asm_dialect = ASM_ATT;
1269 else
1270 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1271 }
6189a572 1272 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1273 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1274 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1275 if (ix86_cmodel == CM_LARGE)
c725bd79 1276 sorry ("code model `large' not supported yet");
0c2dc519 1277 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1278 sorry ("%i-bit mode not compiled in",
0c2dc519 1279 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1280
f4365627
JH
1281 for (i = 0; i < pta_size; i++)
1282 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1283 {
1284 ix86_arch = processor_alias_table[i].processor;
1285 /* Default cpu tuning to the architecture. */
9e555526 1286 ix86_tune = ix86_arch;
f4365627 1287 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1288 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1289 target_flags |= MASK_MMX;
1290 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1291 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1292 target_flags |= MASK_3DNOW;
1293 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1294 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1295 target_flags |= MASK_3DNOW_A;
1296 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1297 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1298 target_flags |= MASK_SSE;
1299 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1300 && !(target_flags_explicit & MASK_SSE2))
f4365627 1301 target_flags |= MASK_SSE2;
5bbeea44
JH
1302 if (processor_alias_table[i].flags & PTA_SSE3
1303 && !(target_flags_explicit & MASK_SSE3))
1304 target_flags |= MASK_SSE3;
f4365627
JH
1305 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1306 x86_prefetch_sse = true;
4977bab6 1307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
1308 {
1309 if (ix86_tune_defaulted)
1310 {
1311 ix86_tune_string = "x86-64";
1312 for (i = 0; i < pta_size; i++)
1313 if (! strcmp (ix86_tune_string,
1314 processor_alias_table[i].name))
1315 break;
1316 ix86_tune = processor_alias_table[i].processor;
1317 }
1318 else
1319 error ("CPU you selected does not support x86-64 "
1320 "instruction set");
1321 }
f4365627
JH
1322 break;
1323 }
400500c4 1324
f4365627
JH
1325 if (i == pta_size)
1326 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1327
f4365627 1328 for (i = 0; i < pta_size; i++)
9e555526 1329 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
f4365627 1330 {
9e555526 1331 ix86_tune = processor_alias_table[i].processor;
4977bab6
ZW
1332 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1333 error ("CPU you selected does not support x86-64 instruction set");
c618c6ec
JJ
1334
1335 /* Intel CPUs have always interpreted SSE prefetch instructions as
1336 NOPs; so, we can enable SSE prefetch instructions even when
1337 -mtune (rather than -march) points us to a processor that has them.
1338 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1339 higher processors. */
1340 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1341 x86_prefetch_sse = true;
f4365627
JH
1342 break;
1343 }
f4365627 1344 if (i == pta_size)
9e555526 1345 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1346
2ab0437e
JH
1347 if (optimize_size)
1348 ix86_cost = &size_cost;
1349 else
9e555526
RH
1350 ix86_cost = processor_target_table[ix86_tune].cost;
1351 target_flags |= processor_target_table[ix86_tune].target_enable;
1352 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1353
36edd3cc
BS
1354 /* Arrange to set up i386_stack_locals for all functions. */
1355 init_machine_status = ix86_init_machine_status;
fce5a9f2 1356
0f290768 1357 /* Validate -mregparm= value. */
e075ae69 1358 if (ix86_regparm_string)
b08de47e 1359 {
400500c4
RK
1360 i = atoi (ix86_regparm_string);
1361 if (i < 0 || i > REGPARM_MAX)
1362 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1363 else
1364 ix86_regparm = i;
b08de47e 1365 }
0d7d98ee
JH
1366 else
1367 if (TARGET_64BIT)
1368 ix86_regparm = REGPARM_MAX;
b08de47e 1369
3e18fdf6 1370 /* If the user has provided any of the -malign-* options,
a4f31c00 1371 warn and use that value only if -falign-* is not set.
3e18fdf6 1372 Remove this code in GCC 3.2 or later. */
e075ae69 1373 if (ix86_align_loops_string)
b08de47e 1374 {
3e18fdf6
GK
1375 warning ("-malign-loops is obsolete, use -falign-loops");
1376 if (align_loops == 0)
1377 {
1378 i = atoi (ix86_align_loops_string);
1379 if (i < 0 || i > MAX_CODE_ALIGN)
1380 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1381 else
1382 align_loops = 1 << i;
1383 }
b08de47e 1384 }
3af4bd89 1385
e075ae69 1386 if (ix86_align_jumps_string)
b08de47e 1387 {
3e18fdf6
GK
1388 warning ("-malign-jumps is obsolete, use -falign-jumps");
1389 if (align_jumps == 0)
1390 {
1391 i = atoi (ix86_align_jumps_string);
1392 if (i < 0 || i > MAX_CODE_ALIGN)
1393 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1394 else
1395 align_jumps = 1 << i;
1396 }
b08de47e 1397 }
b08de47e 1398
e075ae69 1399 if (ix86_align_funcs_string)
b08de47e 1400 {
3e18fdf6
GK
1401 warning ("-malign-functions is obsolete, use -falign-functions");
1402 if (align_functions == 0)
1403 {
1404 i = atoi (ix86_align_funcs_string);
1405 if (i < 0 || i > MAX_CODE_ALIGN)
1406 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1407 else
1408 align_functions = 1 << i;
1409 }
b08de47e 1410 }
3af4bd89 1411
3e18fdf6 1412 /* Default align_* from the processor table. */
3e18fdf6 1413 if (align_loops == 0)
2cca7283 1414 {
9e555526
RH
1415 align_loops = processor_target_table[ix86_tune].align_loop;
1416 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1417 }
3e18fdf6 1418 if (align_jumps == 0)
2cca7283 1419 {
9e555526
RH
1420 align_jumps = processor_target_table[ix86_tune].align_jump;
1421 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1422 }
3e18fdf6 1423 if (align_functions == 0)
2cca7283 1424 {
9e555526 1425 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1426 }
3e18fdf6 1427
e4c0478d 1428 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1429 The default of 128 bits is for Pentium III's SSE __m128, but we
1430 don't want additional code to keep the stack aligned when
1431 optimizing for code size. */
1432 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1433 ? TARGET_64BIT ? 128 : 32
fbb83b43 1434 : 128);
e075ae69 1435 if (ix86_preferred_stack_boundary_string)
3af4bd89 1436 {
400500c4 1437 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1438 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1439 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1440 TARGET_64BIT ? 4 : 2);
400500c4
RK
1441 else
1442 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1443 }
77a989d1 1444
0f290768 1445 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1446 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1447 if (ix86_branch_cost_string)
804a8ee0 1448 {
400500c4
RK
1449 i = atoi (ix86_branch_cost_string);
1450 if (i < 0 || i > 5)
1451 error ("-mbranch-cost=%d is not between 0 and 5", i);
1452 else
1453 ix86_branch_cost = i;
804a8ee0 1454 }
804a8ee0 1455
f996902d
RH
1456 if (ix86_tls_dialect_string)
1457 {
1458 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1459 ix86_tls_dialect = TLS_DIALECT_GNU;
1460 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1461 ix86_tls_dialect = TLS_DIALECT_SUN;
1462 else
1463 error ("bad value (%s) for -mtls-dialect= switch",
1464 ix86_tls_dialect_string);
1465 }
1466
e9a25f70
JL
1467 /* Keep nonleaf frame pointers. */
1468 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1469 flag_omit_frame_pointer = 1;
e075ae69
RH
1470
1471 /* If we're doing fast math, we don't care about comparison order
1472 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1473 if (flag_unsafe_math_optimizations)
e075ae69
RH
1474 target_flags &= ~MASK_IEEE_FP;
1475
30c99a84
RH
1476 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1477 since the insns won't need emulation. */
1478 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1479 target_flags &= ~MASK_NO_FANCY_MATH_387;
1480
9e200aaf
KC
1481 /* Turn on SSE2 builtins for -msse3. */
1482 if (TARGET_SSE3)
22c7c85e
L
1483 target_flags |= MASK_SSE2;
1484
1485 /* Turn on SSE builtins for -msse2. */
1486 if (TARGET_SSE2)
1487 target_flags |= MASK_SSE;
1488
14f73b5a
JH
1489 if (TARGET_64BIT)
1490 {
1491 if (TARGET_ALIGN_DOUBLE)
c725bd79 1492 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1493 if (TARGET_RTD)
c725bd79 1494 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1495 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1496 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1497 ix86_fpmath = FPMATH_SSE;
14f73b5a 1498 }
965f5423 1499 else
a5b378d6
JH
1500 {
1501 ix86_fpmath = FPMATH_387;
1502 /* i386 ABI does not specify red zone. It still makes sense to use it
1503 when programmer takes care to stack from being destroyed. */
1504 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1505 target_flags |= MASK_NO_RED_ZONE;
1506 }
965f5423
JH
1507
1508 if (ix86_fpmath_string != 0)
1509 {
1510 if (! strcmp (ix86_fpmath_string, "387"))
1511 ix86_fpmath = FPMATH_387;
1512 else if (! strcmp (ix86_fpmath_string, "sse"))
1513 {
1514 if (!TARGET_SSE)
1515 {
1516 warning ("SSE instruction set disabled, using 387 arithmetics");
1517 ix86_fpmath = FPMATH_387;
1518 }
1519 else
1520 ix86_fpmath = FPMATH_SSE;
1521 }
1522 else if (! strcmp (ix86_fpmath_string, "387,sse")
1523 || ! strcmp (ix86_fpmath_string, "sse,387"))
1524 {
1525 if (!TARGET_SSE)
1526 {
1527 warning ("SSE instruction set disabled, using 387 arithmetics");
1528 ix86_fpmath = FPMATH_387;
1529 }
1530 else if (!TARGET_80387)
1531 {
1532 warning ("387 instruction set disabled, using SSE arithmetics");
1533 ix86_fpmath = FPMATH_SSE;
1534 }
1535 else
1536 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1537 }
fce5a9f2 1538 else
965f5423
JH
1539 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1540 }
14f73b5a 1541
a7180f70
BS
1542 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1543 on by -msse. */
1544 if (TARGET_SSE)
e37af218
RH
1545 {
1546 target_flags |= MASK_MMX;
1547 x86_prefetch_sse = true;
1548 }
c6036a37 1549
47f339cf
BS
1550 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1551 if (TARGET_3DNOW)
1552 {
1553 target_flags |= MASK_MMX;
d1f87653 1554 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1555 extensions it adds. */
1556 if (x86_3dnow_a & (1 << ix86_arch))
1557 target_flags |= MASK_3DNOW_A;
1558 }
9e555526 1559 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1560 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1561 && !optimize_size)
1562 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1563
1564 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1565 {
1566 char *p;
1567 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1568 p = strchr (internal_label_prefix, 'X');
1569 internal_label_prefix_len = p - internal_label_prefix;
1570 *p = '\0';
1571 }
f5316dfe
MM
1572}
1573\f
/* Adjust default optimization options.  LEVEL is the -O optimization
   level; SIZE is nonzero for -Os (unused on this target).  */
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with the
     sentinel 2 so that override_options can distinguish an explicit
     user setting from this provisional default and set the real
     defaults there.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
b08de47e 1593\f
91d231cb
JM
/* Table of valid machine attributes.  Terminated by the all-NULL
   sentinel entry; handlers live elsewhere in this file.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Windows DLL import/export and shared-section attributes; only
     compiled in when the target defines the handlers.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* Structure layout attributes selecting MS- vs GCC-compatible
     record layout.  */
  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};
1618
5fbf0217
EB
1619/* Decide whether we can make a sibling call to a function. DECL is the
1620 declaration of the function being targeted by the call and EXP is the
1621 CALL_EXPR representing the call. */
4977bab6
ZW
1622
1623static bool
b96a374d 1624ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1625{
1626 /* If we are generating position-independent code, we cannot sibcall
1627 optimize any indirect call, or a direct call to a global function,
1628 as the PLT requires %ebx be live. */
1629 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1630 return false;
1631
1632 /* If we are returning floats on the 80387 register stack, we cannot
1633 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1634 function that does or, conversely, from a function that does return
1635 a float to a function that doesn't; the necessary stack adjustment
1636 would not be executed. */
4977bab6 1637 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1638 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1639 return false;
1640
1641 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1642 register for the address of the target function. Make sure that all
4977bab6
ZW
1643 such registers are not used for passing parameters. */
1644 if (!decl && !TARGET_64BIT)
1645 {
e767b5be 1646 tree type;
4977bab6
ZW
1647
1648 /* We're looking at the CALL_EXPR, we need the type of the function. */
1649 type = TREE_OPERAND (exp, 0); /* pointer expression */
1650 type = TREE_TYPE (type); /* pointer type */
1651 type = TREE_TYPE (type); /* function type */
1652
e767b5be 1653 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1654 {
1655 /* ??? Need to count the actual number of registers to be used,
1656 not the possible number of registers. Fix later. */
1657 return false;
1658 }
1659 }
1660
1661 /* Otherwise okay. That also includes certain types of indirect calls. */
1662 return true;
1663}
1664
e91f04de 1665/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1666 arguments as in struct attribute_spec.handler. */
1667static tree
b96a374d
AJ
1668ix86_handle_cdecl_attribute (tree *node, tree name,
1669 tree args ATTRIBUTE_UNUSED,
1670 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1671{
1672 if (TREE_CODE (*node) != FUNCTION_TYPE
1673 && TREE_CODE (*node) != METHOD_TYPE
1674 && TREE_CODE (*node) != FIELD_DECL
1675 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1676 {
91d231cb
JM
1677 warning ("`%s' attribute only applies to functions",
1678 IDENTIFIER_POINTER (name));
1679 *no_add_attrs = true;
1680 }
e91f04de
CH
1681 else
1682 {
1683 if (is_attribute_p ("fastcall", name))
1684 {
1685 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1686 {
1687 error ("fastcall and stdcall attributes are not compatible");
1688 }
1689 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1690 {
1691 error ("fastcall and regparm attributes are not compatible");
1692 }
1693 }
1694 else if (is_attribute_p ("stdcall", name))
1695 {
1696 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1697 {
1698 error ("fastcall and stdcall attributes are not compatible");
1699 }
1700 }
1701 }
b08de47e 1702
91d231cb
JM
1703 if (TARGET_64BIT)
1704 {
1705 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1706 *no_add_attrs = true;
1707 }
b08de47e 1708
91d231cb
JM
1709 return NULL_TREE;
1710}
b08de47e 1711
91d231cb
JM
1712/* Handle a "regparm" attribute;
1713 arguments as in struct attribute_spec.handler. */
1714static tree
b96a374d
AJ
1715ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1716 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1717{
1718 if (TREE_CODE (*node) != FUNCTION_TYPE
1719 && TREE_CODE (*node) != METHOD_TYPE
1720 && TREE_CODE (*node) != FIELD_DECL
1721 && TREE_CODE (*node) != TYPE_DECL)
1722 {
1723 warning ("`%s' attribute only applies to functions",
1724 IDENTIFIER_POINTER (name));
1725 *no_add_attrs = true;
1726 }
1727 else
1728 {
1729 tree cst;
b08de47e 1730
91d231cb
JM
1731 cst = TREE_VALUE (args);
1732 if (TREE_CODE (cst) != INTEGER_CST)
1733 {
1734 warning ("`%s' attribute requires an integer constant argument",
1735 IDENTIFIER_POINTER (name));
1736 *no_add_attrs = true;
1737 }
1738 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1739 {
1740 warning ("argument to `%s' attribute larger than %d",
1741 IDENTIFIER_POINTER (name), REGPARM_MAX);
1742 *no_add_attrs = true;
1743 }
e91f04de
CH
1744
1745 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1746 {
1747 error ("fastcall and regparm attributes are not compatible");
1748 }
b08de47e
MM
1749 }
1750
91d231cb 1751 return NULL_TREE;
b08de47e
MM
1752}
1753
1754/* Return 0 if the attributes for two types are incompatible, 1 if they
1755 are compatible, and 2 if they are nearly compatible (which causes a
1756 warning to be generated). */
1757
8d8e52be 1758static int
b96a374d 1759ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1760{
0f290768 1761 /* Check for mismatch of non-default calling convention. */
27c38fbe 1762 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1763
1764 if (TREE_CODE (type1) != FUNCTION_TYPE)
1765 return 1;
1766
b96a374d 1767 /* Check for mismatched fastcall types */
e91f04de
CH
1768 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1769 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1770 return 0;
e91f04de 1771
afcfe58c 1772 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1773 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1774 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1775 return 0;
1776 if (ix86_function_regparm (type1, NULL)
1777 != ix86_function_regparm (type2, NULL))
afcfe58c 1778 return 0;
b08de47e
MM
1779 return 1;
1780}
b08de47e 1781\f
e767b5be
JH
1782/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1783 DECL may be NULL when calling function indirectly
839a4992 1784 or considering a libcall. */
483ab821
MM
1785
1786static int
e767b5be 1787ix86_function_regparm (tree type, tree decl)
483ab821
MM
1788{
1789 tree attr;
e767b5be
JH
1790 int regparm = ix86_regparm;
1791 bool user_convention = false;
483ab821 1792
e767b5be
JH
1793 if (!TARGET_64BIT)
1794 {
1795 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1796 if (attr)
1797 {
1798 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1799 user_convention = true;
1800 }
1801
1802 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1803 {
1804 regparm = 2;
1805 user_convention = true;
1806 }
1807
1808 /* Use register calling convention for local functions when possible. */
1809 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1810 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1811 {
1812 struct cgraph_local_info *i = cgraph_local_info (decl);
1813 if (i && i->local)
1814 {
1815 /* We can't use regparm(3) for nested functions as these use
1816 static chain pointer in third argument. */
1817 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1818 regparm = 2;
1819 else
1820 regparm = 3;
1821 }
1822 }
1823 }
1824 return regparm;
483ab821
MM
1825}
1826
fe9f516f
RH
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Register 0 is EAX in the i386 register numbering.  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
1842
b08de47e
MM
1843/* Value is the number of bytes of arguments automatically
1844 popped when returning from a subroutine call.
1845 FUNDECL is the declaration node of the function (as a tree),
1846 FUNTYPE is the data type of the function (as a tree),
1847 or for a library call it is an identifier node for the subroutine name.
1848 SIZE is the number of bytes of arguments passed on the stack.
1849
1850 On the 80386, the RTD insn may be used to pop them if the number
1851 of args is fixed, but if the number is variable then the caller
1852 must pop them all. RTD can't be used for library calls now
1853 because the library is compiled with the Unix compiler.
1854 Use of RTD is a selectable option, since it is incompatible with
1855 standard Unix calling sequences. If the option is not selected,
1856 the caller must always pop the args.
1857
1858 The attribute stdcall is equivalent to RTD on a per module basis. */
1859
1860int
b96a374d 1861ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1862{
3345ee7d 1863 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1864
43f3a59d 1865 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1866 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1867
43f3a59d
KH
1868 /* Stdcall and fastcall functions will pop the stack if not
1869 variable args. */
e91f04de
CH
1870 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1871 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1872 rtd = 1;
79325812 1873
698cdd84
SC
1874 if (rtd
1875 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1876 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1877 == void_type_node)))
698cdd84
SC
1878 return size;
1879 }
79325812 1880
232b8f52 1881 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1882 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
0d7d98ee 1883 && !TARGET_64BIT)
232b8f52 1884 {
e767b5be 1885 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1886
1887 if (!nregs)
1888 return GET_MODE_SIZE (Pmode);
1889 }
1890
1891 return 0;
b08de47e 1892}
b08de47e
MM
1893\f
1894/* Argument support functions. */
1895
53c17031
JH
1896/* Return true when register may be used to pass function parameters. */
1897bool
b96a374d 1898ix86_function_arg_regno_p (int regno)
53c17031
JH
1899{
1900 int i;
1901 if (!TARGET_64BIT)
0333394e
JJ
1902 return (regno < REGPARM_MAX
1903 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1904 if (SSE_REGNO_P (regno) && TARGET_SSE)
1905 return true;
1906 /* RAX is used as hidden argument to va_arg functions. */
1907 if (!regno)
1908 return true;
1909 for (i = 0; i < REGPARM_MAX; i++)
1910 if (regno == x86_64_int_parameter_registers[i])
1911 return true;
1912 return false;
1913}
1914
fe984136
RH
1915/* Return if we do not know how to pass TYPE solely in registers. */
1916
1917static bool
1918ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1919{
1920 if (must_pass_in_stack_var_size_or_pad (mode, type))
1921 return true;
1922 return (!TARGET_64BIT && type && mode == TImode);
1923}
1924
b08de47e
MM
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state, then fill in what applies.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Warn when vector arguments are passed with the corresponding
     instruction set disabled.  */
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode. */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* Last entry not being void_type_node means varargs.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  /* Varargs on ia32: everything goes on the stack.  */
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Libcalls with no type info, and unprototyped functions, may also
     be varargs.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
2012
d1f87653 2013/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 2014 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
2015 class and assign registers accordingly. */
2016
/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  The rules below are applied in
   order; their sequence is mandated by the ABI classification
   algorithm.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
     The special-case first test keeps the 32-bit flavor when a 32-bit
     integer meets a 32-bit SSE float.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
2054
2055/* Classify the argument of type TYPE and mode MODE.
2056 CLASSES will be filled by the register class used to pass each word
2057 of the operand. The number of words is returned. In case the parameter
2058 should be passed in memory, 0 is returned. As a special case for zero
2059 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2060
2061 BIT_OFFSET is used internally for handling records and specifies offset
2062 of the offset in bits modulo 256 to avoid overflow cases.
2063
2064 See the x86-64 PS ABI for details.
2065*/
2066
2067static int
b96a374d
AJ
2068classify_argument (enum machine_mode mode, tree type,
2069 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2070{
296e4ae8 2071 HOST_WIDE_INT bytes =
53c17031 2072 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2073 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2074
c60ee6f5
JH
2075 /* Variable sized entities are always passed/returned in memory. */
2076 if (bytes < 0)
2077 return 0;
2078
dafc5b82 2079 if (mode != VOIDmode
fe984136 2080 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
2081 return 0;
2082
53c17031
JH
2083 if (type && AGGREGATE_TYPE_P (type))
2084 {
2085 int i;
2086 tree field;
2087 enum x86_64_reg_class subclasses[MAX_CLASSES];
2088
2089 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2090 if (bytes > 16)
2091 return 0;
2092
2093 for (i = 0; i < words; i++)
2094 classes[i] = X86_64_NO_CLASS;
2095
2096 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2097 signalize memory class, so handle it as special case. */
2098 if (!words)
2099 {
2100 classes[0] = X86_64_NO_CLASS;
2101 return 1;
2102 }
2103
2104 /* Classify each field of record and merge classes. */
2105 if (TREE_CODE (type) == RECORD_TYPE)
2106 {
91ea38f9 2107 /* For classes first merge in the field of the subclasses. */
604a3205 2108 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
91ea38f9 2109 {
604a3205
NS
2110 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2111 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
91ea38f9
JH
2112 int i;
2113
2114 for (i = 0; i < n_bases; ++i)
2115 {
2116 tree binfo = TREE_VEC_ELT (bases, i);
2117 int num;
2118 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2119 tree type = BINFO_TYPE (binfo);
2120
2121 num = classify_argument (TYPE_MODE (type),
2122 type, subclasses,
2123 (offset + bit_offset) % 256);
2124 if (!num)
2125 return 0;
2126 for (i = 0; i < num; i++)
2127 {
db01f480 2128 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2129 classes[i + pos] =
2130 merge_classes (subclasses[i], classes[i + pos]);
2131 }
2132 }
2133 }
43f3a59d 2134 /* And now merge the fields of structure. */
53c17031
JH
2135 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2136 {
2137 if (TREE_CODE (field) == FIELD_DECL)
2138 {
2139 int num;
2140
2141 /* Bitfields are always classified as integer. Handle them
2142 early, since later code would consider them to be
2143 misaligned integers. */
2144 if (DECL_BIT_FIELD (field))
2145 {
2146 for (i = int_bit_position (field) / 8 / 8;
2147 i < (int_bit_position (field)
2148 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2149 + 63) / 8 / 8; i++)
53c17031
JH
2150 classes[i] =
2151 merge_classes (X86_64_INTEGER_CLASS,
2152 classes[i]);
2153 }
2154 else
2155 {
2156 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2157 TREE_TYPE (field), subclasses,
2158 (int_bit_position (field)
2159 + bit_offset) % 256);
2160 if (!num)
2161 return 0;
2162 for (i = 0; i < num; i++)
2163 {
2164 int pos =
db01f480 2165 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2166 classes[i + pos] =
2167 merge_classes (subclasses[i], classes[i + pos]);
2168 }
2169 }
2170 }
2171 }
2172 }
2173 /* Arrays are handled as small records. */
2174 else if (TREE_CODE (type) == ARRAY_TYPE)
2175 {
2176 int num;
2177 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2178 TREE_TYPE (type), subclasses, bit_offset);
2179 if (!num)
2180 return 0;
2181
2182 /* The partial classes are now full classes. */
2183 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2184 subclasses[0] = X86_64_SSE_CLASS;
2185 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2186 subclasses[0] = X86_64_INTEGER_CLASS;
2187
2188 for (i = 0; i < words; i++)
2189 classes[i] = subclasses[i % num];
2190 }
2191 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2192 else if (TREE_CODE (type) == UNION_TYPE
2193 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2194 {
91ea38f9 2195 /* For classes first merge in the field of the subclasses. */
604a3205 2196 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
91ea38f9 2197 {
604a3205
NS
2198 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2199 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
91ea38f9
JH
2200 int i;
2201
2202 for (i = 0; i < n_bases; ++i)
2203 {
2204 tree binfo = TREE_VEC_ELT (bases, i);
2205 int num;
2206 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2207 tree type = BINFO_TYPE (binfo);
2208
2209 num = classify_argument (TYPE_MODE (type),
2210 type, subclasses,
db01f480 2211 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2212 if (!num)
2213 return 0;
2214 for (i = 0; i < num; i++)
2215 {
c16576e6 2216 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2217 classes[i + pos] =
2218 merge_classes (subclasses[i], classes[i + pos]);
2219 }
2220 }
2221 }
53c17031
JH
2222 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2223 {
2224 if (TREE_CODE (field) == FIELD_DECL)
2225 {
2226 int num;
2227 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2228 TREE_TYPE (field), subclasses,
2229 bit_offset);
2230 if (!num)
2231 return 0;
2232 for (i = 0; i < num; i++)
2233 classes[i] = merge_classes (subclasses[i], classes[i]);
2234 }
2235 }
2236 }
448ec26c
WH
2237 else if (TREE_CODE (type) == SET_TYPE)
2238 {
2239 if (bytes <= 4)
2240 {
2241 classes[0] = X86_64_INTEGERSI_CLASS;
2242 return 1;
2243 }
2244 else if (bytes <= 8)
2245 {
2246 classes[0] = X86_64_INTEGER_CLASS;
2247 return 1;
2248 }
2249 else if (bytes <= 12)
2250 {
2251 classes[0] = X86_64_INTEGER_CLASS;
2252 classes[1] = X86_64_INTEGERSI_CLASS;
2253 return 2;
2254 }
2255 else
2256 {
2257 classes[0] = X86_64_INTEGER_CLASS;
2258 classes[1] = X86_64_INTEGER_CLASS;
2259 return 2;
2260 }
2261 }
53c17031
JH
2262 else
2263 abort ();
2264
2265 /* Final merger cleanup. */
2266 for (i = 0; i < words; i++)
2267 {
2268 /* If one class is MEMORY, everything should be passed in
2269 memory. */
2270 if (classes[i] == X86_64_MEMORY_CLASS)
2271 return 0;
2272
d6a7951f 2273 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2274 X86_64_SSE_CLASS. */
2275 if (classes[i] == X86_64_SSEUP_CLASS
2276 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2277 classes[i] = X86_64_SSE_CLASS;
2278
d6a7951f 2279 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2280 if (classes[i] == X86_64_X87UP_CLASS
2281 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2282 classes[i] = X86_64_SSE_CLASS;
2283 }
2284 return words;
2285 }
2286
2287 /* Compute alignment needed. We align all types to natural boundaries with
2288 exception of XFmode that is aligned to 64bits. */
2289 if (mode != VOIDmode && mode != BLKmode)
2290 {
2291 int mode_alignment = GET_MODE_BITSIZE (mode);
2292
2293 if (mode == XFmode)
2294 mode_alignment = 128;
2295 else if (mode == XCmode)
2296 mode_alignment = 256;
2c6b27c3
JH
2297 if (COMPLEX_MODE_P (mode))
2298 mode_alignment /= 2;
f5143c46 2299 /* Misaligned fields are always returned in memory. */
53c17031
JH
2300 if (bit_offset % mode_alignment)
2301 return 0;
2302 }
2303
9e9fb0ce
JB
2304 /* for V1xx modes, just use the base mode */
2305 if (VECTOR_MODE_P (mode)
2306 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2307 mode = GET_MODE_INNER (mode);
2308
53c17031
JH
2309 /* Classification of atomic types. */
2310 switch (mode)
2311 {
2312 case DImode:
2313 case SImode:
2314 case HImode:
2315 case QImode:
2316 case CSImode:
2317 case CHImode:
2318 case CQImode:
2319 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2320 classes[0] = X86_64_INTEGERSI_CLASS;
2321 else
2322 classes[0] = X86_64_INTEGER_CLASS;
2323 return 1;
2324 case CDImode:
2325 case TImode:
2326 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2327 return 2;
2328 case CTImode:
9e9fb0ce 2329 return 0;
53c17031
JH
2330 case SFmode:
2331 if (!(bit_offset % 64))
2332 classes[0] = X86_64_SSESF_CLASS;
2333 else
2334 classes[0] = X86_64_SSE_CLASS;
2335 return 1;
2336 case DFmode:
2337 classes[0] = X86_64_SSEDF_CLASS;
2338 return 1;
f8a1ebc6 2339 case XFmode:
53c17031
JH
2340 classes[0] = X86_64_X87_CLASS;
2341 classes[1] = X86_64_X87UP_CLASS;
2342 return 2;
f8a1ebc6 2343 case TFmode:
9e9fb0ce
JB
2344 classes[0] = X86_64_SSE_CLASS;
2345 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
2346 return 2;
2347 case SCmode:
2348 classes[0] = X86_64_SSE_CLASS;
2349 return 1;
9e9fb0ce
JB
2350 case DCmode:
2351 classes[0] = X86_64_SSEDF_CLASS;
2352 classes[1] = X86_64_SSEDF_CLASS;
2353 return 2;
2354 case XCmode:
2355 case TCmode:
2356 /* These modes are larger than 16 bytes. */
2357 return 0;
e95d6b23
JH
2358 case V4SFmode:
2359 case V4SImode:
495333a6
JH
2360 case V16QImode:
2361 case V8HImode:
2362 case V2DFmode:
2363 case V2DImode:
e95d6b23
JH
2364 classes[0] = X86_64_SSE_CLASS;
2365 classes[1] = X86_64_SSEUP_CLASS;
2366 return 2;
2367 case V2SFmode:
2368 case V2SImode:
2369 case V4HImode:
2370 case V8QImode:
9e9fb0ce
JB
2371 classes[0] = X86_64_SSE_CLASS;
2372 return 1;
53c17031 2373 case BLKmode:
e95d6b23 2374 case VOIDmode:
53c17031
JH
2375 return 0;
2376 default:
9e9fb0ce
JB
2377 if (VECTOR_MODE_P (mode))
2378 {
2379 if (bytes > 16)
2380 return 0;
2381 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2382 {
2383 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2384 classes[0] = X86_64_INTEGERSI_CLASS;
2385 else
2386 classes[0] = X86_64_INTEGER_CLASS;
2387 classes[1] = X86_64_INTEGER_CLASS;
2388 return 1 + (bytes > 8);
2389 }
2390 }
53c17031
JH
2391 abort ();
2392 }
2393}
2394
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.

   MODE/TYPE describe the argument (TYPE may be NULL for libcalls).
   IN_RETURN is nonzero when classifying a return value rather than a
   parameter; x87 classes are only legal in that case.  On success the
   number of integer and SSE registers needed is stored through
   INT_NREGS / SSE_NREGS.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* classify_argument returns 0 when the value must live in memory.  */
  if (!n)
    return 0;
  /* Walk the eightbyte classes and tally a register per class.  */
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        /* Padding, or the upper half of an SSE register already counted.  */
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
        /* x87 registers are usable for return values only; force memory
           for parameters.  */
        if (!in_return)
          return 0;
        break;
      case X86_64_MEMORY_CLASS:
        /* classify_argument never hands back MEMORY with n != 0.  */
        abort ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   Returns a REG for the simple single-register cases, a PARALLEL of
   (reg, byte-offset) pairs for multi-register aggregates, or NULL when
   the value must be passed in memory (or is zero-sized).  NINTREGS and
   NSSEREGS are the registers still available; INTREG points at the next
   free slot of the integer-register table and SSE_REGNO is the next free
   SSE register index.  */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
                     int nintregs, int nsseregs, const int * intreg,
                     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
        fprintf (stderr, "Memory class\n");
      else
        {
          fprintf (stderr, "Classes:");
          for (i = 0; i < n; i++)
            {
              fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
            }
          fprintf (stderr, "\n");
        }
    }
  /* Memory class, or not enough free registers -> pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      default:
        abort ();
      }
  /* Two-eightbyte special cases that still fit a single hard register
     (or a naturally consecutive register pair).  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
        {
        case X86_64_NO_CLASS:
          break;
        case X86_64_INTEGER_CLASS:
        case X86_64_INTEGERSI_CLASS:
          /* Merge TImodes on aligned occasions here too.  */
          if (i * 8 + 8 > bytes)
            tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
          else if (class[i] == X86_64_INTEGERSI_CLASS)
            tmpmode = SImode;
          else
            tmpmode = DImode;
          /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
          if (tmpmode == BLKmode)
            tmpmode = DImode;
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (tmpmode, *intreg),
                                             GEN_INT (i*8));
          intreg++;
          break;
        case X86_64_SSESF_CLASS:
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (SFmode,
                                                          SSE_REGNO (sse_regno)),
                                             GEN_INT (i*8));
          sse_regno++;
          break;
        case X86_64_SSEDF_CLASS:
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (DFmode,
                                                          SSE_REGNO (sse_regno)),
                                             GEN_INT (i*8));
          sse_regno++;
          break;
        case X86_64_SSE_CLASS:
          /* An SSEUP class in the following slot means the two eightbytes
             occupy one 16-byte XMM register.  */
          if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
            tmpmode = TImode;
          else
            tmpmode = DImode;
          exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                             gen_rtx_REG (tmpmode,
                                                          SSE_REGNO (sse_regno)),
                                             GEN_INT (i*8));
          if (tmpmode == TImode)
            i++;
          sse_regno++;
          break;
        default:
          abort ();
        }
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2570
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
                      enum machine_mode mode,	/* current arg mode */
                      tree type,	/* type of the argument or 0 if lib support */
                      int named)	/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
             "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
             words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* If the argument goes in memory, or doesn't fit the remaining
         registers, consume stack words instead of registers.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
        cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
        {
          cum->nregs -= int_nregs;
          cum->sse_nregs -= sse_nregs;
          cum->regno += int_nregs;
          cum->sse_regno += sse_nregs;
        }
      else
        cum->words += words;
    }
  else
    {
      /* 32-bit: scalar SSE/MMX vector arguments use their own register
         files; everything else draws from the integer regparm pool.  */
      if (TARGET_SSE && SSE_REG_MODE_P (mode)
          && (!type || !AGGREGATE_TYPE_P (type)))
        {
          cum->sse_words += words;
          cum->sse_nregs -= 1;
          cum->sse_regno += 1;
          if (cum->sse_nregs <= 0)
            {
              cum->sse_nregs = 0;
              cum->sse_regno = 0;
            }
        }
      else if (TARGET_MMX && MMX_REG_MODE_P (mode)
               && (!type || !AGGREGATE_TYPE_P (type)))
        {
          cum->mmx_words += words;
          cum->mmx_nregs -= 1;
          cum->mmx_regno += 1;
          if (cum->mmx_nregs <= 0)
            {
              cum->mmx_nregs = 0;
              cum->mmx_regno = 0;
            }
        }
      else
        {
          cum->words += words;
          cum->nregs -= words;
          cum->regno += words;

          if (cum->nregs <= 0)
            {
              cum->nregs = 0;
              cum->regno = 0;
            }
        }
    }
  return;
}
2645
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum,	/* current arg information */
              enum machine_mode mode,	/* current arg mode */
              tree type,	/* type of the argument or 0 if lib support */
              int named)	/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  /* One-shot ABI-change warnings, shared across all calls.  */
  static bool warnedsse, warnedmmx;

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
        return GEN_INT (cum->maybe_vaarg
                        ? (cum->sse_nregs < 0
                           ? SSE_REGPARM_MAX
                           : cum->sse_regno)
                        : -1);
      else
        return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
                               &x86_64_int_parameter_registers [cum->regno],
                               cum->sse_regno);
  else
    switch (mode)
      {
        /* For now, pass fp/complex values on the stack.  */
      default:
        break;

      case BLKmode:
        if (bytes < 0)
          break;
        /* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
        if (words <= cum->nregs)
          {
            int regno = cum->regno;

            /* Fastcall allocates the first two DWORD (SImode) or
               smaller arguments to ECX and EDX.  */
            if (cum->fastcall)
              {
                if (mode == BLKmode || mode == DImode)
                  break;

                /* ECX not EAX is the first allocated register.  */
                if (regno == 0)
                  regno = 2;
              }
            ret = gen_rtx_REG (mode, regno);
          }
        break;
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
        /* 128-bit vectors go in XMM registers (non-aggregates only).  */
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_SSE && !warnedsse && cum->warn_sse)
              {
                warnedsse = true;
                warning ("SSE vector argument without SSE enabled "
                         "changes the ABI");
              }
            if (cum->sse_nregs)
              ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
          }
        break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
        /* 64-bit vectors go in MMX registers (non-aggregates only).  */
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
              {
                warnedmmx = true;
                warning ("MMX vector argument without MMX enabled "
                         "changes the ABI");
              }
            if (cum->mmx_nregs)
              ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
          }
        break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
               words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
        print_simple_rtl (stderr, ret);
      else
        fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
53c17031 2775
09b2e78d
ZD
2776/* A C expression that indicates when an argument must be passed by
2777 reference. If nonzero for an argument, a copy of that argument is
2778 made in memory and a pointer to the argument is passed instead of
2779 the argument itself. The pointer is passed in whatever way is
2780 appropriate for passing a pointer to that type. */
2781
8cd5a4e0
RH
2782static bool
2783ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2784 enum machine_mode mode ATTRIBUTE_UNUSED,
2785 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2786{
2787 if (!TARGET_64BIT)
2788 return 0;
2789
2790 if (type && int_size_in_bytes (type) == -1)
2791 {
2792 if (TARGET_DEBUG_ARG)
2793 fprintf (stderr, "function_arg_pass_by_reference\n");
2794 return 1;
2795 }
2796
2797 return 0;
2798}
2799
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Walks aggregates (including C++ base classes) looking for any
   embedded SSE vector member that demands 16-byte alignment.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  /* A bare SSE vector, unless the user explicitly lowered its alignment.  */
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  /* A type aligned below 128 bits can't contain a 128-bit-aligned member.  */
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      if (TREE_CODE (type) == RECORD_TYPE
          || TREE_CODE (type) == UNION_TYPE
          || TREE_CODE (type) == QUAL_UNION_TYPE)
        {
          tree field;

          /* C++: check base classes first.  */
          if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
            {
              tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
              int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
              int i;

              for (i = 0; i < n_bases; ++i)
                {
                  tree binfo = TREE_VEC_ELT (bases, i);
                  tree type = BINFO_TYPE (binfo);

                  if (contains_128bit_aligned_vector_p (type))
                    return true;
                }
            }
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL
                  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
                return true;
            }
        }
      /* Just for use if some languages passes arrays by value.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
        {
          if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
            return true;
        }
      else
        abort ();
    }
  return false;
}
2855
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  Result is clamped to the 32..128 bit range.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
         make an exception for SSE modes since these require 128bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
      if (!TARGET_SSE)
        align = PARM_BOUNDARY;
      else if (!type)
        {
          if (!SSE_REG_MODE_P (mode))
            align = PARM_BOUNDARY;
        }
      else
        {
          if (!contains_128bit_aligned_vector_p (type))
            align = PARM_BOUNDARY;
        }
    }
  if (align > 128)
    align = 128;
  return align;
}
2895
2896/* Return true if N is a possible register number of function value. */
2897bool
b96a374d 2898ix86_function_value_regno_p (int regno)
53c17031
JH
2899{
2900 if (!TARGET_64BIT)
2901 {
2902 return ((regno) == 0
2903 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2904 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2905 }
2906 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2907 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2908 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2909}
2910
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype)
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
                                     REGPARM_MAX, SSE_REGPARM_MAX,
                                     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we need
         to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
        ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype),
                        ix86_value_regno (TYPE_MODE (valtype)));
}
2933
/* Return false iff type is returned in memory.  (Nonzero result means the
   caller must pass a hidden pointer to a returned aggregate.)  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = TYPE_MODE (type);

  /* 64-bit: memory iff the psABI classification says so.  */
  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  /* MS ABI returns small aggregates in registers.  */
  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
        return 0;

      /* MMX/3dNow values are returned on the stack, since we've
         got to EMMS/FEMMS before returning.  */
      if (size == 8)
        return 1;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
        return (TARGET_SSE ? 0 : 1);
    }

  /* Long double fits %st(0).  */
  if (mode == XFmode)
    return 0;

  if (size > 12)
    return 1;
  return 0;
}
2975
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  /* Warn at most once per compilation.  */
  static bool warned;

  if (!TARGET_SSE && type && !warned)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (mode == TImode
          || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
        {
          warned = true;
          warning ("SSE vector return without SSE enabled changes the ABI");
        }
    }

  /* NULL means no hidden struct-return pointer register is forced here.  */
  return NULL;
}
3007
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
        {
        /* Scalar and complex float libcall results come back in %xmm0.  */
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
          return gen_rtx_REG (mode, FIRST_SSE_REG);
        case XFmode:
          return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        /* Complex long-double results have no single-register home.  */
        case XCmode:
        case TCmode:
          return NULL;
        default:
          return gen_rtx_REG (mode, 0);
        }
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode));
}
3035
3036/* Given a mode, return the register to use for a return value. */
3037
3038static int
b96a374d 3039ix86_value_regno (enum machine_mode mode)
b069de3b 3040{
a30b6839 3041 /* Floating point return values in %st(0). */
b069de3b
SS
3042 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3043 return FIRST_FLOAT_REG;
a30b6839
RH
3044 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3045 we prevent this case when sse is not available. */
3046 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 3047 return FIRST_SSE_REG;
a30b6839 3048 /* Everything else in %eax. */
b069de3b 3049 return 0;
53c17031 3050}
ad919812
JH
3051\f
3052/* Create the va_list data type. */
53c17031 3053
c35d187f
RH
3054static tree
3055ix86_build_builtin_va_list (void)
ad919812
JH
3056{
3057 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 3058
ad919812
JH
3059 /* For i386 we use plain pointer to argument area. */
3060 if (!TARGET_64BIT)
3061 return build_pointer_type (char_type_node);
3062
f1e639b1 3063 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
3064 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3065
fce5a9f2 3066 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 3067 unsigned_type_node);
fce5a9f2 3068 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
3069 unsigned_type_node);
3070 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3071 ptr_type_node);
3072 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3073 ptr_type_node);
3074
3075 DECL_FIELD_CONTEXT (f_gpr) = record;
3076 DECL_FIELD_CONTEXT (f_fpr) = record;
3077 DECL_FIELD_CONTEXT (f_ovf) = record;
3078 DECL_FIELD_CONTEXT (f_sav) = record;
3079
3080 TREE_CHAIN (record) = type_decl;
3081 TYPE_NAME (record) = type_decl;
3082 TYPE_FIELDS (record) = f_gpr;
3083 TREE_CHAIN (f_gpr) = f_fpr;
3084 TREE_CHAIN (f_fpr) = f_ovf;
3085 TREE_CHAIN (f_ovf) = f_sav;
3086
3087 layout_type (record);
3088
3089 /* The correct type is an array type of one element. */
3090 return build_array_type (record, build_index_type (size_zero_node));
3091}
3092
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Emits prologue code
   that dumps the unnamed integer argument registers, and (via the
   sse_prologue_save pattern) the SSE argument registers, into the register
   save area so va_arg can find them.  64-bit only; no-op otherwise.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  /* The save area holds SSE registers, so needs 16-byte alignment.  */
  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
              && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the remaining integer parameter registers.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function.  We use
         sse_prologue_save insn template that produces computed jump across
         SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      if (next_cum.sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (next_cum.sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
         pointing 127 bytes after first byte to store - this is needed to keep
         instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (next_cum.sse_regno), label));
    }

}
3190
/* Implement va_start.  Fills in the four va_list fields from the current
   function's argument bookkeeping; 32-bit falls back to the generic
   pointer-bump expansion.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
             (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: byte offset of the first unused GP register slot.  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
             build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset: SSE slots (16 bytes each) start after the GP slots.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
             build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
               build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
3253
3254/* Implement va_arg. */
cd3ce9b4 3255
23a60a04
JM
3256tree
3257ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 3258{
cd3ce9b4
JM
3259 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3260 tree f_gpr, f_fpr, f_ovf, f_sav;
3261 tree gpr, fpr, ovf, sav, t;
3262 int size, rsize;
3263 tree lab_false, lab_over = NULL_TREE;
3264 tree addr, t2;
3265 rtx container;
3266 int indirect_p = 0;
3267 tree ptrtype;
3268
3269 /* Only 64bit target needs something special. */
3270 if (!TARGET_64BIT)
23a60a04 3271 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4
JM
3272
3273 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3274 f_fpr = TREE_CHAIN (f_gpr);
3275 f_ovf = TREE_CHAIN (f_fpr);
3276 f_sav = TREE_CHAIN (f_ovf);
3277
3278 valist = build_fold_indirect_ref (valist);
44de5aeb
RK
3279 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3280 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3281 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3282 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
cd3ce9b4 3283
08b0dc1b
RH
3284 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3285 if (indirect_p)
3286 type = build_pointer_type (type);
cd3ce9b4 3287 size = int_size_in_bytes (type);
cd3ce9b4
JM
3288 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3289
3290 container = construct_container (TYPE_MODE (type), type, 0,
3291 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3292 /*
3293 * Pull the value out of the saved registers ...
3294 */
3295
3296 addr = create_tmp_var (ptr_type_node, "addr");
3297 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3298
3299 if (container)
3300 {
3301 int needed_intregs, needed_sseregs;
e52a6df5 3302 bool need_temp;
cd3ce9b4
JM
3303 tree int_addr, sse_addr;
3304
3305 lab_false = create_artificial_label ();
3306 lab_over = create_artificial_label ();
3307
3308 examine_argument (TYPE_MODE (type), type, 0,
3309 &needed_intregs, &needed_sseregs);
3310
e52a6df5
JB
3311 need_temp = (!REG_P (container)
3312 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3313 || TYPE_ALIGN (type) > 128));
cd3ce9b4
JM
3314
3315 /* In case we are passing structure, verify that it is consecutive block
3316 on the register save area. If not we need to do moves. */
3317 if (!need_temp && !REG_P (container))
3318 {
3319 /* Verify that all registers are strictly consecutive */
3320 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3321 {
3322 int i;
3323
3324 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3325 {
3326 rtx slot = XVECEXP (container, 0, i);
3327 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3328 || INTVAL (XEXP (slot, 1)) != i * 16)
3329 need_temp = 1;
3330 }
3331 }
3332 else
3333 {
3334 int i;
3335
3336 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3337 {
3338 rtx slot = XVECEXP (container, 0, i);
3339 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3340 || INTVAL (XEXP (slot, 1)) != i * 8)
3341 need_temp = 1;
3342 }
3343 }
3344 }
3345 if (!need_temp)
3346 {
3347 int_addr = addr;
3348 sse_addr = addr;
3349 }
3350 else
3351 {
3352 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3353 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3354 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3355 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3356 }
3357 /* First ensure that we fit completely in registers. */
3358 if (needed_intregs)
3359 {
3360 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3361 TREE_TYPE (t) = TREE_TYPE (gpr);
3362 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3363 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3364 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3365 gimplify_and_add (t, pre_p);
3366 }
3367 if (needed_sseregs)
3368 {
3369 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3370 + REGPARM_MAX * 8, 0);
3371 TREE_TYPE (t) = TREE_TYPE (fpr);
3372 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3373 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3374 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3375 gimplify_and_add (t, pre_p);
3376 }
3377
3378 /* Compute index to start of area used for integer regs. */
3379 if (needed_intregs)
3380 {
3381 /* int_addr = gpr + sav; */
3382 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3383 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3384 gimplify_and_add (t, pre_p);
3385 }
3386 if (needed_sseregs)
3387 {
3388 /* sse_addr = fpr + sav; */
3389 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3390 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3391 gimplify_and_add (t, pre_p);
3392 }
3393 if (need_temp)
3394 {
3395 int i;
3396 tree temp = create_tmp_var (type, "va_arg_tmp");
3397
3398 /* addr = &temp; */
3399 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3400 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3401 gimplify_and_add (t, pre_p);
3402
3403 for (i = 0; i < XVECLEN (container, 0); i++)
3404 {
3405 rtx slot = XVECEXP (container, 0, i);
3406 rtx reg = XEXP (slot, 0);
3407 enum machine_mode mode = GET_MODE (reg);
3408 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3409 tree addr_type = build_pointer_type (piece_type);
3410 tree src_addr, src;
3411 int src_offset;
3412 tree dest_addr, dest;
3413
3414 if (SSE_REGNO_P (REGNO (reg)))
3415 {
3416 src_addr = sse_addr;
3417 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3418 }
3419 else
3420 {
3421 src_addr = int_addr;
3422 src_offset = REGNO (reg) * 8;
3423 }
23a60a04 3424 src_addr = fold_convert (addr_type, src_addr);
cd3ce9b4
JM
3425 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3426 size_int (src_offset)));
3427 src = build_fold_indirect_ref (src_addr);
3428
23a60a04 3429 dest_addr = fold_convert (addr_type, addr);
cd3ce9b4
JM
3430 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3431 size_int (INTVAL (XEXP (slot, 1)))));
3432 dest = build_fold_indirect_ref (dest_addr);
3433
3434 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3435 gimplify_and_add (t, pre_p);
3436 }
3437 }
3438
3439 if (needed_intregs)
3440 {
3441 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3442 build_int_2 (needed_intregs * 8, 0));
3443 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3444 gimplify_and_add (t, pre_p);
3445 }
3446 if (needed_sseregs)
3447 {
3448 t =
3449 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3450 build_int_2 (needed_sseregs * 16, 0));
3451 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3452 gimplify_and_add (t, pre_p);
3453 }
3454
3455 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3456 gimplify_and_add (t, pre_p);
3457
3458 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3459 append_to_statement_list (t, pre_p);
3460 }
3461
3462 /* ... otherwise out of the overflow area. */
3463
3464 /* Care for on-stack alignment if needed. */
3465 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3466 t = ovf;
3467 else
3468 {
3469 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3470 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3471 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3472 }
3473 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3474
3475 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3476 gimplify_and_add (t2, pre_p);
3477
3478 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3479 build_int_2 (rsize * UNITS_PER_WORD, 0));
3480 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3481 gimplify_and_add (t, pre_p);
3482
3483 if (container)
3484 {
3485 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3486 append_to_statement_list (t, pre_p);
3487 }
3488
3489 ptrtype = build_pointer_type (type);
23a60a04 3490 addr = fold_convert (ptrtype, addr);
cd3ce9b4
JM
3491
3492 if (indirect_p)
3493 addr = build_fold_indirect_ref (addr);
23a60a04 3494 return build_fold_indirect_ref (addr);
cd3ce9b4 3495}
ad919812 3496\f
c3c637e3
GS
/* Return nonzero if OP is either an i387 or an SSE fp register.
   MODE is ignored; only the rtx itself is inspected.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}

/* Return nonzero if OP is an i387 (x87 stack) fp register.
   MODE is ignored.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}

/* Return nonzero if OP is a register operand that is neither an i387
   nor an SSE fp register.  */
int
register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !ANY_FP_REG_P (op);
}

/* Return nonzero if OP is a register operand other than an
   i387 fp register (SSE registers are still accepted).  */
int
register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !FP_REG_P (op);
}
3525
7dd4b4a3
JH
/* Return nonzero if OP is a general operand representable on x86_64.
   On 32-bit targets this is plain general_operand; on 64-bit targets
   immediates must additionally fit in a sign-extended 32-bit value.  */

int
x86_64_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a general operand representable on x86_64
   as either a sign-extended or a zero-extended 32-bit constant.  */

int
x86_64_szext_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64
   (a register, or a constant fitting in a sign-extended 32 bits).  */

int
x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand acceptable by the
   movabs patterns.  Under PIC, symbolic constants are rejected since
   they need to go through the GOT instead.  */

int
x86_64_movabs_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if operand number OPNUM of INSN is a MEM that should
   be matched by the movabs* patterns, i.e. either volatile_ok is in
   effect or the memory reference is not volatile.  Aborts if the insn
   does not have the expected SET (possibly inside a PARALLEL) shape.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  mem = XEXP (set, opnum);
  /* Look through paradoxical/partial SUBREGs to the underlying MEM.  */
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  if (GET_CODE (mem) != MEM)
    abort ();
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64,
   allowing both sign- and zero-extended 32-bit constants.  */

int
x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64
   (fits in a sign-extended 32-bit immediate field).  */

int
x86_64_immediate_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is an immediate representable on x86_64 as a
   zero-extended 32-bit value.  */

int
x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return x86_64_zero_extended_value (op);
}
3627
794a292d
JJ
3628/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3629 for shift & compare patterns, as shifting by 0 does not change flags),
3630 else return zero. */
3631
3632int
b96a374d 3633const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3634{
3635 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3636}
3637
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  Also accepts GOT/GOTOFF/GOTPCREL
   unspecs, with an optional integer offset in the @GOTOFF case.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* Bare symbol/label, or a pic unspec without offset.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      /* Anything else must be symbol + integer constant.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      /* Check the symbol wrapped inside the @GOTOFF unspec.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 3682
/* Return true if the operand contains a @GOT or @GOTOFF reference,
   i.e. a CONST wrapping a pic-related UNSPEC (optionally inside a
   PLUS).  On 64-bit targets only @GOTPCREL qualifies.  */

int
pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (op) == UNSPEC
	  && XINT (op, 1) == UNSPEC_GOTPCREL)
	return 1;
      if (GET_CODE (op) == PLUS
	  && GET_CODE (XEXP (op, 0)) == UNSPEC
	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
    }
  else
    {
      /* On ia32 any UNSPEC inside the CONST is a pic reference.  */
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
2a2ab3f9 3714
623fe810
RH
/* Return true if OP is a symbolic operand that resolves locally
   (a label, a symbol known to bind locally, or a compiler-generated
   internal label).  An integer offset wrapped in CONST+PLUS is
   stripped first.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}
3745
/* Test for various thread-local symbols.  Returns the TLS model of
   OP (nonzero) when OP is a TLS SYMBOL_REF, else 0.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Helper for the per-model predicates below: return nonzero when OP
   is a SYMBOL_REF whose TLS model is exactly KIND.  */

static inline int
tls_symbolic_operand_1 (rtx op, enum tls_model kind)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op) == kind;
}

/* Return nonzero if OP uses the global-dynamic TLS model.  */
int
global_dynamic_symbolic_operand (rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

/* Return nonzero if OP uses the local-dynamic TLS model.  */
int
local_dynamic_symbolic_operand (rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

/* Return nonzero if OP uses the initial-exec TLS model.  */
int
initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

/* Return nonzero if OP uses the local-exec TLS model.  */
int
local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}
3789
28d52ffb
RH
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
79325812 3818
4977bab6
ZW
/* Test for a valid operand for a sibling call instruction.  Like
   call_insn_operand, but stricter: besides SYMBOL_REFs only plain
   registers are allowed (no general memory addresses), and the arg
   pointer / virtual regs are rejected since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  */
  return register_operand (op, Pmode);
}
3842
/* Return nonzero if OP is a constant call address: a SYMBOL_REF,
   optionally with an integer offset (CONST (PLUS sym const_int)).  */

int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}
2a2ab3f9 3852
/* Match exactly the zero constant of MODE.  */

int
const0_operand (rtx op, enum machine_mode mode)
{
  return op == CONST0_RTX (mode);
}

/* Match exactly the integer constant one.  */

int
const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands (index scales).  */

int
const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* Match a CONST_INT in the range 0..3.  */

int
const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
}

/* Match a CONST_INT in the range 0..7.  */

int
const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
}

/* Match a CONST_INT in the range 0..15.  */

int
const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
}

/* Match a CONST_INT in the range 0..255.  */

int
const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
}
3899
3900
/* True if this is a constant appropriate for an increment or decrement
   (exactly +1 or -1).  */

int
incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags registers, since the carry flag is not set.  Avoid them
     there unless optimizing for size.  */
  if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}
2a2ab3f9 3912
371bc54b
JH
3913/* Return nonzero if OP is acceptable as operand of DImode shift
3914 expander. */
3915
3916int
b96a374d 3917shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3918{
3919 if (TARGET_64BIT)
3920 return nonimmediate_operand (op, mode);
3921 else
3922 return register_operand (op, mode);
3923}
3924
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG to the underlying hard/pseudo register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return nonzero if OP is an MMX register.  MODE is ignored.  */

int
mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}
3949
2c5a510c
RH
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  /* Also reject any other register in the virtual range.  */
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
3987
7ec70495
JH
/* Return false if this is any eliminable register or stack register
   (esp cannot be used as an index), otherwise work like
   register_operand.

   NOTE(review): despite the comment, the fall-through below calls
   general_operand, not register_operand — since !REG_P has already
   been rejected the effect is similar, but verify this is intended.  */

int
index_register_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  return general_operand (op, mode);
}
4009
/* Return true if op is a Q_REGS class register (one with a QImode
   low part: ax, bx, cx, dx).  */

int
q_regs_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return ANY_QI_REG_P (op);
}

/* Return true if op is the flags register (with a non-VOID mode).  */

int
flags_reg_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
b840bfb0 4043
/* Return nonzero if OP is a constant-pool load of a vector whose
   elements above element 0 are all zero, i.e. a scalar load that
   implicitly zero-extends into the full vector register.  */

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  /* The MEM must reference the constant pool so the value is known.  */
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Element 0 (the scalar) may be anything; every upper element must
     be zero.  The loop deliberately stops before index 0.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}
4067
fdc4b40b
JH
/* Return 1 when OP is an operand acceptable for a standard SSE move:
   any nonimmediate operand, or the all-zeros vector constant.  */

int
vector_move_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  return (op == CONST0_RTX (GET_MODE (op)));
}
4078
74dc3e94
RH
/* Return true if OP is a valid address that does not contain a
   segment override.  */

int
no_seg_address_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  /* address_operand accepted it, so decomposition must succeed.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}
4095
915119a5
BS
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */

int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
      /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
      /* These are equivalent to ones above in non-IEEE comparisons
	 (they collapse to the direct forms when NaNs can be ignored).  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode, i.e.
   one whose result can be read from the CC register mode that the
   compare producing XEXP (op, 0) actually sets.  */
int
ix86_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (!COMPARISON_P (op))
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when expressible as a single
	 jump (no bypass or second test needed).  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      /* ZF is valid in every CC mode.  */
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned tests need the carry flag, only present in CCmode.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
4165
e6e81735
JH
4166/* Return 1 if OP is a valid comparison operator testing carry flag
4167 to be set. */
4168int
8d531ab9 4169ix86_carry_flag_operator (rtx op, enum machine_mode mode)
e6e81735
JH
4170{
4171 enum machine_mode inmode;
4172 enum rtx_code code = GET_CODE (op);
4173
4174 if (mode != VOIDmode && GET_MODE (op) != mode)
4175 return 0;
ec8e098d 4176 if (!COMPARISON_P (op))
e6e81735
JH
4177 return 0;
4178 inmode = GET_MODE (XEXP (op, 0));
4179 if (GET_CODE (XEXP (op, 0)) != REG
4180 || REGNO (XEXP (op, 0)) != 17
4181 || XEXP (op, 1) != const0_rtx)
4182 return 0;
4183
4184 if (inmode == CCFPmode || inmode == CCFPUmode)
4185 {
4186 enum rtx_code second_code, bypass_code;
4187
4188 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4189 if (bypass_code != NIL || second_code != NIL)
4190 return 0;
4191 code = ix86_fp_compare_code_to_integer (code);
4192 }
4193 else if (inmode != CCmode)
4194 return 0;
4195 return code == LTU;
4196}
4197
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (!COMPARISON_P (op))
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      /* FP comparisons must reduce to a single test, then map to the
	 equivalent integer condition for the checks below.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 4234
e9e80858
JH
4235/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4236
4237int
8d531ab9 4238promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
4239{
4240 switch (GET_CODE (op))
4241 {
4242 case MULT:
4243 /* Modern CPUs have same latency for HImode and SImode multiply,
4244 but 386 and 486 do HImode multiply faster. */
9e555526 4245 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4246 case PLUS:
4247 case AND:
4248 case IOR:
4249 case XOR:
4250 case ASHIFT:
4251 return 1;
4252 default:
4253 return 0;
4254 }
4255}
4256
e075ae69
RH
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
4270
/* Match an SI or HImode register for a zero_extract (DImode is also
   allowed on 64-bit targets).  */

int
ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts
     (ax/bx/cx/dx, i.e. hard regno < 4) or not-yet-allocated pseudos.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
4288
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      /* The operation itself must be carried out in a float mode.  */
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

/* Return 1 if OP is a MULT rtx.  MODE is ignored.  */

int
mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}

/* Return 1 if OP is a DIV rtx.  MODE is ignored.  */

int
div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}
0a726ef1
JL
4322
/* Return 1 if OP is an arithmetic or logical operation (in MODE, when
   MODE is not VOIDmode).  */
int
arith_or_logical_operator (rtx op, enum machine_mode mode)
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && ARITHMETIC_P (op));
}
4329
/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* memory_operand accepted it, so decomposition must succeed.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
4345
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept the (and (zero_extract reg 8 8) const_int) shape that
     testqi_ext_ccno_0 produces (a test of the %ah/%bh/... byte).  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 4370
e075ae69
RH
4371/* Returns 1 if OP is memory operand that can not be represented by the
4372 modRM array. */
d784886d
RK
4373
4374int
8d531ab9 4375long_memory_operand (rtx op, enum machine_mode mode)
d784886d 4376{
e075ae69 4377 if (! memory_operand (op, mode))
d784886d
RK
4378 return 0;
4379
e075ae69 4380 return memory_address_length (op) != 0;
d784886d 4381}
2247f6ed
JH
4382
/* Return nonzero if the rtx is known aligned (to at least 4 bytes).  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* A scaled index by >= 4 is aligned regardless of the register.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
e075ae69 4434\f
881b2a96
RS
4435/* Initialize the table of extra 80387 mathematical constants. */
4436
4437static void
b96a374d 4438init_ext_80387_constants (void)
881b2a96
RS
4439{
4440 static const char * cst[5] =
4441 {
4442 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4443 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4444 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4445 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4446 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4447 };
4448 int i;
4449
4450 for (i = 0; i < 5; i++)
4451 {
4452 real_from_string (&ext_80387_constants_table[i], cst[i]);
4453 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 4454 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 4455 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
4456 }
4457
4458 ext_80387_constants_init = 1;
4459}
4460
e075ae69 4461/* Return true if the constant is something that can be loaded with
881b2a96 4462 a special instruction. */
57dbca5e
BS
4463
4464int
b96a374d 4465standard_80387_constant_p (rtx x)
57dbca5e 4466{
2b04e52b 4467 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4468 return -1;
881b2a96 4469
2b04e52b
JH
4470 if (x == CONST0_RTX (GET_MODE (x)))
4471 return 1;
4472 if (x == CONST1_RTX (GET_MODE (x)))
4473 return 2;
881b2a96 4474
22cc69c4
RS
4475 /* For XFmode constants, try to find a special 80387 instruction when
4476 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 4477 if (GET_MODE (x) == XFmode
22cc69c4 4478 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
4479 {
4480 REAL_VALUE_TYPE r;
4481 int i;
4482
4483 if (! ext_80387_constants_init)
4484 init_ext_80387_constants ();
4485
4486 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4487 for (i = 0; i < 5; i++)
4488 if (real_identical (&r, &ext_80387_constants_table[i]))
4489 return i + 3;
4490 }
4491
e075ae69 4492 return 0;
57dbca5e
BS
4493}
4494
881b2a96
RS
4495/* Return the opcode of the special instruction to be used to load
4496 the constant X. */
4497
4498const char *
b96a374d 4499standard_80387_constant_opcode (rtx x)
881b2a96
RS
4500{
4501 switch (standard_80387_constant_p (x))
4502 {
b96a374d 4503 case 1:
881b2a96
RS
4504 return "fldz";
4505 case 2:
4506 return "fld1";
b96a374d 4507 case 3:
881b2a96
RS
4508 return "fldlg2";
4509 case 4:
4510 return "fldln2";
b96a374d 4511 case 5:
881b2a96
RS
4512 return "fldl2e";
4513 case 6:
4514 return "fldl2t";
b96a374d 4515 case 7:
881b2a96
RS
4516 return "fldpi";
4517 }
4518 abort ();
4519}
4520
4521/* Return the CONST_DOUBLE representing the 80387 constant that is
4522 loaded by the specified special instruction. The argument IDX
4523 matches the return value from standard_80387_constant_p. */
4524
4525rtx
b96a374d 4526standard_80387_constant_rtx (int idx)
881b2a96
RS
4527{
4528 int i;
4529
4530 if (! ext_80387_constants_init)
4531 init_ext_80387_constants ();
4532
4533 switch (idx)
4534 {
4535 case 3:
4536 case 4:
4537 case 5:
4538 case 6:
4539 case 7:
4540 i = idx - 3;
4541 break;
4542
4543 default:
4544 abort ();
4545 }
4546
1f48e56d 4547 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4548 XFmode);
881b2a96
RS
4549}
4550
2b04e52b
JH
4551/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4552 */
4553int
b96a374d 4554standard_sse_constant_p (rtx x)
2b04e52b 4555{
0e67d460
JH
4556 if (x == const0_rtx)
4557 return 1;
2b04e52b
JH
4558 return (x == CONST0_RTX (GET_MODE (x)));
4559}
4560
2a2ab3f9
JVA
4561/* Returns 1 if OP contains a symbol reference */
4562
4563int
b96a374d 4564symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4565{
8d531ab9
KH
4566 const char *fmt;
4567 int i;
2a2ab3f9
JVA
4568
4569 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4570 return 1;
4571
4572 fmt = GET_RTX_FORMAT (GET_CODE (op));
4573 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4574 {
4575 if (fmt[i] == 'E')
4576 {
8d531ab9 4577 int j;
2a2ab3f9
JVA
4578
4579 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4580 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4581 return 1;
4582 }
e9a25f70 4583
2a2ab3f9
JVA
4584 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4585 return 1;
4586 }
4587
4588 return 0;
4589}
e075ae69
RH
4590
4591/* Return 1 if it is appropriate to emit `ret' instructions in the
4592 body of a function. Do this only if the epilogue is simple, needing a
4593 couple of insns. Prior to reloading, we can't tell how many registers
4594 must be saved, so return 0 then. Return 0 if there is no frame
4595 marker to de-allocate.
4596
4597 If NON_SAVING_SETJMP is defined and true, then it is not possible
4598 for the epilogue to be simple, so return 0. This is a special case
4599 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4600 until final, but jump_optimize may need to know sooner if a
4601 `return' is OK. */
32b5b1aa
SC
4602
4603int
b96a374d 4604ix86_can_use_return_insn_p (void)
32b5b1aa 4605{
4dd2ac2c 4606 struct ix86_frame frame;
9a7372d6 4607
e075ae69
RH
4608#ifdef NON_SAVING_SETJMP
4609 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4610 return 0;
4611#endif
9a7372d6
RH
4612
4613 if (! reload_completed || frame_pointer_needed)
4614 return 0;
32b5b1aa 4615
9a7372d6
RH
4616 /* Don't allow more than 32 pop, since that's all we can do
4617 with one instruction. */
4618 if (current_function_pops_args
4619 && current_function_args_size >= 32768)
e075ae69 4620 return 0;
32b5b1aa 4621
4dd2ac2c
JH
4622 ix86_compute_frame_layout (&frame);
4623 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4624}
6189a572
JH
4625\f
4626/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4627int
b96a374d 4628x86_64_sign_extended_value (rtx value)
6189a572
JH
4629{
4630 switch (GET_CODE (value))
4631 {
4632 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4633 to be at least 32 and this all acceptable constants are
4634 represented as CONST_INT. */
4635 case CONST_INT:
4636 if (HOST_BITS_PER_WIDE_INT == 32)
4637 return 1;
4638 else
4639 {
4640 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4641 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
4642 }
4643 break;
4644
75d38379
JJ
4645 /* For certain code models, the symbolic references are known to fit.
4646 in CM_SMALL_PIC model we know it fits if it is local to the shared
4647 library. Don't count TLS SYMBOL_REFs here, since they should fit
4648 only if inside of UNSPEC handled below. */
6189a572 4649 case SYMBOL_REF:
d7222e38
JH
4650 /* TLS symbols are not constant. */
4651 if (tls_symbolic_operand (value, Pmode))
4652 return false;
c05dbe81 4653 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
6189a572
JH
4654
4655 /* For certain code models, the code is near as well. */
4656 case LABEL_REF:
c05dbe81
JH
4657 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4658 || ix86_cmodel == CM_KERNEL);
6189a572
JH
4659
4660 /* We also may accept the offsetted memory references in certain special
4661 cases. */
4662 case CONST:
75d38379
JJ
4663 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4664 switch (XINT (XEXP (value, 0), 1))
4665 {
4666 case UNSPEC_GOTPCREL:
4667 case UNSPEC_DTPOFF:
4668 case UNSPEC_GOTNTPOFF:
4669 case UNSPEC_NTPOFF:
4670 return 1;
4671 default:
4672 break;
4673 }
4674 if (GET_CODE (XEXP (value, 0)) == PLUS)
6189a572
JH
4675 {
4676 rtx op1 = XEXP (XEXP (value, 0), 0);
4677 rtx op2 = XEXP (XEXP (value, 0), 1);
4678 HOST_WIDE_INT offset;
4679
4680 if (ix86_cmodel == CM_LARGE)
4681 return 0;
4682 if (GET_CODE (op2) != CONST_INT)
4683 return 0;
4684 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4685 switch (GET_CODE (op1))
4686 {
4687 case SYMBOL_REF:
75d38379 4688 /* For CM_SMALL assume that latest object is 16MB before
6189a572
JH
4689 end of 31bits boundary. We may also accept pretty
4690 large negative constants knowing that all objects are
4691 in the positive half of address space. */
4692 if (ix86_cmodel == CM_SMALL
75d38379 4693 && offset < 16*1024*1024
6189a572
JH
4694 && trunc_int_for_mode (offset, SImode) == offset)
4695 return 1;
4696 /* For CM_KERNEL we know that all object resist in the
4697 negative half of 32bits address space. We may not
4698 accept negative offsets, since they may be just off
d6a7951f 4699 and we may accept pretty large positive ones. */
6189a572
JH
4700 if (ix86_cmodel == CM_KERNEL
4701 && offset > 0
4702 && trunc_int_for_mode (offset, SImode) == offset)
4703 return 1;
4704 break;
4705 case LABEL_REF:
4706 /* These conditions are similar to SYMBOL_REF ones, just the
4707 constraints for code models differ. */
c05dbe81 4708 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4709 && offset < 16*1024*1024
6189a572
JH
4710 && trunc_int_for_mode (offset, SImode) == offset)
4711 return 1;
4712 if (ix86_cmodel == CM_KERNEL
4713 && offset > 0
4714 && trunc_int_for_mode (offset, SImode) == offset)
4715 return 1;
4716 break;
75d38379
JJ
4717 case UNSPEC:
4718 switch (XINT (op1, 1))
4719 {
4720 case UNSPEC_DTPOFF:
4721 case UNSPEC_NTPOFF:
4722 if (offset > 0
4723 && trunc_int_for_mode (offset, SImode) == offset)
4724 return 1;
4725 }
4726 break;
6189a572
JH
4727 default:
4728 return 0;
4729 }
4730 }
4731 return 0;
4732 default:
4733 return 0;
4734 }
4735}
4736
4737/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4738int
b96a374d 4739x86_64_zero_extended_value (rtx value)
6189a572
JH
4740{
4741 switch (GET_CODE (value))
4742 {
4743 case CONST_DOUBLE:
4744 if (HOST_BITS_PER_WIDE_INT == 32)
4745 return (GET_MODE (value) == VOIDmode
4746 && !CONST_DOUBLE_HIGH (value));
4747 else
4748 return 0;
4749 case CONST_INT:
4750 if (HOST_BITS_PER_WIDE_INT == 32)
4751 return INTVAL (value) >= 0;
4752 else
b531087a 4753 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
4754 break;
4755
4756 /* For certain code models, the symbolic references are known to fit. */
4757 case SYMBOL_REF:
d7222e38
JH
4758 /* TLS symbols are not constant. */
4759 if (tls_symbolic_operand (value, Pmode))
4760 return false;
6189a572
JH
4761 return ix86_cmodel == CM_SMALL;
4762
4763 /* For certain code models, the code is near as well. */
4764 case LABEL_REF:
4765 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4766
4767 /* We also may accept the offsetted memory references in certain special
4768 cases. */
4769 case CONST:
4770 if (GET_CODE (XEXP (value, 0)) == PLUS)
4771 {
4772 rtx op1 = XEXP (XEXP (value, 0), 0);
4773 rtx op2 = XEXP (XEXP (value, 0), 1);
4774
4775 if (ix86_cmodel == CM_LARGE)
4776 return 0;
4777 switch (GET_CODE (op1))
4778 {
4779 case SYMBOL_REF:
4780 return 0;
d6a7951f 4781 /* For small code model we may accept pretty large positive
6189a572
JH
4782 offsets, since one bit is available for free. Negative
4783 offsets are limited by the size of NULL pointer area
4784 specified by the ABI. */
4785 if (ix86_cmodel == CM_SMALL
4786 && GET_CODE (op2) == CONST_INT
4787 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4788 && (trunc_int_for_mode (INTVAL (op2), SImode)
4789 == INTVAL (op2)))
4790 return 1;
4791 /* ??? For the kernel, we may accept adjustment of
4792 -0x10000000, since we know that it will just convert
d6a7951f 4793 negative address space to positive, but perhaps this
6189a572
JH
4794 is not worthwhile. */
4795 break;
4796 case LABEL_REF:
4797 /* These conditions are similar to SYMBOL_REF ones, just the
4798 constraints for code models differ. */
4799 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4800 && GET_CODE (op2) == CONST_INT
4801 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4802 && (trunc_int_for_mode (INTVAL (op2), SImode)
4803 == INTVAL (op2)))
4804 return 1;
4805 break;
4806 default:
4807 return 0;
4808 }
4809 }
4810 return 0;
4811 default:
4812 return 0;
4813 }
4814}
6fca22eb
RH
4815
4816/* Value should be nonzero if functions must have frame pointers.
4817 Zero means the frame pointer need not be set up (and parms may
4818 be accessed via the stack pointer) in functions that seem suitable. */
4819
4820int
b96a374d 4821ix86_frame_pointer_required (void)
6fca22eb
RH
4822{
4823 /* If we accessed previous frames, then the generated code expects
4824 to be able to access the saved ebp value in our frame. */
4825 if (cfun->machine->accesses_prev_frame)
4826 return 1;
a4f31c00 4827
6fca22eb
RH
4828 /* Several x86 os'es need a frame pointer for other reasons,
4829 usually pertaining to setjmp. */
4830 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4831 return 1;
4832
4833 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4834 the frame pointer by default. Turn it back on now if we've not
4835 got a leaf function. */
a7943381 4836 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4837 && (!current_function_is_leaf))
4838 return 1;
4839
4840 if (current_function_profile)
6fca22eb
RH
4841 return 1;
4842
4843 return 0;
4844}
4845
4846/* Record that the current function accesses previous call frames. */
4847
4848void
b96a374d 4849ix86_setup_frame_addresses (void)
6fca22eb
RH
4850{
4851 cfun->machine->accesses_prev_frame = 1;
4852}
e075ae69 4853\f
145aacc2
RH
4854#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4855# define USE_HIDDEN_LINKONCE 1
4856#else
4857# define USE_HIDDEN_LINKONCE 0
4858#endif
4859
bd09bdeb 4860static int pic_labels_used;
e9a25f70 4861
145aacc2
RH
4862/* Fills in the label name that should be used for a pc thunk for
4863 the given register. */
4864
4865static void
b96a374d 4866get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
4867{
4868 if (USE_HIDDEN_LINKONCE)
4869 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4870 else
4871 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4872}
4873
4874
e075ae69
RH
4875/* This function generates code for -fpic that loads %ebx with
4876 the return address of the caller and then returns. */
4877
4878void
b96a374d 4879ix86_file_end (void)
e075ae69
RH
4880{
4881 rtx xops[2];
bd09bdeb 4882 int regno;
32b5b1aa 4883
bd09bdeb 4884 for (regno = 0; regno < 8; ++regno)
7c262518 4885 {
145aacc2
RH
4886 char name[32];
4887
bd09bdeb
RH
4888 if (! ((pic_labels_used >> regno) & 1))
4889 continue;
4890
145aacc2 4891 get_pc_thunk_name (name, regno);
bd09bdeb 4892
145aacc2
RH
4893 if (USE_HIDDEN_LINKONCE)
4894 {
4895 tree decl;
4896
4897 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4898 error_mark_node);
4899 TREE_PUBLIC (decl) = 1;
4900 TREE_STATIC (decl) = 1;
4901 DECL_ONE_ONLY (decl) = 1;
4902
4903 (*targetm.asm_out.unique_section) (decl, 0);
4904 named_section (decl, NULL, 0);
4905
a5fe455b
ZW
4906 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4907 fputs ("\t.hidden\t", asm_out_file);
4908 assemble_name (asm_out_file, name);
4909 fputc ('\n', asm_out_file);
4910 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
4911 }
4912 else
4913 {
4914 text_section ();
a5fe455b 4915 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 4916 }
bd09bdeb
RH
4917
4918 xops[0] = gen_rtx_REG (SImode, regno);
4919 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4920 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4921 output_asm_insn ("ret", xops);
7c262518 4922 }
3edc56a9 4923
a5fe455b
ZW
4924 if (NEED_INDICATE_EXEC_STACK)
4925 file_end_indicate_exec_stack ();
32b5b1aa 4926}
32b5b1aa 4927
c8c03509 4928/* Emit code for the SET_GOT patterns. */
32b5b1aa 4929
c8c03509 4930const char *
b96a374d 4931output_set_got (rtx dest)
c8c03509
RH
4932{
4933 rtx xops[3];
0d7d98ee 4934
c8c03509 4935 xops[0] = dest;
5fc0e5df 4936 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4937
c8c03509 4938 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4939 {
c8c03509
RH
4940 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4941
4942 if (!flag_pic)
4943 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4944 else
4945 output_asm_insn ("call\t%a2", xops);
4946
b069de3b
SS
4947#if TARGET_MACHO
4948 /* Output the "canonical" label name ("Lxx$pb") here too. This
4949 is what will be referred to by the Mach-O PIC subsystem. */
4950 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4951#endif
4977bab6 4952 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4953 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4954
4955 if (flag_pic)
4956 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4957 }
e075ae69 4958 else
e5cb57e8 4959 {
145aacc2
RH
4960 char name[32];
4961 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4962 pic_labels_used |= 1 << REGNO (dest);
f996902d 4963
145aacc2 4964 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4965 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4966 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4967 }
e5cb57e8 4968
c8c03509
RH
4969 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4970 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4971 else if (!TARGET_MACHO)
8e9fadc3 4972 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4973
c8c03509 4974 return "";
e9a25f70 4975}
8dfe5673 4976
0d7d98ee 4977/* Generate an "push" pattern for input ARG. */
e9a25f70 4978
e075ae69 4979static rtx
b96a374d 4980gen_push (rtx arg)
e9a25f70 4981{
c5c76735 4982 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4983 gen_rtx_MEM (Pmode,
4984 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4985 stack_pointer_rtx)),
4986 arg);
e9a25f70
JL
4987}
4988
bd09bdeb
RH
4989/* Return >= 0 if there is an unused call-clobbered register available
4990 for the entire function. */
4991
4992static unsigned int
b96a374d 4993ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4994{
4995 if (current_function_is_leaf && !current_function_profile)
4996 {
4997 int i;
4998 for (i = 2; i >= 0; --i)
4999 if (!regs_ever_live[i])
5000 return i;
5001 }
5002
5003 return INVALID_REGNUM;
5004}
fce5a9f2 5005
4dd2ac2c
JH
5006/* Return 1 if we need to save REGNO. */
5007static int
b96a374d 5008ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 5009{
bd09bdeb
RH
5010 if (pic_offset_table_rtx
5011 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5012 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 5013 || current_function_profile
8c38a24f
MM
5014 || current_function_calls_eh_return
5015 || current_function_uses_const_pool))
bd09bdeb
RH
5016 {
5017 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5018 return 0;
5019 return 1;
5020 }
1020a5ab
RH
5021
5022 if (current_function_calls_eh_return && maybe_eh_return)
5023 {
5024 unsigned i;
5025 for (i = 0; ; i++)
5026 {
b531087a 5027 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
5028 if (test == INVALID_REGNUM)
5029 break;
9b690711 5030 if (test == regno)
1020a5ab
RH
5031 return 1;
5032 }
5033 }
4dd2ac2c 5034
1020a5ab
RH
5035 return (regs_ever_live[regno]
5036 && !call_used_regs[regno]
5037 && !fixed_regs[regno]
5038 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
5039}
5040
0903fcab
JH
5041/* Return number of registers to be saved on the stack. */
5042
5043static int
b96a374d 5044ix86_nsaved_regs (void)
0903fcab
JH
5045{
5046 int nregs = 0;
0903fcab
JH
5047 int regno;
5048
4dd2ac2c 5049 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5050 if (ix86_save_reg (regno, true))
4dd2ac2c 5051 nregs++;
0903fcab
JH
5052 return nregs;
5053}
5054
5055/* Return the offset between two registers, one to be eliminated, and the other
5056 its replacement, at the start of a routine. */
5057
5058HOST_WIDE_INT
b96a374d 5059ix86_initial_elimination_offset (int from, int to)
0903fcab 5060{
4dd2ac2c
JH
5061 struct ix86_frame frame;
5062 ix86_compute_frame_layout (&frame);
564d80f4
JH
5063
5064 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 5065 return frame.hard_frame_pointer_offset;
564d80f4
JH
5066 else if (from == FRAME_POINTER_REGNUM
5067 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 5068 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
5069 else
5070 {
564d80f4
JH
5071 if (to != STACK_POINTER_REGNUM)
5072 abort ();
5073 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 5074 return frame.stack_pointer_offset;
564d80f4
JH
5075 else if (from != FRAME_POINTER_REGNUM)
5076 abort ();
0903fcab 5077 else
4dd2ac2c 5078 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
5079 }
5080}
5081
4dd2ac2c 5082/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 5083
4dd2ac2c 5084static void
b96a374d 5085ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 5086{
65954bd8 5087 HOST_WIDE_INT total_size;
564d80f4 5088 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
b19ee4bd 5089 HOST_WIDE_INT offset;
44affdae 5090 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 5091 HOST_WIDE_INT size = get_frame_size ();
65954bd8 5092
4dd2ac2c 5093 frame->nregs = ix86_nsaved_regs ();
564d80f4 5094 total_size = size;
65954bd8 5095
d7394366
JH
5096 /* During reload iteration the amount of registers saved can change.
5097 Recompute the value as needed. Do not recompute when amount of registers
5098 didn't change as reload does mutiple calls to the function and does not
5099 expect the decision to change within single iteration. */
5100 if (!optimize_size
5101 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
5102 {
5103 int count = frame->nregs;
5104
d7394366 5105 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
5106 /* The fast prologue uses move instead of push to save registers. This
5107 is significantly longer, but also executes faster as modern hardware
5108 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 5109
d9b40e8d
JH
5110 Be careful about choosing what prologue to emit: When function takes
5111 many instructions to execute we may use slow version as well as in
5112 case function is known to be outside hot spot (this is known with
5113 feedback only). Weight the size of function by number of registers
5114 to save as it is cheap to use one or two push instructions but very
5115 slow to use many of them. */
5116 if (count)
5117 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5118 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5119 || (flag_branch_probabilities
5120 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5121 cfun->machine->use_fast_prologue_epilogue = false;
5122 else
5123 cfun->machine->use_fast_prologue_epilogue
5124 = !expensive_function_p (count);
5125 }
5126 if (TARGET_PROLOGUE_USING_MOVE
5127 && cfun->machine->use_fast_prologue_epilogue)
5128 frame->save_regs_using_mov = true;
5129 else
5130 frame->save_regs_using_mov = false;
5131
5132
9ba81eaa 5133 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
5134 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5135
5136 frame->hard_frame_pointer_offset = offset;
564d80f4 5137
fcbfaa65
RK
5138 /* Do some sanity checking of stack_alignment_needed and
5139 preferred_alignment, since i386 port is the only using those features
f710504c 5140 that may break easily. */
564d80f4 5141
44affdae
JH
5142 if (size && !stack_alignment_needed)
5143 abort ();
44affdae
JH
5144 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5145 abort ();
5146 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5147 abort ();
5148 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5149 abort ();
564d80f4 5150
4dd2ac2c
JH
5151 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5152 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 5153
4dd2ac2c
JH
5154 /* Register save area */
5155 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 5156
8362f420
JH
5157 /* Va-arg area */
5158 if (ix86_save_varrargs_registers)
5159 {
5160 offset += X86_64_VARARGS_SIZE;
5161 frame->va_arg_size = X86_64_VARARGS_SIZE;
5162 }
5163 else
5164 frame->va_arg_size = 0;
5165
4dd2ac2c
JH
5166 /* Align start of frame for local function. */
5167 frame->padding1 = ((offset + stack_alignment_needed - 1)
5168 & -stack_alignment_needed) - offset;
f73ad30e 5169
4dd2ac2c 5170 offset += frame->padding1;
65954bd8 5171
4dd2ac2c
JH
5172 /* Frame pointer points here. */
5173 frame->frame_pointer_offset = offset;
54ff41b7 5174
4dd2ac2c 5175 offset += size;
65954bd8 5176
0b7ae565 5177 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
5178 all the function calls as dead code.
5179 Skipping is however impossible when function calls alloca. Alloca
5180 expander assumes that last current_function_outgoing_args_size
5181 of stack frame are unused. */
5182 if (ACCUMULATE_OUTGOING_ARGS
5183 && (!current_function_is_leaf || current_function_calls_alloca))
4dd2ac2c
JH
5184 {
5185 offset += current_function_outgoing_args_size;
5186 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5187 }
5188 else
5189 frame->outgoing_arguments_size = 0;
564d80f4 5190
002ff5bc
RH
5191 /* Align stack boundary. Only needed if we're calling another function
5192 or using alloca. */
5193 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
5194 frame->padding2 = ((offset + preferred_alignment - 1)
5195 & -preferred_alignment) - offset;
5196 else
5197 frame->padding2 = 0;
4dd2ac2c
JH
5198
5199 offset += frame->padding2;
5200
5201 /* We've reached end of stack frame. */
5202 frame->stack_pointer_offset = offset;
5203
5204 /* Size prologue needs to allocate. */
5205 frame->to_allocate =
5206 (size + frame->padding1 + frame->padding2
8362f420 5207 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 5208
b19ee4bd
JJ
5209 if ((!frame->to_allocate && frame->nregs <= 1)
5210 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
5211 frame->save_regs_using_mov = false;
5212
a5b378d6 5213 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
5214 && current_function_is_leaf)
5215 {
5216 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
5217 if (frame->save_regs_using_mov)
5218 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
5219 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5220 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5221 }
5222 else
5223 frame->red_zone_size = 0;
5224 frame->to_allocate -= frame->red_zone_size;
5225 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
5226#if 0
5227 fprintf (stderr, "nregs: %i\n", frame->nregs);
5228 fprintf (stderr, "size: %i\n", size);
5229 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5230 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 5231 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
5232 fprintf (stderr, "padding2: %i\n", frame->padding2);
5233 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 5234 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
5235 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5236 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5237 frame->hard_frame_pointer_offset);
5238 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5239#endif
65954bd8
JL
5240}
5241
0903fcab
JH
5242/* Emit code to save registers in the prologue. */
5243
5244static void
b96a374d 5245ix86_emit_save_regs (void)
0903fcab 5246{
8d531ab9 5247 int regno;
0903fcab 5248 rtx insn;
0903fcab 5249
4dd2ac2c 5250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5251 if (ix86_save_reg (regno, true))
0903fcab 5252 {
0d7d98ee 5253 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
5254 RTX_FRAME_RELATED_P (insn) = 1;
5255 }
5256}
5257
c6036a37
JH
5258/* Emit code to save registers using MOV insns. First register
5259 is restored from POINTER + OFFSET. */
5260static void
b96a374d 5261ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
5262{
5263 int regno;
5264 rtx insn;
5265
5266 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5267 if (ix86_save_reg (regno, true))
5268 {
b72f00af
RK
5269 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5270 Pmode, offset),
c6036a37
JH
5271 gen_rtx_REG (Pmode, regno));
5272 RTX_FRAME_RELATED_P (insn) = 1;
5273 offset += UNITS_PER_WORD;
5274 }
5275}
5276
839a4992 5277/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
5278 The pattern exist to put a dependency on all ebp-based memory accesses.
5279 STYLE should be negative if instructions should be marked as frame related,
5280 zero if %r11 register is live and cannot be freely used and positive
5281 otherwise. */
5282
5283static void
5284pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5285{
5286 rtx insn;
5287
5288 if (! TARGET_64BIT)
5289 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5290 else if (x86_64_immediate_operand (offset, DImode))
5291 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5292 else
5293 {
5294 rtx r11;
5295 /* r11 is used by indirect sibcall return as well, set before the
5296 epilogue and used after the epilogue. ATM indirect sibcall
5297 shouldn't be used together with huge frame sizes in one
5298 function because of the frame_size check in sibcall.c. */
5299 if (style == 0)
5300 abort ();
5301 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5302 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5303 if (style < 0)
5304 RTX_FRAME_RELATED_P (insn) = 1;
5305 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5306 offset));
5307 }
5308 if (style < 0)
5309 RTX_FRAME_RELATED_P (insn) = 1;
5310}
5311
/* Expand the prologue into a bunch of separate insns.

   Emits, in order: the optional frame-pointer push and copy, register
   saves (either discrete pushes or MOV stores), the stack-frame
   allocation (direct adjust or, for large Win32 frames, a probe via
   the allocate_stack worker), the PIC register load when needed, and
   a scheduling blockage when profiling.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp; movl %esp, %ebp -- both frame related for dwarf2.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* Registers will be stored with MOV below the allocated frame, so
       enlarge the allocation to cover them.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    /* Small (or unprobed) frames: a single stack-pointer adjustment.  */
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32: large frames must be probed page by page,
	 which the allocate_stack worker does with the size in %eax.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      if (eax_live)
	{
	  /* %eax holds an incoming value; preserve it around the probe.  */
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
	{
	  /* Reload the saved %eax from its slot above the new frame.  */
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  /* Without a red zone the MOV register saves must come after the
     frame allocation (the red-zone case was handled above).  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  /* Decide whether the PIC register must be materialized; when possible
     redirect it to a call-saved register that is otherwise free.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5423
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg to select the register set (eh_return paths restore a
   different set).  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set: when OFFSET
	   does not fit in a sign-extended 32-bit immediate, compute
	   POINTER + OFFSET into r11 and continue from offset 0.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	/* Saved registers occupy consecutive word-sized slots.  */
	offset += UNITS_PER_WORD;
      }
}
5453
/* Restore function stack, frame, and registers.

   STYLE encodes the caller's intent: 0 for a sibcall epilogue (no
   return insn is emitted), 2 for an eh_return epilogue (%ecx-adjusted
   stack pointer, different register set), anything else for a normal
   epilogue.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  /* The stack pointer is usable for addressing saves only when no
     dynamic stack adjustment may have happened.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Fold the frame pointer into the stack adjustment, reload
		 the saved frame pointer, then apply the adjustment.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      /* No frame pointer: add frame size plus the eh adjustment
		 directly to the stack pointer.  */
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  /* Discrete equivalent of leave: restore esp from ebp, then
	     pop the saved frame pointer.  */
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop each saved register in ascending regno order (the order
	 the MOV path saved them).  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5620
5621/* Reset from the function's potential modifications. */
5622
5623static void
b96a374d
AJ
5624ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5625 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
5626{
5627 if (pic_offset_table_rtx)
5628 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5629}
e075ae69
RH
5630\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a left-leaning chain of PLUS into at most four addends,
	 then classify each as index*scale, segment unspec, base/index
	 register, or displacement.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      /* The deepest left operand is the final addend.  */
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* Only the thread-pointer unspec is allowed, and only once,
		 mapping to the %fs/%gs segment override.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      /* First register seen is the base, second the index.  */
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling:
     these registers cannot be encoded as index, so swap them into the
     base slot.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
01329426
JH
5784\f
5785/* Return cost of the memory address x.
5786 For i386, it is better to use a complex address than let gcc copy
5787 the address into a reg and make a new pseudo. But not if the address
5788 requires to two regs - that would mean more pseudos with longer
5789 lifetimes. */
dcefdf67 5790static int
b96a374d 5791ix86_address_cost (rtx x)
01329426
JH
5792{
5793 struct ix86_address parts;
5794 int cost = 1;
3b3c6a3f 5795
01329426
JH
5796 if (!ix86_decompose_address (x, &parts))
5797 abort ();
5798
5799 /* More complex memory references are better. */
5800 if (parts.disp && parts.disp != const0_rtx)
5801 cost--;
74dc3e94
RH
5802 if (parts.seg != SEG_DEFAULT)
5803 cost--;
01329426
JH
5804
5805 /* Attempt to minimize number of registers in the address. */
5806 if ((parts.base
5807 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5808 || (parts.index
5809 && (!REG_P (parts.index)
5810 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5811 cost++;
5812
5813 if (parts.base
5814 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5815 && parts.index
5816 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5817 && parts.base != parts.index)
5818 cost++;
5819
5820 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5821 since it's predecode logic can't detect the length of instructions
5822 and it degenerates to vector decoded. Increase cost of such
5823 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 5824 to split such addresses or even refuse such addresses at all.
01329426
JH
5825
5826 Following addressing modes are affected:
5827 [base+scale*index]
5828 [scale*index+disp]
5829 [base+index]
0f290768 5830
01329426
JH
5831 The first and last case may be avoidable by explicitly coding the zero in
5832 memory address, but I don't have AMD-K6 machine handy to check this
5833 theory. */
5834
5835 if (TARGET_K6
5836 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5837 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5838 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5839 cost += 10;
0f290768 5840
01329426
JH
5841 return cost;
5842}
5843\f
b949ea8b
JW
5844/* If X is a machine specific address (i.e. a symbol or label being
5845 referenced as a displacement from the GOT implemented using an
5846 UNSPEC), then return the base term. Otherwise return X. */
5847
5848rtx
b96a374d 5849ix86_find_base_term (rtx x)
b949ea8b
JW
5850{
5851 rtx term;
5852
6eb791fc
JH
5853 if (TARGET_64BIT)
5854 {
5855 if (GET_CODE (x) != CONST)
5856 return x;
5857 term = XEXP (x, 0);
5858 if (GET_CODE (term) == PLUS
5859 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5860 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5861 term = XEXP (term, 0);
5862 if (GET_CODE (term) != UNSPEC
8ee41eaf 5863 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5864 return x;
5865
5866 term = XVECEXP (term, 0, 0);
5867
5868 if (GET_CODE (term) != SYMBOL_REF
5869 && GET_CODE (term) != LABEL_REF)
5870 return x;
5871
5872 return term;
5873 }
5874
69bd9368 5875 term = ix86_delegitimize_address (x);
b949ea8b
JW
5876
5877 if (GET_CODE (term) != SYMBOL_REF
5878 && GET_CODE (term) != LABEL_REF)
5879 return x;
5880
5881 return term;
5882}
5883\f
f996902d
RH
5884/* Determine if a given RTX is a valid constant. We already know this
5885 satisfies CONSTANT_P. */
5886
5887bool
b96a374d 5888legitimate_constant_p (rtx x)
f996902d
RH
5889{
5890 rtx inner;
5891
5892 switch (GET_CODE (x))
5893 {
5894 case SYMBOL_REF:
5895 /* TLS symbols are not constant. */
5896 if (tls_symbolic_operand (x, Pmode))
5897 return false;
5898 break;
5899
5900 case CONST:
5901 inner = XEXP (x, 0);
5902
5903 /* Offsets of TLS symbols are never valid.
5904 Discourage CSE from creating them. */
5905 if (GET_CODE (inner) == PLUS
5906 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5907 return false;
5908
d599f329
ZD
5909 if (GET_CODE (inner) == PLUS
5910 || GET_CODE (inner) == MINUS)
799b33a0
JH
5911 {
5912 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5913 return false;
5914 inner = XEXP (inner, 0);
5915 }
5916
f996902d
RH
5917 /* Only some unspecs are valid as "constants". */
5918 if (GET_CODE (inner) == UNSPEC)
5919 switch (XINT (inner, 1))
5920 {
5921 case UNSPEC_TPOFF:
cb0e3e3f 5922 case UNSPEC_NTPOFF:
f996902d 5923 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
cb0e3e3f
RH
5924 case UNSPEC_DTPOFF:
5925 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5926 default:
5927 return false;
5928 }
5929 break;
5930
5931 default:
5932 break;
5933 }
5934
5935 /* Otherwise we handle everything else in the move patterns. */
5936 return true;
5937}
5938
3a04ff64
RH
5939/* Determine if it's legal to put X into the constant pool. This
5940 is not possible for the address of thread-local symbols, which
5941 is checked above. */
5942
5943static bool
b96a374d 5944ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5945{
5946 return !legitimate_constant_p (x);
5947}
5948
f996902d
RH
5949/* Determine if a given RTX is a valid constant address. */
5950
5951bool
b96a374d 5952constant_address_p (rtx x)
f996902d 5953{
a94f136b 5954 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5955}
5956
5957/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5958 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5959 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5960
5961bool
b96a374d 5962legitimate_pic_operand_p (rtx x)
f996902d
RH
5963{
5964 rtx inner;
5965
5966 switch (GET_CODE (x))
5967 {
5968 case CONST:
5969 inner = XEXP (x, 0);
5970
5971 /* Only some unspecs are valid as "constants". */
5972 if (GET_CODE (inner) == UNSPEC)
5973 switch (XINT (inner, 1))
5974 {
5975 case UNSPEC_TPOFF:
5976 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5977 default:
5978 return false;
5979 }
5efb1046 5980 /* FALLTHRU */
f996902d
RH
5981
5982 case SYMBOL_REF:
5983 case LABEL_REF:
5984 return legitimate_pic_address_disp_p (x);
5985
5986 default:
5987 return true;
5988 }
5989}
5990
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  /* Local symbol + small offset (within +/-16MB) is allowed.  */
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit: optionally strip a constant offset, remembering that a
     PLUS was present (some unspecs reject offsets).  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
6099
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* <= 0 covers both hard failure (0) and the lea-only ASHIFT form (-1).  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    /* TLS displacements are validated below as constants.  */
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 6320\f
55efb413
JW
6321/* Return an unique alias set for the GOT. */
6322
0f290768 6323static HOST_WIDE_INT
b96a374d 6324ix86_GOT_alias_set (void)
55efb413 6325{
5bf0ebab
RH
6326 static HOST_WIDE_INT set = -1;
6327 if (set == -1)
6328 set = new_alias_set ();
6329 return set;
0f290768 6330}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    /* 64-bit: the displacement is already valid as-is (RIP-relative).  */
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      /* During reload no new pseudos may be created, so make sure the
	 PIC register is known live instead.  */
      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* Keep the constant offset outside the UNSPEC wrapper.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit global: load the address through @GOTPCREL.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets beyond +-16MB do not fit the
		     sign-extended 32-bit displacement, so force the
		     offset into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* Legitimize both halves of the PLUS and recombine.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Reassociate so the constant part stays outermost.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
6499\f
74dc3e94 6500/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6501
6502static rtx
b96a374d 6503get_thread_pointer (int to_reg)
f996902d 6504{
74dc3e94 6505 rtx tp, reg, insn;
f996902d
RH
6506
6507 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6508 if (!to_reg)
6509 return tp;
f996902d 6510
74dc3e94
RH
6511 reg = gen_reg_rtx (Pmode);
6512 insn = gen_rtx_SET (VOIDmode, reg, tp);
6513 insn = emit_insn (insn);
6514
6515 return reg;
6516}
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* General dynamic: call __tls_get_addr with the symbol's GOT entry.  */
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  /* Wrap the call in a sequence so emit_libcall_block can attach
	     an equivalence note for later CSE of the TLS address.  */
	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Local dynamic: one call gets the module base; each symbol is then
	 base + @DTPOFF.  */
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Initial exec: load the TP offset from the GOT; the relocation
	 and addressing form depend on bitness, PIC, and TLS dialect.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  /* Sun-style TLS without PIC: materialize a GOT pointer.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun dialect: address is TP minus the loaded offset.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* Local exec: offset is a link-time constant relative to TP.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get their own legitimization path entirely.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any remaining multiplies into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* A bare program counter ('.') is only meaningful in PIC output.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      /* Mark the decl as referenced so that cgraph will output the function.  */
      if (SYMBOL_REF_DECL (x))
	mark_decl_referenced (SYMBOL_REF_DECL (x));

      assemble_name (file, XSTR (x, 0));
      /* Non-local symbols printed with the 'P' code go through the PLT
	 (Mach-O has no PLT).  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler parses it as a single
	 expression; dialect chooses () vs [], Mach-O needs neither.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Translate the UNSPEC into its relocation suffix.  */
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 6978
0f290768 6979/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6980 We need to handle our special PIC relocations. */
6981
0f290768 6982void
b96a374d 6983i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6984{
14f73b5a 6985#ifdef ASM_QUAD
18b5b8d6 6986 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6987#else
6988 if (TARGET_64BIT)
6989 abort ();
18b5b8d6 6990 fprintf (file, "%s", ASM_LONG);
14f73b5a 6991#endif
1865dbb5
JM
6992 if (flag_pic)
6993 output_pic_addr_const (file, x, '\0');
6994 else
6995 output_addr_const (file, x);
6996 fputc ('\n', file);
6997}
6998
b9203463
RH
6999/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
7000 We need to emit DTP-relative relocations. */
7001
7002void
b96a374d 7003i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 7004{
75d38379
JJ
7005 fputs (ASM_LONG, file);
7006 output_addr_const (file, x);
7007 fputs ("@DTPOFF", file);
b9203463
RH
7008 switch (size)
7009 {
7010 case 4:
b9203463
RH
7011 break;
7012 case 8:
75d38379 7013 fputs (", 0", file);
b9203463 7014 break;
b9203463
RH
7015 default:
7016 abort ();
7017 }
b9203463
RH
7018}
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  /* Strip the MEM wrapper; the patterns below match the bare address.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit only generates (mem (const (unspec GOTPCREL))).  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  /* Y collects the non-PIC part of the address (NULL if there is none)
     so it can be re-attached to the recovered symbol below.  */
  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* @GOT references only make sense inside a MEM (the load from the
     GOT slot); @GOTOFF only outside one.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Same patterns with a trailing constant offset.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
/* Print to FILE the condition-code suffix ("e", "ge", "nbe", ...) for
   comparison CODE under flags mode MODE.  REVERSE means emit the suffix
   for the reversed condition.  FP selects the fcmov-style spellings for
   some unsigned/unordered conditions (see the ??? comments below).  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      /* FP comparisons reaching here must already be expressible as a
	 single integer-style condition; anything needing an extra jump
	 is a bug in the caller.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      /* Signed "greater" needs the full flags (or sign/overflow pair).  */
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  /* These registers have no assembler-printable name.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Reduce CODE to an operand size in bytes (3 = x87 stack reg,
     0 = high byte register).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers of 4+ bytes get an 'e' (32-bit) or 'r'
	 (64-bit) prefix; FP/SSE registers do not.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      /* Registers without a QImode name fall back to the word name.  */
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
7281
f996902d
RH
7282/* Locate some local-dynamic symbol still in use by this function
7283 so that we can print its name in some tls_local_dynamic_base
7284 pattern. */
7285
7286static const char *
b96a374d 7287get_some_local_dynamic_name (void)
f996902d
RH
7288{
7289 rtx insn;
7290
7291 if (cfun->machine->some_ld_name)
7292 return cfun->machine->some_ld_name;
7293
7294 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7295 if (INSN_P (insn)
7296 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7297 return cfun->machine->some_ld_name;
7298
7299 abort ();
7300}
7301
7302static int
b96a374d 7303get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7304{
7305 rtx x = *px;
7306
7307 if (GET_CODE (x) == SYMBOL_REF
7308 && local_dynamic_symbolic_operand (x, Pmode))
7309 {
7310 cfun->machine->some_ld_name = XSTR (x, 0);
7311 return 1;
7312 }
7313
7314 return 0;
7315}
7316
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition.
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing.
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax).
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */
2a2ab3f9
JVA
7343
7344void
b96a374d 7345print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7346{
7347 if (code)
7348 {
7349 switch (code)
7350 {
7351 case '*':
80f33d06 7352 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
7353 putc ('*', file);
7354 return;
7355
f996902d
RH
7356 case '&':
7357 assemble_name (file, get_some_local_dynamic_name ());
7358 return;
7359
fb204271 7360 case 'A':
80f33d06 7361 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7362 putc ('*', file);
80f33d06 7363 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7364 {
7365 /* Intel syntax. For absolute addresses, registers should not
7366 be surrounded by braces. */
7367 if (GET_CODE (x) != REG)
7368 {
7369 putc ('[', file);
7370 PRINT_OPERAND (file, x, 0);
7371 putc (']', file);
7372 return;
7373 }
7374 }
80f33d06
GS
7375 else
7376 abort ();
fb204271
DN
7377
7378 PRINT_OPERAND (file, x, 0);
7379 return;
7380
7381
2a2ab3f9 7382 case 'L':
80f33d06 7383 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7384 putc ('l', file);
2a2ab3f9
JVA
7385 return;
7386
7387 case 'W':
80f33d06 7388 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7389 putc ('w', file);
2a2ab3f9
JVA
7390 return;
7391
7392 case 'B':
80f33d06 7393 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7394 putc ('b', file);
2a2ab3f9
JVA
7395 return;
7396
7397 case 'Q':
80f33d06 7398 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7399 putc ('l', file);
2a2ab3f9
JVA
7400 return;
7401
7402 case 'S':
80f33d06 7403 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7404 putc ('s', file);
2a2ab3f9
JVA
7405 return;
7406
5f1ec3e6 7407 case 'T':
80f33d06 7408 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7409 putc ('t', file);
5f1ec3e6
JVA
7410 return;
7411
2a2ab3f9
JVA
7412 case 'z':
7413 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7414 registers. */
2a2ab3f9
JVA
7415 if (STACK_REG_P (x))
7416 return;
7417
831c4e87
KC
7418 /* Likewise if using Intel opcodes. */
7419 if (ASSEMBLER_DIALECT == ASM_INTEL)
7420 return;
7421
7422 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
7423 switch (GET_MODE_SIZE (GET_MODE (x)))
7424 {
2a2ab3f9 7425 case 2:
155d8a47
JW
7426#ifdef HAVE_GAS_FILDS_FISTS
7427 putc ('s', file);
7428#endif
2a2ab3f9
JVA
7429 return;
7430
7431 case 4:
7432 if (GET_MODE (x) == SFmode)
7433 {
e075ae69 7434 putc ('s', file);
2a2ab3f9
JVA
7435 return;
7436 }
7437 else
e075ae69 7438 putc ('l', file);
2a2ab3f9
JVA
7439 return;
7440
5f1ec3e6 7441 case 12:
2b589241 7442 case 16:
e075ae69
RH
7443 putc ('t', file);
7444 return;
5f1ec3e6 7445
2a2ab3f9
JVA
7446 case 8:
7447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
7448 {
7449#ifdef GAS_MNEMONICS
e075ae69 7450 putc ('q', file);
56c0e8fa 7451#else
e075ae69
RH
7452 putc ('l', file);
7453 putc ('l', file);
56c0e8fa
JVA
7454#endif
7455 }
e075ae69
RH
7456 else
7457 putc ('l', file);
2a2ab3f9 7458 return;
155d8a47
JW
7459
7460 default:
7461 abort ();
2a2ab3f9 7462 }
4af3895e
JVA
7463
7464 case 'b':
7465 case 'w':
7466 case 'k':
3f3f2124 7467 case 'q':
4af3895e
JVA
7468 case 'h':
7469 case 'y':
5cb6195d 7470 case 'X':
e075ae69 7471 case 'P':
4af3895e
JVA
7472 break;
7473
2d49677f
SC
7474 case 's':
7475 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7476 {
7477 PRINT_OPERAND (file, x, 0);
e075ae69 7478 putc (',', file);
2d49677f 7479 }
a269a03c
JC
7480 return;
7481
a46d1d38
JH
7482 case 'D':
7483 /* Little bit of braindamage here. The SSE compare instructions
7484 does use completely different names for the comparisons that the
7485 fp conditional moves. */
7486 switch (GET_CODE (x))
7487 {
7488 case EQ:
7489 case UNEQ:
7490 fputs ("eq", file);
7491 break;
7492 case LT:
7493 case UNLT:
7494 fputs ("lt", file);
7495 break;
7496 case LE:
7497 case UNLE:
7498 fputs ("le", file);
7499 break;
7500 case UNORDERED:
7501 fputs ("unord", file);
7502 break;
7503 case NE:
7504 case LTGT:
7505 fputs ("neq", file);
7506 break;
7507 case UNGE:
7508 case GE:
7509 fputs ("nlt", file);
7510 break;
7511 case UNGT:
7512 case GT:
7513 fputs ("nle", file);
7514 break;
7515 case ORDERED:
7516 fputs ("ord", file);
7517 break;
7518 default:
7519 abort ();
7520 break;
7521 }
7522 return;
048b1c95 7523 case 'O':
f6f5dff2 7524#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7525 if (ASSEMBLER_DIALECT == ASM_ATT)
7526 {
7527 switch (GET_MODE (x))
7528 {
7529 case HImode: putc ('w', file); break;
7530 case SImode:
7531 case SFmode: putc ('l', file); break;
7532 case DImode:
7533 case DFmode: putc ('q', file); break;
7534 default: abort ();
7535 }
7536 putc ('.', file);
7537 }
7538#endif
7539 return;
1853aadd 7540 case 'C':
e075ae69 7541 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7542 return;
fe25fea3 7543 case 'F':
f6f5dff2 7544#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7545 if (ASSEMBLER_DIALECT == ASM_ATT)
7546 putc ('.', file);
7547#endif
e075ae69 7548 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7549 return;
7550
e9a25f70 7551 /* Like above, but reverse condition */
e075ae69 7552 case 'c':
fce5a9f2 7553 /* Check to see if argument to %c is really a constant
c1d5afc4 7554 and not a condition code which needs to be reversed. */
ec8e098d 7555 if (!COMPARISON_P (x))
c1d5afc4
CR
7556 {
7557 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7558 return;
7559 }
e075ae69
RH
7560 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7561 return;
fe25fea3 7562 case 'f':
f6f5dff2 7563#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7564 if (ASSEMBLER_DIALECT == ASM_ATT)
7565 putc ('.', file);
7566#endif
e075ae69 7567 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7568 return;
ef6257cd
JH
7569 case '+':
7570 {
7571 rtx x;
e5cb57e8 7572
ef6257cd
JH
7573 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7574 return;
a4f31c00 7575
ef6257cd
JH
7576 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7577 if (x)
7578 {
7579 int pred_val = INTVAL (XEXP (x, 0));
7580
7581 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7582 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7583 {
7584 int taken = pred_val > REG_BR_PROB_BASE / 2;
7585 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7586
7587 /* Emit hints only in the case default branch prediction
d1f87653 7588 heuristics would fail. */
ef6257cd
JH
7589 if (taken != cputaken)
7590 {
7591 /* We use 3e (DS) prefix for taken branches and
7592 2e (CS) prefix for not taken branches. */
7593 if (taken)
7594 fputs ("ds ; ", file);
7595 else
7596 fputs ("cs ; ", file);
7597 }
7598 }
7599 }
7600 return;
7601 }
4af3895e 7602 default:
a52453cc 7603 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7604 }
7605 }
e9a25f70 7606
2a2ab3f9 7607 if (GET_CODE (x) == REG)
a55f4481 7608 print_reg (x, code, file);
e9a25f70 7609
2a2ab3f9
JVA
7610 else if (GET_CODE (x) == MEM)
7611 {
e075ae69 7612 /* No `byte ptr' prefix for call instructions. */
80f33d06 7613 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7614 {
69ddee61 7615 const char * size;
e075ae69
RH
7616 switch (GET_MODE_SIZE (GET_MODE (x)))
7617 {
7618 case 1: size = "BYTE"; break;
7619 case 2: size = "WORD"; break;
7620 case 4: size = "DWORD"; break;
7621 case 8: size = "QWORD"; break;
7622 case 12: size = "XWORD"; break;
a7180f70 7623 case 16: size = "XMMWORD"; break;
e075ae69 7624 default:
564d80f4 7625 abort ();
e075ae69 7626 }
fb204271
DN
7627
7628 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7629 if (code == 'b')
7630 size = "BYTE";
7631 else if (code == 'w')
7632 size = "WORD";
7633 else if (code == 'k')
7634 size = "DWORD";
7635
e075ae69
RH
7636 fputs (size, file);
7637 fputs (" PTR ", file);
2a2ab3f9 7638 }
e075ae69
RH
7639
7640 x = XEXP (x, 0);
0d7d98ee 7641 /* Avoid (%rip) for call operands. */
d10f5ecf 7642 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7643 && GET_CODE (x) != CONST_INT)
7644 output_addr_const (file, x);
c8b94768
RH
7645 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7646 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7647 else
e075ae69 7648 output_address (x);
2a2ab3f9 7649 }
e9a25f70 7650
2a2ab3f9
JVA
7651 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7652 {
e9a25f70
JL
7653 REAL_VALUE_TYPE r;
7654 long l;
7655
5f1ec3e6
JVA
7656 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7657 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7658
80f33d06 7659 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7660 putc ('$', file);
781f4ec1 7661 fprintf (file, "0x%08lx", l);
5f1ec3e6 7662 }
e9a25f70 7663
74dc3e94
RH
7664 /* These float cases don't actually occur as immediate operands. */
7665 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7666 {
e9a25f70
JL
7667 char dstr[30];
7668
da6eec72 7669 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7670 fprintf (file, "%s", dstr);
2a2ab3f9 7671 }
e9a25f70 7672
2b589241 7673 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 7674 && GET_MODE (x) == XFmode)
2a2ab3f9 7675 {
e9a25f70
JL
7676 char dstr[30];
7677
da6eec72 7678 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7679 fprintf (file, "%s", dstr);
2a2ab3f9 7680 }
f996902d 7681
79325812 7682 else
2a2ab3f9 7683 {
4af3895e 7684 if (code != 'P')
2a2ab3f9 7685 {
695dac07 7686 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7687 {
80f33d06 7688 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7689 putc ('$', file);
7690 }
2a2ab3f9
JVA
7691 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7692 || GET_CODE (x) == LABEL_REF)
e075ae69 7693 {
80f33d06 7694 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7695 putc ('$', file);
7696 else
7697 fputs ("OFFSET FLAT:", file);
7698 }
2a2ab3f9 7699 }
e075ae69
RH
7700 if (GET_CODE (x) == CONST_INT)
7701 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7702 else if (flag_pic)
2a2ab3f9
JVA
7703 output_pic_addr_const (file, x, code);
7704 else
7705 output_addr_const (file, x);
7706 }
7707}
7708\f
7709/* Print a memory operand whose address is ADDR. */
7710
7711void
8d531ab9 7712print_operand_address (FILE *file, rtx addr)
2a2ab3f9 7713{
e075ae69
RH
7714 struct ix86_address parts;
7715 rtx base, index, disp;
7716 int scale;
e9a25f70 7717
e075ae69
RH
7718 if (! ix86_decompose_address (addr, &parts))
7719 abort ();
e9a25f70 7720
e075ae69
RH
7721 base = parts.base;
7722 index = parts.index;
7723 disp = parts.disp;
7724 scale = parts.scale;
e9a25f70 7725
74dc3e94
RH
7726 switch (parts.seg)
7727 {
7728 case SEG_DEFAULT:
7729 break;
7730 case SEG_FS:
7731 case SEG_GS:
7732 if (USER_LABEL_PREFIX[0] == 0)
7733 putc ('%', file);
7734 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7735 break;
7736 default:
7737 abort ();
7738 }
7739
e075ae69
RH
7740 if (!base && !index)
7741 {
7742 /* Displacement only requires special attention. */
e9a25f70 7743
e075ae69 7744 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 7745 {
74dc3e94 7746 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
fb204271
DN
7747 {
7748 if (USER_LABEL_PREFIX[0] == 0)
7749 putc ('%', file);
7750 fputs ("ds:", file);
7751 }
74dc3e94 7752 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 7753 }
e075ae69 7754 else if (flag_pic)
74dc3e94 7755 output_pic_addr_const (file, disp, 0);
e075ae69 7756 else
74dc3e94 7757 output_addr_const (file, disp);
0d7d98ee
JH
7758
7759 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 7760 if (TARGET_64BIT
74dc3e94
RH
7761 && ((GET_CODE (disp) == SYMBOL_REF
7762 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7763 || GET_CODE (disp) == LABEL_REF
7764 || (GET_CODE (disp) == CONST
7765 && GET_CODE (XEXP (disp, 0)) == PLUS
7766 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7767 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7768 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 7769 fputs ("(%rip)", file);
e075ae69
RH
7770 }
7771 else
7772 {
80f33d06 7773 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 7774 {
e075ae69 7775 if (disp)
2a2ab3f9 7776 {
c399861d 7777 if (flag_pic)
e075ae69
RH
7778 output_pic_addr_const (file, disp, 0);
7779 else if (GET_CODE (disp) == LABEL_REF)
7780 output_asm_label (disp);
2a2ab3f9 7781 else
e075ae69 7782 output_addr_const (file, disp);
2a2ab3f9
JVA
7783 }
7784
e075ae69
RH
7785 putc ('(', file);
7786 if (base)
a55f4481 7787 print_reg (base, 0, file);
e075ae69 7788 if (index)
2a2ab3f9 7789 {
e075ae69 7790 putc (',', file);
a55f4481 7791 print_reg (index, 0, file);
e075ae69
RH
7792 if (scale != 1)
7793 fprintf (file, ",%d", scale);
2a2ab3f9 7794 }
e075ae69 7795 putc (')', file);
2a2ab3f9 7796 }
2a2ab3f9
JVA
7797 else
7798 {
e075ae69 7799 rtx offset = NULL_RTX;
e9a25f70 7800
e075ae69
RH
7801 if (disp)
7802 {
7803 /* Pull out the offset of a symbol; print any symbol itself. */
7804 if (GET_CODE (disp) == CONST
7805 && GET_CODE (XEXP (disp, 0)) == PLUS
7806 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7807 {
7808 offset = XEXP (XEXP (disp, 0), 1);
7809 disp = gen_rtx_CONST (VOIDmode,
7810 XEXP (XEXP (disp, 0), 0));
7811 }
ce193852 7812
e075ae69
RH
7813 if (flag_pic)
7814 output_pic_addr_const (file, disp, 0);
7815 else if (GET_CODE (disp) == LABEL_REF)
7816 output_asm_label (disp);
7817 else if (GET_CODE (disp) == CONST_INT)
7818 offset = disp;
7819 else
7820 output_addr_const (file, disp);
7821 }
e9a25f70 7822
e075ae69
RH
7823 putc ('[', file);
7824 if (base)
a8620236 7825 {
a55f4481 7826 print_reg (base, 0, file);
e075ae69
RH
7827 if (offset)
7828 {
7829 if (INTVAL (offset) >= 0)
7830 putc ('+', file);
7831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7832 }
a8620236 7833 }
e075ae69
RH
7834 else if (offset)
7835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7836 else
e075ae69 7837 putc ('0', file);
e9a25f70 7838
e075ae69
RH
7839 if (index)
7840 {
7841 putc ('+', file);
a55f4481 7842 print_reg (index, 0, file);
e075ae69
RH
7843 if (scale != 1)
7844 fprintf (file, "*%d", scale);
7845 }
7846 putc (']', file);
7847 }
2a2ab3f9
JVA
7848 }
7849}
f996902d
RH
7850
7851bool
b96a374d 7852output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
7853{
7854 rtx op;
7855
7856 if (GET_CODE (x) != UNSPEC)
7857 return false;
7858
7859 op = XVECEXP (x, 0, 0);
7860 switch (XINT (x, 1))
7861 {
7862 case UNSPEC_GOTTPOFF:
7863 output_addr_const (file, op);
dea73790 7864 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7865 fputs ("@GOTTPOFF", file);
7866 break;
7867 case UNSPEC_TPOFF:
7868 output_addr_const (file, op);
7869 fputs ("@TPOFF", file);
7870 break;
7871 case UNSPEC_NTPOFF:
7872 output_addr_const (file, op);
75d38379
JJ
7873 if (TARGET_64BIT)
7874 fputs ("@TPOFF", file);
7875 else
7876 fputs ("@NTPOFF", file);
f996902d
RH
7877 break;
7878 case UNSPEC_DTPOFF:
7879 output_addr_const (file, op);
7880 fputs ("@DTPOFF", file);
7881 break;
dea73790
JJ
7882 case UNSPEC_GOTNTPOFF:
7883 output_addr_const (file, op);
75d38379
JJ
7884 if (TARGET_64BIT)
7885 fputs ("@GOTTPOFF(%rip)", file);
7886 else
7887 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7888 break;
7889 case UNSPEC_INDNTPOFF:
7890 output_addr_const (file, op);
7891 fputs ("@INDNTPOFF", file);
7892 break;
f996902d
RH
7893
7894 default:
7895 return false;
7896 }
7897
7898 return true;
7899}
2a2ab3f9
JVA
7900\f
7901/* Split one or more DImode RTL references into pairs of SImode
7902 references. The RTL can be REG, offsettable MEM, integer constant, or
7903 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7904 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7905 that parallel "operands". */
2a2ab3f9
JVA
7906
7907void
b96a374d 7908split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7909{
7910 while (num--)
7911 {
57dbca5e 7912 rtx op = operands[num];
b932f770
JH
7913
7914 /* simplify_subreg refuse to split volatile memory addresses,
7915 but we still have to handle it. */
7916 if (GET_CODE (op) == MEM)
2a2ab3f9 7917 {
f4ef873c 7918 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7919 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7920 }
7921 else
b932f770 7922 {
38ca929b
JH
7923 lo_half[num] = simplify_gen_subreg (SImode, op,
7924 GET_MODE (op) == VOIDmode
7925 ? DImode : GET_MODE (op), 0);
7926 hi_half[num] = simplify_gen_subreg (SImode, op,
7927 GET_MODE (op) == VOIDmode
7928 ? DImode : GET_MODE (op), 4);
b932f770 7929 }
2a2ab3f9
JVA
7930 }
7931}
44cf5b6a
JH
7932/* Split one or more TImode RTL references into pairs of SImode
7933 references. The RTL can be REG, offsettable MEM, integer constant, or
7934 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7935 split and "num" is its length. lo_half and hi_half are output arrays
7936 that parallel "operands". */
7937
7938void
b96a374d 7939split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
7940{
7941 while (num--)
7942 {
7943 rtx op = operands[num];
b932f770
JH
7944
7945 /* simplify_subreg refuse to split volatile memory addresses, but we
7946 still have to handle it. */
7947 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7948 {
7949 lo_half[num] = adjust_address (op, DImode, 0);
7950 hi_half[num] = adjust_address (op, DImode, 8);
7951 }
7952 else
b932f770
JH
7953 {
7954 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7955 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7956 }
44cf5b6a
JH
7957 }
7958}
2a2ab3f9 7959\f
2a2ab3f9
JVA
7960/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7961 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7962 is the expression of the binary operation. The output may either be
7963 emitted here, or returned to the caller, like all output_* functions.
7964
7965 There is no guarantee that the operands are the same mode, as they
0f290768 7966 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7967
e3c2afab
AM
7968#ifndef SYSV386_COMPAT
7969/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7970 wants to fix the assemblers because that causes incompatibility
7971 with gcc. No-one wants to fix gcc because that causes
7972 incompatibility with assemblers... You can use the option of
7973 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7974#define SYSV386_COMPAT 1
7975#endif
7976
69ddee61 7977const char *
b96a374d 7978output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 7979{
e3c2afab 7980 static char buf[30];
69ddee61 7981 const char *p;
1deaa899
JH
7982 const char *ssep;
7983 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7984
e3c2afab
AM
7985#ifdef ENABLE_CHECKING
7986 /* Even if we do not want to check the inputs, this documents input
7987 constraints. Which helps in understanding the following code. */
7988 if (STACK_REG_P (operands[0])
7989 && ((REG_P (operands[1])
7990 && REGNO (operands[0]) == REGNO (operands[1])
7991 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7992 || (REG_P (operands[2])
7993 && REGNO (operands[0]) == REGNO (operands[2])
7994 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7995 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7996 ; /* ok */
1deaa899 7997 else if (!is_sse)
e3c2afab
AM
7998 abort ();
7999#endif
8000
2a2ab3f9
JVA
8001 switch (GET_CODE (operands[3]))
8002 {
8003 case PLUS:
e075ae69
RH
8004 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8005 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8006 p = "fiadd";
8007 else
8008 p = "fadd";
1deaa899 8009 ssep = "add";
2a2ab3f9
JVA
8010 break;
8011
8012 case MINUS:
e075ae69
RH
8013 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8014 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8015 p = "fisub";
8016 else
8017 p = "fsub";
1deaa899 8018 ssep = "sub";
2a2ab3f9
JVA
8019 break;
8020
8021 case MULT:
e075ae69
RH
8022 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8023 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8024 p = "fimul";
8025 else
8026 p = "fmul";
1deaa899 8027 ssep = "mul";
2a2ab3f9
JVA
8028 break;
8029
8030 case DIV:
e075ae69
RH
8031 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8032 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8033 p = "fidiv";
8034 else
8035 p = "fdiv";
1deaa899 8036 ssep = "div";
2a2ab3f9
JVA
8037 break;
8038
8039 default:
8040 abort ();
8041 }
8042
1deaa899
JH
8043 if (is_sse)
8044 {
8045 strcpy (buf, ssep);
8046 if (GET_MODE (operands[0]) == SFmode)
8047 strcat (buf, "ss\t{%2, %0|%0, %2}");
8048 else
8049 strcat (buf, "sd\t{%2, %0|%0, %2}");
8050 return buf;
8051 }
e075ae69 8052 strcpy (buf, p);
2a2ab3f9
JVA
8053
8054 switch (GET_CODE (operands[3]))
8055 {
8056 case MULT:
8057 case PLUS:
8058 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8059 {
e3c2afab 8060 rtx temp = operands[2];
2a2ab3f9
JVA
8061 operands[2] = operands[1];
8062 operands[1] = temp;
8063 }
8064
e3c2afab
AM
8065 /* know operands[0] == operands[1]. */
8066
2a2ab3f9 8067 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
8068 {
8069 p = "%z2\t%2";
8070 break;
8071 }
2a2ab3f9
JVA
8072
8073 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
8074 {
8075 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
8076 /* How is it that we are storing to a dead operand[2]?
8077 Well, presumably operands[1] is dead too. We can't
8078 store the result to st(0) as st(0) gets popped on this
8079 instruction. Instead store to operands[2] (which I
8080 think has to be st(1)). st(1) will be popped later.
8081 gcc <= 2.8.1 didn't have this check and generated
8082 assembly code that the Unixware assembler rejected. */
8083 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 8084 else
e3c2afab 8085 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 8086 break;
6b28fd63 8087 }
2a2ab3f9
JVA
8088
8089 if (STACK_TOP_P (operands[0]))
e3c2afab 8090 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 8091 else
e3c2afab 8092 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 8093 break;
2a2ab3f9
JVA
8094
8095 case MINUS:
8096 case DIV:
8097 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
8098 {
8099 p = "r%z1\t%1";
8100 break;
8101 }
2a2ab3f9
JVA
8102
8103 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
8104 {
8105 p = "%z2\t%2";
8106 break;
8107 }
2a2ab3f9 8108
2a2ab3f9 8109 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 8110 {
e3c2afab
AM
8111#if SYSV386_COMPAT
8112 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8113 derived assemblers, confusingly reverse the direction of
8114 the operation for fsub{r} and fdiv{r} when the
8115 destination register is not st(0). The Intel assembler
8116 doesn't have this brain damage. Read !SYSV386_COMPAT to
8117 figure out what the hardware really does. */
8118 if (STACK_TOP_P (operands[0]))
8119 p = "{p\t%0, %2|rp\t%2, %0}";
8120 else
8121 p = "{rp\t%2, %0|p\t%0, %2}";
8122#else
6b28fd63 8123 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
8124 /* As above for fmul/fadd, we can't store to st(0). */
8125 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 8126 else
e3c2afab
AM
8127 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8128#endif
e075ae69 8129 break;
6b28fd63 8130 }
2a2ab3f9
JVA
8131
8132 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 8133 {
e3c2afab 8134#if SYSV386_COMPAT
6b28fd63 8135 if (STACK_TOP_P (operands[0]))
e3c2afab 8136 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 8137 else
e3c2afab
AM
8138 p = "{p\t%1, %0|rp\t%0, %1}";
8139#else
8140 if (STACK_TOP_P (operands[0]))
8141 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8142 else
8143 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8144#endif
e075ae69 8145 break;
6b28fd63 8146 }
2a2ab3f9
JVA
8147
8148 if (STACK_TOP_P (operands[0]))
8149 {
8150 if (STACK_TOP_P (operands[1]))
e3c2afab 8151 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 8152 else
e3c2afab 8153 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 8154 break;
2a2ab3f9
JVA
8155 }
8156 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
8157 {
8158#if SYSV386_COMPAT
8159 p = "{\t%1, %0|r\t%0, %1}";
8160#else
8161 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8162#endif
8163 }
2a2ab3f9 8164 else
e3c2afab
AM
8165 {
8166#if SYSV386_COMPAT
8167 p = "{r\t%2, %0|\t%0, %2}";
8168#else
8169 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8170#endif
8171 }
e075ae69 8172 break;
2a2ab3f9
JVA
8173
8174 default:
8175 abort ();
8176 }
e075ae69
RH
8177
8178 strcat (buf, p);
8179 return buf;
2a2ab3f9 8180}
e075ae69 8181
a4f31c00 8182/* Output code to initialize control word copies used by
7a2e09f4
JH
8183 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8184 is set to control word rounding downwards. */
8185void
b96a374d 8186emit_i387_cw_initialization (rtx normal, rtx round_down)
7a2e09f4
JH
8187{
8188 rtx reg = gen_reg_rtx (HImode);
8189
8190 emit_insn (gen_x86_fnstcw_1 (normal));
8191 emit_move_insn (reg, normal);
8192 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8193 && !TARGET_64BIT)
8194 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8195 else
8196 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8197 emit_move_insn (round_down, reg);
8198}
8199
2a2ab3f9 8200/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 8201 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 8202 operand may be [SDX]Fmode. */
2a2ab3f9 8203
69ddee61 8204const char *
b96a374d 8205output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
8206{
8207 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 8208 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 8209
e075ae69
RH
8210 /* Jump through a hoop or two for DImode, since the hardware has no
8211 non-popping instruction. We used to do this a different way, but
8212 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
8213 if (dimode_p && !stack_top_dies)
8214 output_asm_insn ("fld\t%y1", operands);
e075ae69 8215
7a2e09f4 8216 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
8217 abort ();
8218
e075ae69 8219 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 8220 abort ();
e9a25f70 8221
7a2e09f4 8222 output_asm_insn ("fldcw\t%3", operands);
e075ae69 8223 if (stack_top_dies || dimode_p)
7a2e09f4 8224 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 8225 else
7a2e09f4 8226 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 8227 output_asm_insn ("fldcw\t%2", operands);
10195bd8 8228
e075ae69 8229 return "";
2a2ab3f9 8230}
cda749b1 8231
e075ae69
RH
8232/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8233 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8234 when fucom should be used. */
8235
69ddee61 8236const char *
b96a374d 8237output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 8238{
e075ae69
RH
8239 int stack_top_dies;
8240 rtx cmp_op0 = operands[0];
8241 rtx cmp_op1 = operands[1];
0644b628 8242 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
8243
8244 if (eflags_p == 2)
8245 {
8246 cmp_op0 = cmp_op1;
8247 cmp_op1 = operands[2];
8248 }
0644b628
JH
8249 if (is_sse)
8250 {
8251 if (GET_MODE (operands[0]) == SFmode)
8252 if (unordered_p)
8253 return "ucomiss\t{%1, %0|%0, %1}";
8254 else
a5cf80f0 8255 return "comiss\t{%1, %0|%0, %1}";
0644b628
JH
8256 else
8257 if (unordered_p)
8258 return "ucomisd\t{%1, %0|%0, %1}";
8259 else
a5cf80f0 8260 return "comisd\t{%1, %0|%0, %1}";
0644b628 8261 }
cda749b1 8262
e075ae69 8263 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
8264 abort ();
8265
e075ae69 8266 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 8267
e075ae69
RH
8268 if (STACK_REG_P (cmp_op1)
8269 && stack_top_dies
8270 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8271 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 8272 {
e075ae69
RH
8273 /* If both the top of the 387 stack dies, and the other operand
8274 is also a stack register that dies, then this must be a
8275 `fcompp' float compare */
8276
8277 if (eflags_p == 1)
8278 {
8279 /* There is no double popping fcomi variant. Fortunately,
8280 eflags is immune from the fstp's cc clobbering. */
8281 if (unordered_p)
8282 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8283 else
8284 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8285 return "fstp\t%y0";
8286 }
8287 else
cda749b1 8288 {
e075ae69
RH
8289 if (eflags_p == 2)
8290 {
8291 if (unordered_p)
8292 return "fucompp\n\tfnstsw\t%0";
8293 else
8294 return "fcompp\n\tfnstsw\t%0";
8295 }
cda749b1
JW
8296 else
8297 {
e075ae69
RH
8298 if (unordered_p)
8299 return "fucompp";
8300 else
8301 return "fcompp";
cda749b1
JW
8302 }
8303 }
cda749b1
JW
8304 }
8305 else
8306 {
e075ae69 8307 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 8308
0f290768 8309 static const char * const alt[24] =
e075ae69
RH
8310 {
8311 "fcom%z1\t%y1",
8312 "fcomp%z1\t%y1",
8313 "fucom%z1\t%y1",
8314 "fucomp%z1\t%y1",
0f290768 8315
e075ae69
RH
8316 "ficom%z1\t%y1",
8317 "ficomp%z1\t%y1",
8318 NULL,
8319 NULL,
8320
8321 "fcomi\t{%y1, %0|%0, %y1}",
8322 "fcomip\t{%y1, %0|%0, %y1}",
8323 "fucomi\t{%y1, %0|%0, %y1}",
8324 "fucomip\t{%y1, %0|%0, %y1}",
8325
8326 NULL,
8327 NULL,
8328 NULL,
8329 NULL,
8330
8331 "fcom%z2\t%y2\n\tfnstsw\t%0",
8332 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8333 "fucom%z2\t%y2\n\tfnstsw\t%0",
8334 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 8335
e075ae69
RH
8336 "ficom%z2\t%y2\n\tfnstsw\t%0",
8337 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8338 NULL,
8339 NULL
8340 };
8341
8342 int mask;
69ddee61 8343 const char *ret;
e075ae69
RH
8344
8345 mask = eflags_p << 3;
8346 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8347 mask |= unordered_p << 1;
8348 mask |= stack_top_dies;
8349
8350 if (mask >= 24)
8351 abort ();
8352 ret = alt[mask];
8353 if (ret == NULL)
8354 abort ();
cda749b1 8355
e075ae69 8356 return ret;
cda749b1
JW
8357 }
8358}
2a2ab3f9 8359
f88c65f7 8360void
b96a374d 8361ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8362{
8363 const char *directive = ASM_LONG;
8364
8365 if (TARGET_64BIT)
8366 {
8367#ifdef ASM_QUAD
8368 directive = ASM_QUAD;
8369#else
8370 abort ();
8371#endif
8372 }
8373
8374 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8375}
8376
8377void
b96a374d 8378ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7
RH
8379{
8380 if (TARGET_64BIT)
74411039 8381 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
8382 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8383 else if (HAVE_AS_GOTOFF_IN_DATA)
8384 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
8385#if TARGET_MACHO
8386 else if (TARGET_MACHO)
86ecdfb6
AP
8387 {
8388 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8389 machopic_output_function_base_name (file);
8390 fprintf(file, "\n");
8391 }
b069de3b 8392#endif
f88c65f7 8393 else
5fc0e5df
KW
8394 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8395 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 8396}
32b5b1aa 8397\f
a8bac9ab
RH
8398/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8399 for the target. */
8400
8401void
b96a374d 8402ix86_expand_clear (rtx dest)
a8bac9ab
RH
8403{
8404 rtx tmp;
8405
8406 /* We play register width games, which are only valid after reload. */
8407 if (!reload_completed)
8408 abort ();
8409
8410 /* Avoid HImode and its attendant prefix byte. */
8411 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8412 dest = gen_rtx_REG (SImode, REGNO (dest));
8413
8414 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8415
8416 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8417 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8418 {
8419 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8420 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8421 }
8422
8423 emit_insn (tmp);
8424}
8425
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (rtx x)
{
  /* Strip any PIC/GOT wrapping the address may have acquired so a
     plain constant-pool SYMBOL_REF becomes visible.  */
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
8439
/* Expand a scalar move of MODE from operands[1] to operands[0],
   massaging the operands (TLS legitimization, PIC references,
   mem-to-mem splitting, constant handling) until a single SET insn
   is valid, then emit that SET.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need a model-specific address sequence; if the
     legitimized form already landed in op0 we are done.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem-to-mem is only allowed as a push; otherwise load the
	 source into a register first.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;  /* No new pseudos during/after reload.  */
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8528
/* Expand a vector move of MODE from operands[1] to operands[0],
   forcing non-zero constants to the constant pool and splitting
   mem-to-mem moves through a register, then emit the SET.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 8553
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.
   CODE is the rtx operation, MODE the operand mode, and operands[0..2]
   are dest, src1, src2; a flags clobber is attached except during
   reload.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep the operand that matches the memory destination, force
	 the other into a register.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8641
8642/* Return TRUE or FALSE depending on whether the binary operator meets the
8643 appropriate constraints. */
8644
8645int
b96a374d
AJ
8646ix86_binary_operator_ok (enum rtx_code code,
8647 enum machine_mode mode ATTRIBUTE_UNUSED,
8648 rtx operands[3])
e075ae69
RH
8649{
8650 /* Both source operands cannot be in memory. */
8651 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8652 return 0;
8653 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 8654 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
8655 return 0;
8656 /* If the destination is memory, we must have a matching source operand. */
8657 if (GET_CODE (operands[0]) == MEM
8658 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 8659 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
8660 && rtx_equal_p (operands[0], operands[2]))))
8661 return 0;
06a964de 8662 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8663 have a matching destination. */
06a964de 8664 if (GET_CODE (operands[1]) == MEM
ec8e098d 8665 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
8666 && ! rtx_equal_p (operands[0], operands[1]))
8667 return 0;
e075ae69
RH
8668 return 1;
8669}
8670
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.
   CODE is the rtx operation and operands[0..1] are dest, src; a flags
   clobber is attached except for NOT (which leaves flags alone) and
   during reload.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8730
8731/* Return TRUE or FALSE depending on whether the unary operator meets the
8732 appropriate constraints. */
8733
8734int
b96a374d
AJ
8735ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8736 enum machine_mode mode ATTRIBUTE_UNUSED,
8737 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8738{
06a964de
JH
8739 /* If one of operands is memory, source and destination must match. */
8740 if ((GET_CODE (operands[0]) == MEM
8741 || GET_CODE (operands[1]) == MEM)
8742 && ! rtx_equal_p (operands[0], operands[1]))
8743 return FALSE;
e075ae69
RH
8744 return TRUE;
8745}
8746
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  /* The cases fall through in order of decreasing strictness:
     each mode accepts every request a less constrained mode does.  */
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode also satisfies a plain CCmode request when the
	 comparison is against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8795
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flags-setting COMPARE and returns the comparison rtx
   (CODE applied to the flags register vs zero) for the consumer.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
8816
3a3677ff
RH
8817/* Figure out whether to use ordered or unordered fp comparisons.
8818 Return the appropriate mode to use. */
e075ae69 8819
b1cdafbb 8820enum machine_mode
b96a374d 8821ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8822{
9e7adcb3
JH
8823 /* ??? In order to make all comparisons reversible, we do all comparisons
8824 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8825 all forms trapping and nontrapping comparisons, we can make inequality
8826 comparisons trapping again, since it results in better code when using
8827 FCOM based compares. */
8828 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8829}
8830
/* Return the condition-code mode needed to evaluate comparison CODE
   of OP0 and OP1: the least constrained CC mode whose flags suffice
   for that comparison.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8875
e129d93a
ILT
8876/* Return the fixed registers used for condition codes. */
8877
8878static bool
8879ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8880{
8881 *p1 = FLAGS_REG;
8882 *p2 = FPSR_REG;
8883 return true;
8884}
8885
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGOC differ only in whether the carry flag is valid,
     and CCGC is the stricter of the two.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  /* Any pair of integer CC modes is subsumed by full CCmode.  */
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
8933
/* Return true if we should use an FCOMI instruction for this fp comparison.
   True when the fcomi sequence is as cheap as any alternative for either
   the comparison or its swapped form.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
8944
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants loadable by the FPU (fldz/fld1/...) stay in a
	     register; others go to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
9013
c0c102a9
JH
9014/* Convert comparison codes we use to represent FP comparison to integer
9015 code that will result in proper branch. Return UNKNOWN if no such code
9016 is available. */
9017static enum rtx_code
b96a374d 9018ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
9019{
9020 switch (code)
9021 {
9022 case GT:
9023 return GTU;
9024 case GE:
9025 return GEU;
9026 case ORDERED:
9027 case UNORDERED:
9028 return code;
9029 break;
9030 case UNEQ:
9031 return EQ;
9032 break;
9033 case UNLT:
9034 return LTU;
9035 break;
9036 case UNLE:
9037 return LEU;
9038 break;
9039 case LTGT:
9040 return NE;
9041 break;
9042 default:
9043 return UNKNOWN;
9044 }
9045}
9046
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These map directly onto one integer flags test.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These would wrongly take the branch on unordered, so guard
	 them with a bypass branch around the main test.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* These would wrongly miss the branch on unordered, so add a
	 second branch taken when the result is unordered.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      /* Without IEEE conformance, NaNs need not be honored; a single
	 branch suffices.  */
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
9113
9e7adcb3 9114/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 9115 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
9116 In future this should be tweaked to compute bytes for optimize_size and
9117 take into account performance of various instructions on various CPUs. */
9118static int
b96a374d 9119ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
9120{
9121 if (!TARGET_IEEE_FP)
9122 return 4;
9123 /* The cost of code output by ix86_expand_fp_compare. */
9124 switch (code)
9125 {
9126 case UNLE:
9127 case UNLT:
9128 case LTGT:
9129 case GT:
9130 case GE:
9131 case UNORDERED:
9132 case ORDERED:
9133 case UNEQ:
9134 return 4;
9135 break;
9136 case LT:
9137 case NE:
9138 case EQ:
9139 case UNGE:
9140 return 5;
9141 break;
9142 case LE:
9143 case UNGT:
9144 return 6;
9145 break;
9146 default:
9147 abort ();
9148 }
9149}
9150
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* fcomi + branch, plus one extra branch if the code splits.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}
9164
/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* fnstsw + sahf + branch, plus one extra branch if the code splits.  */
  return (bypass_code != NIL || second_code != NIL) + 3;
}
9178
9179/* Compute cost of the comparison done using any method.
9180 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9181static int
b96a374d 9182ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9183{
9184 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9185 int min;
9186
9187 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9188 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9189
9190 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9191 if (min > sahf_cost)
9192 min = sahf_cost;
9193 if (min > fcomi_cost)
9194 min = fcomi_cost;
9195 return min;
9196}
c0c102a9 9197
/* Generate insn patterns to do a floating point compare of OPERANDS.
   OP0/OP1 are the operands, SCRATCH an optional HImode scratch for the
   fnstsw result (a fresh pseudo is made when NULL).  When the chosen
   sequence needs extra branches, *SECOND_TEST / *BYPASS_TEST receive the
   additional comparison rtxen (see ix86_fp_comparison_codes).  Returns
   the comparison to put into the flags user (bcc/scc/cmov).  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  Only possible when the
     caller can receive any extra branch tests the split requires.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the integer flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the FP status word into AH, then into
	     the integer flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
9384
9e3e266c 9385rtx
b96a374d 9386ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
9387{
9388 rtx op0, op1, ret;
9389 op0 = ix86_compare_op0;
9390 op1 = ix86_compare_op1;
9391
a1b8572c
JH
9392 if (second_test)
9393 *second_test = NULL_RTX;
9394 if (bypass_test)
9395 *bypass_test = NULL_RTX;
9396
e075ae69 9397 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9398 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9399 second_test, bypass_test);
32b5b1aa 9400 else
e075ae69
RH
9401 ret = ix86_expand_int_compare (code, op0, op1);
9402
9403 return ret;
9404}
9405
03598dea
JH
9406/* Return true if the CODE will result in nontrivial jump sequence. */
9407bool
b96a374d 9408ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9409{
9410 enum rtx_code bypass_code, first_code, second_code;
9411 if (!TARGET_CMOVE)
9412 return true;
9413 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9414 return bypass_code != NIL || second_code != NIL;
9415}
9416
/* Emit a conditional jump to LABEL when the comparison of
   ix86_compare_op0 with ix86_compare_op1 under CODE holds.
   Dispatches on the mode of ix86_compare_op0: plain integer modes get
   a single compare+branch; FP modes go through the FP comparison
   machinery; DImode on 32-bit targets is split into word-sized
   compares and multiple branches.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Single compare and conditional jump on the flags register.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    /* Emit a compound jump insn that also clobbers the FP flag
	       registers (hard regs 18 and 17) and, for non-fcomi
	       sequences, a HImode scratch used to shuffle FP flags.  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize so a constant, if any, is the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse: compare the OR of the XORs against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1 branches to LABEL on the high word, code2 branches to
	   the fall-through label, code3 decides on the low word.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 9600
/* Split branch based on floating point condition.  Compare OP1 with
   OP2 under CODE and jump to TARGET1 when true, TARGET2 when false
   (one of the targets is pc_rtx, i.e. fall-through).  TMP, if
   non-NULL, is a scratch usable by the FP compare expansion.  When
   the comparison decomposes into bypass/second tests, extra jumps
   (and, for the bypass, a local label) are emitted around the main
   jump, and REG_BR_PROB notes are distributed across them.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so the true target is TARGET1; reverse the
     condition (unordered-aware) if we had to swap.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* The bypass test skips over the main conditional jump.  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The primary conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      /* A second jump to the same true target completes the split
	 comparison.  */
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
9678
/* Expand a setcc of DEST (which must be QImode) from the comparison of
   ix86_compare_op0 with ix86_compare_op1 under CODE.  Returns 1 on
   success (DONE) and 0 on failure (FAIL), in the expander convention.
   When the FP comparison splits into two tests, the partial results
   are combined with AND (bypass test) or IOR (second test).  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  /* DImode comparisons need the multi-branch path on 32-bit.  */
  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  /* At most one auxiliary test can be present.  */
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  /* The bypass test holds when the result must be forced to 0,
	     so use its reversed form and AND it in below.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
				   GET_MODE (ix86_compare_op0),
				   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
e075ae69 9729
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  On success *POP is the
   LTU/GEU flags test; the conversions below rewrite other codes into
   carry-flag form (possibly adjusting OP1 or swapping operands).  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through special path.  Also we can't
     deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: following common codes never translate into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with carry flag
	 based comparison.  This fails to be true only when we decide to expand
	 comparison using arithmetic that is not too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      /* Only commit the compare sequence once we know it is usable.  */
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      /* Already carry-flag comparisons.  */
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
9856
32b5b1aa 9857int
b96a374d 9858ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9859{
e075ae69
RH
9860 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9861 rtx compare_seq, compare_op;
a1b8572c 9862 rtx second_test, bypass_test;
635559ab 9863 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9864 bool sign_bit_compare_p = false;;
3a3677ff 9865
e075ae69 9866 start_sequence ();
a1b8572c 9867 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9868 compare_seq = get_insns ();
e075ae69
RH
9869 end_sequence ();
9870
9871 compare_code = GET_CODE (compare_op);
9872
4977bab6
ZW
9873 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9874 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9875 sign_bit_compare_p = true;
9876
e075ae69
RH
9877 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9878 HImode insns, we'd be swallowed in word prefix ops. */
9879
4977bab6 9880 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9881 && (mode != DImode || TARGET_64BIT)
0f290768 9882 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9883 && GET_CODE (operands[3]) == CONST_INT)
9884 {
9885 rtx out = operands[0];
9886 HOST_WIDE_INT ct = INTVAL (operands[2]);
9887 HOST_WIDE_INT cf = INTVAL (operands[3]);
9888 HOST_WIDE_INT diff;
9889
4977bab6
ZW
9890 diff = ct - cf;
9891 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9892 sbb. */
4977bab6
ZW
9893 if (sign_bit_compare_p
9894 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9895 ix86_compare_op1, &compare_op))
e075ae69 9896 {
e075ae69
RH
9897 /* Detect overlap between destination and compare sources. */
9898 rtx tmp = out;
9899
4977bab6 9900 if (!sign_bit_compare_p)
36583fea 9901 {
e6e81735
JH
9902 bool fpcmp = false;
9903
4977bab6
ZW
9904 compare_code = GET_CODE (compare_op);
9905
e6e81735
JH
9906 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9907 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9908 {
9909 fpcmp = true;
9910 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9911 }
9912
4977bab6
ZW
9913 /* To simplify rest of code, restrict to the GEU case. */
9914 if (compare_code == LTU)
9915 {
9916 HOST_WIDE_INT tmp = ct;
9917 ct = cf;
9918 cf = tmp;
9919 compare_code = reverse_condition (compare_code);
9920 code = reverse_condition (code);
9921 }
e6e81735
JH
9922 else
9923 {
9924 if (fpcmp)
9925 PUT_CODE (compare_op,
9926 reverse_condition_maybe_unordered
9927 (GET_CODE (compare_op)));
9928 else
9929 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9930 }
4977bab6 9931 diff = ct - cf;
36583fea 9932
4977bab6
ZW
9933 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9934 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9935 tmp = gen_reg_rtx (mode);
e075ae69 9936
4977bab6 9937 if (mode == DImode)
e6e81735 9938 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9939 else
e6e81735 9940 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9941 }
14f73b5a 9942 else
4977bab6
ZW
9943 {
9944 if (code == GT || code == GE)
9945 code = reverse_condition (code);
9946 else
9947 {
9948 HOST_WIDE_INT tmp = ct;
9949 ct = cf;
9950 cf = tmp;
5fb48685 9951 diff = ct - cf;
4977bab6
ZW
9952 }
9953 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9954 ix86_compare_op1, VOIDmode, 0, -1);
9955 }
e075ae69 9956
36583fea
JH
9957 if (diff == 1)
9958 {
9959 /*
9960 * cmpl op0,op1
9961 * sbbl dest,dest
9962 * [addl dest, ct]
9963 *
9964 * Size 5 - 8.
9965 */
9966 if (ct)
b96a374d 9967 tmp = expand_simple_binop (mode, PLUS,
635559ab 9968 tmp, GEN_INT (ct),
4977bab6 9969 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9970 }
9971 else if (cf == -1)
9972 {
9973 /*
9974 * cmpl op0,op1
9975 * sbbl dest,dest
9976 * orl $ct, dest
9977 *
9978 * Size 8.
9979 */
635559ab
JH
9980 tmp = expand_simple_binop (mode, IOR,
9981 tmp, GEN_INT (ct),
4977bab6 9982 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9983 }
9984 else if (diff == -1 && ct)
9985 {
9986 /*
9987 * cmpl op0,op1
9988 * sbbl dest,dest
06ec023f 9989 * notl dest
36583fea
JH
9990 * [addl dest, cf]
9991 *
9992 * Size 8 - 11.
9993 */
4977bab6 9994 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9995 if (cf)
b96a374d 9996 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9997 copy_rtx (tmp), GEN_INT (cf),
9998 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9999 }
10000 else
10001 {
10002 /*
10003 * cmpl op0,op1
10004 * sbbl dest,dest
06ec023f 10005 * [notl dest]
36583fea
JH
10006 * andl cf - ct, dest
10007 * [addl dest, ct]
10008 *
10009 * Size 8 - 11.
10010 */
06ec023f
RB
10011
10012 if (cf == 0)
10013 {
10014 cf = ct;
10015 ct = 0;
4977bab6 10016 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
10017 }
10018
635559ab 10019 tmp = expand_simple_binop (mode, AND,
4977bab6 10020 copy_rtx (tmp),
d8bf17f9 10021 gen_int_mode (cf - ct, mode),
4977bab6 10022 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 10023 if (ct)
b96a374d 10024 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
10025 copy_rtx (tmp), GEN_INT (ct),
10026 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 10027 }
e075ae69 10028
4977bab6
ZW
10029 if (!rtx_equal_p (tmp, out))
10030 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
10031
10032 return 1; /* DONE */
10033 }
10034
e075ae69
RH
10035 if (diff < 0)
10036 {
10037 HOST_WIDE_INT tmp;
10038 tmp = ct, ct = cf, cf = tmp;
10039 diff = -diff;
734dba19
JH
10040 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10041 {
10042 /* We may be reversing unordered compare to normal compare, that
10043 is not valid in general (we may convert non-trapping condition
10044 to trapping one), however on i386 we currently emit all
10045 comparisons unordered. */
10046 compare_code = reverse_condition_maybe_unordered (compare_code);
10047 code = reverse_condition_maybe_unordered (code);
10048 }
10049 else
10050 {
10051 compare_code = reverse_condition (compare_code);
10052 code = reverse_condition (code);
10053 }
e075ae69 10054 }
0f2a3457
JJ
10055
10056 compare_code = NIL;
10057 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10058 && GET_CODE (ix86_compare_op1) == CONST_INT)
10059 {
10060 if (ix86_compare_op1 == const0_rtx
10061 && (code == LT || code == GE))
10062 compare_code = code;
10063 else if (ix86_compare_op1 == constm1_rtx)
10064 {
10065 if (code == LE)
10066 compare_code = LT;
10067 else if (code == GT)
10068 compare_code = GE;
10069 }
10070 }
10071
10072 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10073 if (compare_code != NIL
10074 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10075 && (cf == -1 || ct == -1))
10076 {
10077 /* If lea code below could be used, only optimize
10078 if it results in a 2 insn sequence. */
10079
10080 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10081 || diff == 3 || diff == 5 || diff == 9)
10082 || (compare_code == LT && ct == -1)
10083 || (compare_code == GE && cf == -1))
10084 {
10085 /*
10086 * notl op1 (if necessary)
10087 * sarl $31, op1
10088 * orl cf, op1
10089 */
10090 if (ct != -1)
10091 {
10092 cf = ct;
b96a374d 10093 ct = -1;
0f2a3457
JJ
10094 code = reverse_condition (code);
10095 }
10096
10097 out = emit_store_flag (out, code, ix86_compare_op0,
10098 ix86_compare_op1, VOIDmode, 0, -1);
10099
10100 out = expand_simple_binop (mode, IOR,
10101 out, GEN_INT (cf),
10102 out, 1, OPTAB_DIRECT);
10103 if (out != operands[0])
10104 emit_move_insn (operands[0], out);
10105
10106 return 1; /* DONE */
10107 }
10108 }
10109
4977bab6 10110
635559ab
JH
10111 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10112 || diff == 3 || diff == 5 || diff == 9)
4977bab6 10113 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 10114 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
10115 {
10116 /*
10117 * xorl dest,dest
10118 * cmpl op1,op2
10119 * setcc dest
10120 * lea cf(dest*(ct-cf)),dest
10121 *
10122 * Size 14.
10123 *
10124 * This also catches the degenerate setcc-only case.
10125 */
10126
10127 rtx tmp;
10128 int nops;
10129
10130 out = emit_store_flag (out, code, ix86_compare_op0,
10131 ix86_compare_op1, VOIDmode, 0, 1);
10132
10133 nops = 0;
97f51ac4
RB
10134 /* On x86_64 the lea instruction operates on Pmode, so we need
10135 to get arithmetics done in proper mode to match. */
e075ae69 10136 if (diff == 1)
068f5dea 10137 tmp = copy_rtx (out);
e075ae69
RH
10138 else
10139 {
885a70fd 10140 rtx out1;
068f5dea 10141 out1 = copy_rtx (out);
635559ab 10142 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10143 nops++;
10144 if (diff & 1)
10145 {
635559ab 10146 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10147 nops++;
10148 }
10149 }
10150 if (cf != 0)
10151 {
635559ab 10152 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10153 nops++;
10154 }
4977bab6 10155 if (!rtx_equal_p (tmp, out))
e075ae69 10156 {
14f73b5a 10157 if (nops == 1)
a5cf80f0 10158 out = force_operand (tmp, copy_rtx (out));
e075ae69 10159 else
4977bab6 10160 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10161 }
4977bab6 10162 if (!rtx_equal_p (out, operands[0]))
1985ef90 10163 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10164
10165 return 1; /* DONE */
10166 }
10167
10168 /*
10169 * General case: Jumpful:
10170 * xorl dest,dest cmpl op1, op2
10171 * cmpl op1, op2 movl ct, dest
10172 * setcc dest jcc 1f
10173 * decl dest movl cf, dest
10174 * andl (cf-ct),dest 1:
10175 * addl ct,dest
0f290768 10176 *
e075ae69
RH
10177 * Size 20. Size 14.
10178 *
10179 * This is reasonably steep, but branch mispredict costs are
10180 * high on modern cpus, so consider failing only if optimizing
10181 * for space.
e075ae69
RH
10182 */
10183
4977bab6
ZW
10184 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10185 && BRANCH_COST >= 2)
e075ae69 10186 {
97f51ac4 10187 if (cf == 0)
e075ae69 10188 {
97f51ac4
RB
10189 cf = ct;
10190 ct = 0;
734dba19 10191 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10192 /* We may be reversing unordered compare to normal compare,
10193 that is not valid in general (we may convert non-trapping
10194 condition to trapping one), however on i386 we currently
10195 emit all comparisons unordered. */
10196 code = reverse_condition_maybe_unordered (code);
10197 else
10198 {
10199 code = reverse_condition (code);
10200 if (compare_code != NIL)
10201 compare_code = reverse_condition (compare_code);
10202 }
10203 }
10204
10205 if (compare_code != NIL)
10206 {
10207 /* notl op1 (if needed)
10208 sarl $31, op1
10209 andl (cf-ct), op1
b96a374d 10210 addl ct, op1
0f2a3457
JJ
10211
10212 For x < 0 (resp. x <= -1) there will be no notl,
10213 so if possible swap the constants to get rid of the
10214 complement.
10215 True/false will be -1/0 while code below (store flag
10216 followed by decrement) is 0/-1, so the constants need
10217 to be exchanged once more. */
10218
10219 if (compare_code == GE || !cf)
734dba19 10220 {
b96a374d 10221 code = reverse_condition (code);
0f2a3457 10222 compare_code = LT;
734dba19
JH
10223 }
10224 else
10225 {
0f2a3457 10226 HOST_WIDE_INT tmp = cf;
b96a374d 10227 cf = ct;
0f2a3457 10228 ct = tmp;
734dba19 10229 }
0f2a3457
JJ
10230
10231 out = emit_store_flag (out, code, ix86_compare_op0,
10232 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10233 }
0f2a3457
JJ
10234 else
10235 {
10236 out = emit_store_flag (out, code, ix86_compare_op0,
10237 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10238
4977bab6
ZW
10239 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10240 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10241 }
e075ae69 10242
4977bab6 10243 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10244 gen_int_mode (cf - ct, mode),
4977bab6 10245 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10246 if (ct)
4977bab6
ZW
10247 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10248 copy_rtx (out), 1, OPTAB_DIRECT);
10249 if (!rtx_equal_p (out, operands[0]))
10250 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10251
10252 return 1; /* DONE */
10253 }
10254 }
10255
4977bab6 10256 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10257 {
10258 /* Try a few things more with specific constants and a variable. */
10259
78a0d70c 10260 optab op;
e075ae69
RH
10261 rtx var, orig_out, out, tmp;
10262
4977bab6 10263 if (BRANCH_COST <= 2)
e075ae69
RH
10264 return 0; /* FAIL */
10265
0f290768 10266 /* If one of the two operands is an interesting constant, load a
e075ae69 10267 constant with the above and mask it in with a logical operation. */
0f290768 10268
e075ae69
RH
10269 if (GET_CODE (operands[2]) == CONST_INT)
10270 {
10271 var = operands[3];
4977bab6 10272 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10273 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10274 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10275 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10276 else
10277 return 0; /* FAIL */
e075ae69
RH
10278 }
10279 else if (GET_CODE (operands[3]) == CONST_INT)
10280 {
10281 var = operands[2];
4977bab6 10282 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10283 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10284 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10285 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10286 else
10287 return 0; /* FAIL */
e075ae69 10288 }
78a0d70c 10289 else
e075ae69
RH
10290 return 0; /* FAIL */
10291
10292 orig_out = operands[0];
635559ab 10293 tmp = gen_reg_rtx (mode);
e075ae69
RH
10294 operands[0] = tmp;
10295
10296 /* Recurse to get the constant loaded. */
10297 if (ix86_expand_int_movcc (operands) == 0)
10298 return 0; /* FAIL */
10299
10300 /* Mask in the interesting variable. */
635559ab 10301 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10302 OPTAB_WIDEN);
4977bab6
ZW
10303 if (!rtx_equal_p (out, orig_out))
10304 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10305
10306 return 1; /* DONE */
10307 }
10308
10309 /*
10310 * For comparison with above,
10311 *
10312 * movl cf,dest
10313 * movl ct,tmp
10314 * cmpl op1,op2
10315 * cmovcc tmp,dest
10316 *
10317 * Size 15.
10318 */
10319
635559ab
JH
10320 if (! nonimmediate_operand (operands[2], mode))
10321 operands[2] = force_reg (mode, operands[2]);
10322 if (! nonimmediate_operand (operands[3], mode))
10323 operands[3] = force_reg (mode, operands[3]);
e075ae69 10324
a1b8572c
JH
10325 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10326 {
635559ab 10327 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10328 emit_move_insn (tmp, operands[3]);
10329 operands[3] = tmp;
10330 }
10331 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10332 {
635559ab 10333 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10334 emit_move_insn (tmp, operands[2]);
10335 operands[2] = tmp;
10336 }
4977bab6 10337
c9682caf 10338 if (! register_operand (operands[2], VOIDmode)
b96a374d 10339 && (mode == QImode
4977bab6 10340 || ! register_operand (operands[3], VOIDmode)))
635559ab 10341 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10342
4977bab6
ZW
10343 if (mode == QImode
10344 && ! register_operand (operands[3], VOIDmode))
10345 operands[3] = force_reg (mode, operands[3]);
10346
e075ae69
RH
10347 emit_insn (compare_seq);
10348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10349 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10350 compare_op, operands[2],
10351 operands[3])));
a1b8572c 10352 if (bypass_test)
4977bab6 10353 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10354 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10355 bypass_test,
4977bab6
ZW
10356 copy_rtx (operands[3]),
10357 copy_rtx (operands[0]))));
a1b8572c 10358 if (second_test)
4977bab6 10359 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10360 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10361 second_test,
4977bab6
ZW
10362 copy_rtx (operands[2]),
10363 copy_rtx (operands[0]))));
e075ae69
RH
10364
10365 return 1; /* DONE */
e9a25f70 10366}
e075ae69 10367
32b5b1aa 10368int
b96a374d 10369ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 10370{
e075ae69 10371 enum rtx_code code;
e075ae69 10372 rtx tmp;
a1b8572c 10373 rtx compare_op, second_test, bypass_test;
32b5b1aa 10374
0073023d
JH
10375 /* For SF/DFmode conditional moves based on comparisons
10376 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
10377 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10378 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10379 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
10380 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10381 && (!TARGET_IEEE_FP
10382 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
10383 /* We may be called from the post-reload splitter. */
10384 && (!REG_P (operands[0])
10385 || SSE_REG_P (operands[0])
52a661a6 10386 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
10387 {
10388 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10389 code = GET_CODE (operands[1]);
10390
10391 /* See if we have (cross) match between comparison operands and
10392 conditional move operands. */
10393 if (rtx_equal_p (operands[2], op1))
10394 {
10395 rtx tmp = op0;
10396 op0 = op1;
10397 op1 = tmp;
10398 code = reverse_condition_maybe_unordered (code);
10399 }
10400 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10401 {
10402 /* Check for min operation. */
4977bab6 10403 if (code == LT || code == UNLE)
0073023d 10404 {
4977bab6
ZW
10405 if (code == UNLE)
10406 {
10407 rtx tmp = op0;
10408 op0 = op1;
10409 op1 = tmp;
10410 }
0073023d
JH
10411 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10412 if (memory_operand (op0, VOIDmode))
10413 op0 = force_reg (GET_MODE (operands[0]), op0);
10414 if (GET_MODE (operands[0]) == SFmode)
10415 emit_insn (gen_minsf3 (operands[0], op0, op1));
10416 else
10417 emit_insn (gen_mindf3 (operands[0], op0, op1));
10418 return 1;
10419 }
10420 /* Check for max operation. */
4977bab6 10421 if (code == GT || code == UNGE)
0073023d 10422 {
4977bab6
ZW
10423 if (code == UNGE)
10424 {
10425 rtx tmp = op0;
10426 op0 = op1;
10427 op1 = tmp;
10428 }
0073023d
JH
10429 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10430 if (memory_operand (op0, VOIDmode))
10431 op0 = force_reg (GET_MODE (operands[0]), op0);
10432 if (GET_MODE (operands[0]) == SFmode)
10433 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10434 else
10435 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10436 return 1;
10437 }
10438 }
10439 /* Manage condition to be sse_comparison_operator. In case we are
10440 in non-ieee mode, try to canonicalize the destination operand
10441 to be first in the comparison - this helps reload to avoid extra
10442 moves. */
10443 if (!sse_comparison_operator (operands[1], VOIDmode)
10444 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10445 {
10446 rtx tmp = ix86_compare_op0;
10447 ix86_compare_op0 = ix86_compare_op1;
10448 ix86_compare_op1 = tmp;
10449 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10450 VOIDmode, ix86_compare_op0,
10451 ix86_compare_op1);
10452 }
d1f87653 10453 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
10454 move. We also don't support the NE comparison on SSE, so try to
10455 avoid it. */
037f20f1
JH
10456 if ((rtx_equal_p (operands[0], operands[3])
10457 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10458 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
10459 {
10460 rtx tmp = operands[2];
10461 operands[2] = operands[3];
92d0fb09 10462 operands[3] = tmp;
0073023d
JH
10463 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10464 (GET_CODE (operands[1])),
10465 VOIDmode, ix86_compare_op0,
10466 ix86_compare_op1);
10467 }
10468 if (GET_MODE (operands[0]) == SFmode)
10469 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10470 operands[2], operands[3],
10471 ix86_compare_op0, ix86_compare_op1));
10472 else
10473 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10474 operands[2], operands[3],
10475 ix86_compare_op0, ix86_compare_op1));
10476 return 1;
10477 }
10478
e075ae69 10479 /* The floating point conditional move instructions don't directly
0f290768 10480 support conditions resulting from a signed integer comparison. */
32b5b1aa 10481
e075ae69 10482 code = GET_CODE (operands[1]);
a1b8572c 10483 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
10484
10485 /* The floating point conditional move instructions don't directly
10486 support signed integer comparisons. */
10487
a1b8572c 10488 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 10489 {
a1b8572c 10490 if (second_test != NULL || bypass_test != NULL)
b531087a 10491 abort ();
e075ae69 10492 tmp = gen_reg_rtx (QImode);
3a3677ff 10493 ix86_expand_setcc (code, tmp);
e075ae69
RH
10494 code = NE;
10495 ix86_compare_op0 = tmp;
10496 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
10497 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10498 }
10499 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10500 {
10501 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10502 emit_move_insn (tmp, operands[3]);
10503 operands[3] = tmp;
10504 }
10505 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10506 {
10507 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10508 emit_move_insn (tmp, operands[2]);
10509 operands[2] = tmp;
e075ae69 10510 }
e9a25f70 10511
e075ae69
RH
10512 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10513 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 10514 compare_op,
e075ae69
RH
10515 operands[2],
10516 operands[3])));
a1b8572c
JH
10517 if (bypass_test)
10518 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10519 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10520 bypass_test,
10521 operands[3],
10522 operands[0])));
10523 if (second_test)
10524 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10525 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10526 second_test,
10527 operands[2],
10528 operands[0])));
32b5b1aa 10529
e075ae69 10530 return 1;
32b5b1aa
SC
10531}
10532
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.

   operands[0] = operands[2] +/- (operands[1] ? 1 : -1 adjustments) where
   operands[3] must be const1_rtx or constm1_rtx; the comparison inputs
   live in ix86_compare_op0/ix86_compare_op1.  Returns 0 when the pattern
   does not apply (caller must fall back); 1 when all insns were
   emitted.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +-1 adjustments can be expressed with adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be expressible via the carry flag.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize so the condition is carry-set (LTU); flip val when the
     condition had to be reversed.  NOTE: this mutates compare_op in
     place.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  return 1; /* DONE */
}
10615
10616
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating pointer parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the word-sized pieces
   (low word first), MODE is OPERAND's machine mode.  Returns the number
   of parts produced (2 or 3).  Must only be called after reload for
   hard-register operands.  */
static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit words on !TARGET_64BIT (XFmode
     needs 3), 64-bit words otherwise (rounding up odd sizes).  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into word-sized pieces.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* Pushes get the same (pre-dec) address for every part; the caller
	 emits them in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive-hard-register splitting is only valid after
		 reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
10758
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 are used as scratch slots holding the destination parts
   in the correct emission order; operands 5-7 hold the source parts.
   All required insns are emitted; nothing is returned.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* XFmode pushed as 16 bytes needs 4 bytes of extra stack
		 adjustment before the three 4-byte part pushes.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go high part first so the value ends up in memory order.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 10950
e075ae69 10951void
b96a374d 10952ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10953{
e075ae69
RH
10954 rtx low[2], high[2];
10955 int count;
b985a30f 10956
e075ae69
RH
10957 if (GET_CODE (operands[2]) == CONST_INT)
10958 {
10959 split_di (operands, 2, low, high);
10960 count = INTVAL (operands[2]) & 63;
32b5b1aa 10961
e075ae69
RH
10962 if (count >= 32)
10963 {
10964 emit_move_insn (high[0], low[1]);
10965 emit_move_insn (low[0], const0_rtx);
b985a30f 10966
e075ae69
RH
10967 if (count > 32)
10968 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10969 }
10970 else
10971 {
10972 if (!rtx_equal_p (operands[0], operands[1]))
10973 emit_move_insn (operands[0], operands[1]);
10974 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10975 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10976 }
10977 }
10978 else
10979 {
10980 if (!rtx_equal_p (operands[0], operands[1]))
10981 emit_move_insn (operands[0], operands[1]);
b985a30f 10982
e075ae69 10983 split_di (operands, 1, low, high);
b985a30f 10984
e075ae69
RH
10985 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10986 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10987
fe577e58 10988 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10989 {
fe577e58 10990 if (! no_new_pseudos)
e075ae69
RH
10991 scratch = force_reg (SImode, const0_rtx);
10992 else
10993 emit_move_insn (scratch, const0_rtx);
10994
10995 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10996 scratch));
10997 }
10998 else
10999 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11000 }
e9a25f70 11001}
32b5b1aa 11002
e075ae69 11003void
b96a374d 11004ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 11005{
e075ae69
RH
11006 rtx low[2], high[2];
11007 int count;
32b5b1aa 11008
e075ae69
RH
11009 if (GET_CODE (operands[2]) == CONST_INT)
11010 {
11011 split_di (operands, 2, low, high);
11012 count = INTVAL (operands[2]) & 63;
32b5b1aa 11013
8937b6a2
RS
11014 if (count == 63)
11015 {
11016 emit_move_insn (high[0], high[1]);
11017 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11018 emit_move_insn (low[0], high[0]);
11019
11020 }
11021 else if (count >= 32)
e075ae69
RH
11022 {
11023 emit_move_insn (low[0], high[1]);
32b5b1aa 11024
e075ae69
RH
11025 if (! reload_completed)
11026 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
11027 else
11028 {
11029 emit_move_insn (high[0], low[0]);
11030 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11031 }
11032
11033 if (count > 32)
11034 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11035 }
11036 else
11037 {
11038 if (!rtx_equal_p (operands[0], operands[1]))
11039 emit_move_insn (operands[0], operands[1]);
11040 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11041 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11042 }
11043 }
11044 else
32b5b1aa 11045 {
e075ae69
RH
11046 if (!rtx_equal_p (operands[0], operands[1]))
11047 emit_move_insn (operands[0], operands[1]);
11048
11049 split_di (operands, 1, low, high);
11050
11051 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11052 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11053
fe577e58 11054 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11055 {
fe577e58 11056 if (! no_new_pseudos)
e075ae69
RH
11057 scratch = gen_reg_rtx (SImode);
11058 emit_move_insn (scratch, high[0]);
11059 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11060 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11061 scratch));
11062 }
11063 else
11064 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 11065 }
e075ae69 11066}
32b5b1aa 11067
e075ae69 11068void
b96a374d 11069ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
11070{
11071 rtx low[2], high[2];
11072 int count;
32b5b1aa 11073
e075ae69 11074 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 11075 {
e075ae69
RH
11076 split_di (operands, 2, low, high);
11077 count = INTVAL (operands[2]) & 63;
11078
11079 if (count >= 32)
c7271385 11080 {
e075ae69
RH
11081 emit_move_insn (low[0], high[1]);
11082 emit_move_insn (high[0], const0_rtx);
32b5b1aa 11083
e075ae69
RH
11084 if (count > 32)
11085 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11086 }
11087 else
11088 {
11089 if (!rtx_equal_p (operands[0], operands[1]))
11090 emit_move_insn (operands[0], operands[1]);
11091 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11092 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11093 }
32b5b1aa 11094 }
e075ae69
RH
11095 else
11096 {
11097 if (!rtx_equal_p (operands[0], operands[1]))
11098 emit_move_insn (operands[0], operands[1]);
32b5b1aa 11099
e075ae69
RH
11100 split_di (operands, 1, low, high);
11101
11102 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11103 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11104
11105 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 11106 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11107 {
fe577e58 11108 if (! no_new_pseudos)
e075ae69
RH
11109 scratch = force_reg (SImode, const0_rtx);
11110 else
11111 emit_move_insn (scratch, const0_rtx);
11112
11113 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11114 scratch));
11115 }
11116 else
11117 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11118 }
32b5b1aa 11119}
3f803cd9 11120
0407c02b 11121/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
11122 it is aligned to VALUE bytes. If true, jump to the label. */
11123static rtx
b96a374d 11124ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
11125{
11126 rtx label = gen_label_rtx ();
11127 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11128 if (GET_MODE (variable) == DImode)
11129 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11130 else
11131 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11132 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 11133 1, label);
0945b39d
JH
11134 return label;
11135}
11136
11137/* Adjust COUNTER by the VALUE. */
11138static void
b96a374d 11139ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
11140{
11141 if (GET_MODE (countreg) == DImode)
11142 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11143 else
11144 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11145}
11146
11147/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 11148rtx
b96a374d 11149ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
11150{
11151 rtx r;
11152 if (GET_MODE (exp) == VOIDmode)
11153 return force_reg (Pmode, exp);
11154 if (GET_MODE (exp) == Pmode)
11155 return copy_to_mode_reg (Pmode, exp);
11156 r = gen_reg_rtx (Pmode);
11157 emit_insn (gen_zero_extendsidi2 (r, exp));
11158 return r;
11159}
11160
11161/* Expand string move (memcpy) operation. Use i386 string operations when
70128ad9 11162 profitable. expand_clrmem contains similar code. */
0945b39d 11163int
70128ad9 11164ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
0945b39d 11165{
4e44c1ef 11166 rtx srcreg, destreg, countreg, srcexp, destexp;
0945b39d
JH
11167 enum machine_mode counter_mode;
11168 HOST_WIDE_INT align = 0;
11169 unsigned HOST_WIDE_INT count = 0;
0945b39d 11170
0945b39d
JH
11171 if (GET_CODE (align_exp) == CONST_INT)
11172 align = INTVAL (align_exp);
11173
d0a5295a
RH
11174 /* Can't use any of this if the user has appropriated esi or edi. */
11175 if (global_regs[4] || global_regs[5])
11176 return 0;
11177
5519a4f9 11178 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11179 if (!TARGET_ALIGN_STRINGOPS)
11180 align = 64;
11181
11182 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11183 {
11184 count = INTVAL (count_exp);
11185 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11186 return 0;
11187 }
0945b39d
JH
11188
11189 /* Figure out proper mode for counter. For 32bits it is always SImode,
11190 for 64bits use SImode when possible, otherwise DImode.
11191 Set count to number of bytes copied when known at compile time. */
11192 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11193 || x86_64_zero_extended_value (count_exp))
11194 counter_mode = SImode;
11195 else
11196 counter_mode = DImode;
11197
11198 if (counter_mode != SImode && counter_mode != DImode)
11199 abort ();
11200
11201 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
4e44c1ef
JJ
11202 if (destreg != XEXP (dst, 0))
11203 dst = replace_equiv_address_nv (dst, destreg);
0945b39d 11204 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
4e44c1ef
JJ
11205 if (srcreg != XEXP (src, 0))
11206 src = replace_equiv_address_nv (src, srcreg);
0945b39d
JH
11207
11208 /* When optimizing for size emit simple rep ; movsb instruction for
11209 counts not divisible by 4. */
11210
11211 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11212 {
4e44c1ef 11213 emit_insn (gen_cld ());
0945b39d 11214 countreg = ix86_zero_extend_to_Pmode (count_exp);
4e44c1ef
JJ
11215 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11216 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11217 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11218 destexp, srcexp));
0945b39d
JH
11219 }
11220
11221 /* For constant aligned (or small unaligned) copies use rep movsl
11222 followed by code copying the rest. For PentiumPro ensure 8 byte
11223 alignment to allow rep movsl acceleration. */
11224
11225 else if (count != 0
11226 && (align >= 8
11227 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11228 || optimize_size || count < (unsigned int) 64))
0945b39d 11229 {
4e44c1ef 11230 unsigned HOST_WIDE_INT offset = 0;
0945b39d 11231 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11232 rtx srcmem, dstmem;
11233
11234 emit_insn (gen_cld ());
0945b39d
JH
11235 if (count & ~(size - 1))
11236 {
11237 countreg = copy_to_mode_reg (counter_mode,
11238 GEN_INT ((count >> (size == 4 ? 2 : 3))
11239 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11240 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11241
11242 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11243 GEN_INT (size == 4 ? 2 : 3));
11244 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11245 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11246
11247 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11248 countreg, destexp, srcexp));
11249 offset = count & ~(size - 1);
0945b39d
JH
11250 }
11251 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11252 {
11253 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11254 offset);
11255 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11256 offset);
11257 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11258 offset += 4;
11259 }
0945b39d 11260 if (count & 0x02)
4e44c1ef
JJ
11261 {
11262 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11263 offset);
11264 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11265 offset);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 offset += 2;
11268 }
0945b39d 11269 if (count & 0x01)
4e44c1ef
JJ
11270 {
11271 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11272 offset);
11273 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11274 offset);
11275 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11276 }
0945b39d
JH
11277 }
11278 /* The generic code based on the glibc implementation:
11279 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11280 allowing accelerated copying there)
11281 - copy the data using rep movsl
11282 - copy the rest. */
11283 else
11284 {
11285 rtx countreg2;
11286 rtx label = NULL;
4e44c1ef 11287 rtx srcmem, dstmem;
37ad04a5
JH
11288 int desired_alignment = (TARGET_PENTIUMPRO
11289 && (count == 0 || count >= (unsigned int) 260)
11290 ? 8 : UNITS_PER_WORD);
4e44c1ef
JJ
11291 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11292 dst = change_address (dst, BLKmode, destreg);
11293 src = change_address (src, BLKmode, srcreg);
0945b39d
JH
11294
11295 /* In case we don't know anything about the alignment, default to
11296 library version, since it is usually equally fast and result in
b96a374d 11297 shorter code.
4977bab6
ZW
11298
11299 Also emit call when we know that the count is large and call overhead
11300 will not be important. */
11301 if (!TARGET_INLINE_ALL_STRINGOPS
11302 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
4e44c1ef 11303 return 0;
0945b39d
JH
11304
11305 if (TARGET_SINGLE_STRINGOP)
11306 emit_insn (gen_cld ());
11307
11308 countreg2 = gen_reg_rtx (Pmode);
11309 countreg = copy_to_mode_reg (counter_mode, count_exp);
11310
11311 /* We don't use loops to align destination and to copy parts smaller
11312 than 4 bytes, because gcc is able to optimize such code better (in
11313 the case the destination or the count really is aligned, gcc is often
11314 able to predict the branches) and also it is friendlier to the
a4f31c00 11315 hardware branch prediction.
0945b39d 11316
d1f87653 11317 Using loops is beneficial for generic case, because we can
0945b39d
JH
11318 handle small counts using the loops. Many CPUs (such as Athlon)
11319 have large REP prefix setup costs.
11320
4aae8a9a 11321 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
11322 add some customizability to this code. */
11323
37ad04a5 11324 if (count == 0 && align < desired_alignment)
0945b39d
JH
11325 {
11326 label = gen_label_rtx ();
aaae0bb9 11327 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11328 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11329 }
11330 if (align <= 1)
11331 {
11332 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11333 srcmem = change_address (src, QImode, srcreg);
11334 dstmem = change_address (dst, QImode, destreg);
11335 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11336 ix86_adjust_counter (countreg, 1);
11337 emit_label (label);
11338 LABEL_NUSES (label) = 1;
11339 }
11340 if (align <= 2)
11341 {
11342 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11343 srcmem = change_address (src, HImode, srcreg);
11344 dstmem = change_address (dst, HImode, destreg);
11345 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11346 ix86_adjust_counter (countreg, 2);
11347 emit_label (label);
11348 LABEL_NUSES (label) = 1;
11349 }
37ad04a5 11350 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11351 {
11352 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11353 srcmem = change_address (src, SImode, srcreg);
11354 dstmem = change_address (dst, SImode, destreg);
11355 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11356 ix86_adjust_counter (countreg, 4);
11357 emit_label (label);
11358 LABEL_NUSES (label) = 1;
11359 }
11360
37ad04a5
JH
11361 if (label && desired_alignment > 4 && !TARGET_64BIT)
11362 {
11363 emit_label (label);
11364 LABEL_NUSES (label) = 1;
11365 label = NULL_RTX;
11366 }
0945b39d
JH
11367 if (!TARGET_SINGLE_STRINGOP)
11368 emit_insn (gen_cld ());
11369 if (TARGET_64BIT)
11370 {
11371 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11372 GEN_INT (3)));
4e44c1ef 11373 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11374 }
11375 else
11376 {
4e44c1ef
JJ
11377 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11378 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11379 }
4e44c1ef
JJ
11380 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11381 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11382 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11383 countreg2, destexp, srcexp));
0945b39d
JH
11384
11385 if (label)
11386 {
11387 emit_label (label);
11388 LABEL_NUSES (label) = 1;
11389 }
11390 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11391 {
11392 srcmem = change_address (src, SImode, srcreg);
11393 dstmem = change_address (dst, SImode, destreg);
11394 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11395 }
0945b39d
JH
11396 if ((align <= 4 || count == 0) && TARGET_64BIT)
11397 {
11398 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11399 srcmem = change_address (src, SImode, srcreg);
11400 dstmem = change_address (dst, SImode, destreg);
11401 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11402 emit_label (label);
11403 LABEL_NUSES (label) = 1;
11404 }
11405 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11406 {
11407 srcmem = change_address (src, HImode, srcreg);
11408 dstmem = change_address (dst, HImode, destreg);
11409 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11410 }
0945b39d
JH
11411 if (align <= 2 || count == 0)
11412 {
11413 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11414 srcmem = change_address (src, HImode, srcreg);
11415 dstmem = change_address (dst, HImode, destreg);
11416 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11417 emit_label (label);
11418 LABEL_NUSES (label) = 1;
11419 }
11420 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11421 {
11422 srcmem = change_address (src, QImode, srcreg);
11423 dstmem = change_address (dst, QImode, destreg);
11424 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11425 }
0945b39d
JH
11426 if (align <= 1 || count == 0)
11427 {
11428 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11429 srcmem = change_address (src, QImode, srcreg);
11430 dstmem = change_address (dst, QImode, destreg);
11431 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11432 emit_label (label);
11433 LABEL_NUSES (label) = 1;
11434 }
11435 }
11436
0945b39d
JH
11437 return 1;
11438}
11439
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.

   DST is the destination memory, COUNT_EXP the byte count, ALIGN_EXP the
   known alignment of DST.  Returns 1 when an inline expansion was emitted,
   0 to make the caller fall back to a library call.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Known small/aligned count: clear word-sized chunks with rep stos,
	 then mop up the 4/2/1-byte remainders with single stores.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
	  offset = count & ~(size - 1);
	}
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* Runtime count: skip the alignment prologue entirely when the
	 count is too small for it to pay off.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Shift the byte count down to a word count for rep stos; DESTEXP
	 describes the final value of the destination pointer.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear the remaining 4/2/1 bytes, either unconditionally
	 (compile-time count) or guarded by runtime alignment tests.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
4e44c1ef 11665
0945b39d
JH
/* Expand strlen.

   OUT receives the computed length, SRC is the string MEM, EOSCHAR the
   terminator character, ALIGN the known alignment of SRC.  Returns 1 when
   an inline expansion was emitted, 0 to fall back to the generic/library
   code.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Unrolled word-at-a-time search; only valid for a NUL terminator.

	 Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb path: scan for EOSCHAR, then recover the length
	 from the post-scan count register (one's complement minus 1).  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
11737
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (The classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test.)  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
       /* Branch-free narrowing to the zero byte using conditional moves.  */
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, const2_rtx)));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				   gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				   pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, const2_rtx));
       else
	 emit_insn (gen_addsi3 (out, out, const2_rtx));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add sets the carry from the
     low byte's high bit; the subtract-with-borrow then corrects OUT so
     it points exactly at the zero byte.  (Register 17 is the flags
     register here — presumably FLAGS_REG; confirm against i386.h.)  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3
RH
11928
/* Emit RTL for a call.

   RETVAL is the value register or NULL for a void call; FNADDR is a MEM
   wrapping the function address; CALLARG1/CALLARG2 come from the call
   expander (CALLARG2 carries the SSE register count for the 64-bit
   varargs ABI); POP is the callee-pop amount or const0_rtx; SIBCALL is
   nonzero for a tail call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* The 64-bit ABI has no callee-pop calling convention.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* %al tells a 64-bit varargs callee how many SSE registers hold
     arguments.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit indirect sibcalls go through r11: it is not used for
     argument passing, so it stays live across the epilogue.  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Model the callee-popped bytes as a stack-pointer adjustment in
	 parallel with the call itself.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 11988
e075ae69 11989\f
e075ae69
RH
11990/* Clear stack slot assignments remembered from previous functions.
11991 This is called from INIT_EXPANDERS once before RTL is emitted for each
11992 function. */
11993
e2500fed 11994static struct machine_function *
b96a374d 11995ix86_init_machine_status (void)
37b15744 11996{
d7394366
JH
11997 struct machine_function *f;
11998
11999 f = ggc_alloc_cleared (sizeof (struct machine_function));
12000 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
12001
12002 return f;
1526a060
BS
12003}
12004
e075ae69
RH
12005/* Return a MEM corresponding to a stack slot with mode MODE.
12006 Allocate a new slot if necessary.
12007
12008 The RTL for a function can have several slots available: N is
12009 which slot to use. */
12010
12011rtx
b96a374d 12012assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 12013{
ddb0ae00
ZW
12014 struct stack_local_entry *s;
12015
e075ae69
RH
12016 if (n < 0 || n >= MAX_386_STACK_LOCALS)
12017 abort ();
12018
ddb0ae00
ZW
12019 for (s = ix86_stack_locals; s; s = s->next)
12020 if (s->mode == mode && s->n == n)
12021 return s->rtl;
12022
12023 s = (struct stack_local_entry *)
12024 ggc_alloc (sizeof (struct stack_local_entry));
12025 s->n = n;
12026 s->mode = mode;
12027 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 12028
ddb0ae00
ZW
12029 s->next = ix86_stack_locals;
12030 ix86_stack_locals = s;
12031 return s->rtl;
e075ae69 12032}
f996902d
RH
12033
12034/* Construct the SYMBOL_REF for the tls_get_addr function. */
12035
e2500fed 12036static GTY(()) rtx ix86_tls_symbol;
f996902d 12037rtx
b96a374d 12038ix86_tls_get_addr (void)
f996902d 12039{
f996902d 12040
e2500fed 12041 if (!ix86_tls_symbol)
f996902d 12042 {
75d38379
JJ
12043 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12044 (TARGET_GNU_TLS && !TARGET_64BIT)
12045 ? "___tls_get_addr"
12046 : "__tls_get_addr");
f996902d
RH
12047 }
12048
e2500fed 12049 return ix86_tls_symbol;
f996902d 12050}
e075ae69
RH
12051\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.

   Returns 0 for auto-modified addresses (which have no modrm-style
   encoding here), otherwise the number of extra bytes (SIB and/or
   displacement) needed beyond the one-byte modrm.  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit constants, which fit the short
	     disp8 encoding — but only when a base register exists.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
79325812 12123
5bf0ebab
RH
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.

   Returns the number of immediate-operand bytes in INSN's encoding.
   Aborts if more than one operand is constant, since that would make
   the single length ambiguous.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* Only one immediate per insn is supported here.  */
	if (len)
	  abort ();
	/* 'K' matches signed 8-bit constants, i.e. the imm8 form.  */
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len+=1;
		break;
	      case MODE_HI:
		len+=2;
		break;
	      case MODE_SI:
		len+=4;
		break;
	      /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
	      case MODE_DI:
		len+=4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
12165/* Compute default value for "length_address" attribute. */
12166int
b96a374d 12167ix86_attr_length_address_default (rtx insn)
6ef67412
JH
12168{
12169 int i;
9b73c90a
EB
12170
12171 if (get_attr_type (insn) == TYPE_LEA)
12172 {
12173 rtx set = PATTERN (insn);
12174 if (GET_CODE (set) == SET)
12175 ;
12176 else if (GET_CODE (set) == PARALLEL
12177 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12178 set = XVECEXP (set, 0, 0);
12179 else
12180 {
12181#ifdef ENABLE_CHECKING
12182 abort ();
12183#endif
12184 return 0;
12185 }
12186
12187 return memory_address_length (SET_SRC (set));
12188 }
12189
6c698a6d 12190 extract_insn_cached (insn);
1ccbefce
RH
12191 for (i = recog_data.n_operands - 1; i >= 0; --i)
12192 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12193 {
6ef67412 12194 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
12195 break;
12196 }
6ef67412 12197 return 0;
3f803cd9 12198}
e075ae69
RH
12199\f
12200/* Return the maximum number of instructions a cpu can issue. */
b657fc39 12201
c237e94a 12202static int
b96a374d 12203ix86_issue_rate (void)
b657fc39 12204{
9e555526 12205 switch (ix86_tune)
b657fc39 12206 {
e075ae69
RH
12207 case PROCESSOR_PENTIUM:
12208 case PROCESSOR_K6:
12209 return 2;
79325812 12210
e075ae69 12211 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
12212 case PROCESSOR_PENTIUM4:
12213 case PROCESSOR_ATHLON:
4977bab6 12214 case PROCESSOR_K8:
89c43c0a 12215 case PROCESSOR_NOCONA:
e075ae69 12216 return 3;
b657fc39 12217
b657fc39 12218 default:
e075ae69 12219 return 1;
b657fc39 12220 }
b657fc39
L
12221}
12222
e075ae69
RH
12223/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12224 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 12225
e075ae69 12226static int
b96a374d 12227ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12228{
12229 rtx set, set2;
b657fc39 12230
e075ae69
RH
12231 /* Simplify the test for uninteresting insns. */
12232 if (insn_type != TYPE_SETCC
12233 && insn_type != TYPE_ICMOV
12234 && insn_type != TYPE_FCMOV
12235 && insn_type != TYPE_IBR)
12236 return 0;
b657fc39 12237
e075ae69
RH
12238 if ((set = single_set (dep_insn)) != 0)
12239 {
12240 set = SET_DEST (set);
12241 set2 = NULL_RTX;
12242 }
12243 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12244 && XVECLEN (PATTERN (dep_insn), 0) == 2
12245 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12246 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12247 {
12248 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12249 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12250 }
78a0d70c
ZW
12251 else
12252 return 0;
b657fc39 12253
78a0d70c
ZW
12254 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12255 return 0;
b657fc39 12256
f5143c46 12257 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
12258 not any other potentially set register. */
12259 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12260 return 0;
12261
12262 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12263 return 0;
12264
12265 return 1;
e075ae69 12266}
b657fc39 12267
e075ae69
RH
12268/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12269 address with operands set by DEP_INSN. */
12270
12271static int
b96a374d 12272ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12273{
12274 rtx addr;
12275
6ad48e84
JH
12276 if (insn_type == TYPE_LEA
12277 && TARGET_PENTIUM)
5fbdde42
RH
12278 {
12279 addr = PATTERN (insn);
12280 if (GET_CODE (addr) == SET)
12281 ;
12282 else if (GET_CODE (addr) == PARALLEL
12283 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12284 addr = XVECEXP (addr, 0, 0);
12285 else
12286 abort ();
12287 addr = SET_SRC (addr);
12288 }
e075ae69
RH
12289 else
12290 {
12291 int i;
6c698a6d 12292 extract_insn_cached (insn);
1ccbefce
RH
12293 for (i = recog_data.n_operands - 1; i >= 0; --i)
12294 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12295 {
1ccbefce 12296 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
12297 goto found;
12298 }
12299 return 0;
12300 found:;
b657fc39
L
12301 }
12302
e075ae69 12303 return modified_in_p (addr, dep_insn);
b657fc39 12304}
a269a03c 12305
c237e94a 12306static int
b96a374d 12307ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 12308{
e075ae69 12309 enum attr_type insn_type, dep_insn_type;
8695f61e 12310 enum attr_memory memory;
e075ae69 12311 rtx set, set2;
9b00189f 12312 int dep_insn_code_number;
a269a03c 12313
d1f87653 12314 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12315 if (REG_NOTE_KIND (link) != 0)
309ada50 12316 return 0;
a269a03c 12317
9b00189f
JH
12318 dep_insn_code_number = recog_memoized (dep_insn);
12319
e075ae69 12320 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12321 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12322 return cost;
a269a03c 12323
1c71e60e
JH
12324 insn_type = get_attr_type (insn);
12325 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12326
9e555526 12327 switch (ix86_tune)
a269a03c
JC
12328 {
12329 case PROCESSOR_PENTIUM:
e075ae69
RH
12330 /* Address Generation Interlock adds a cycle of latency. */
12331 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12332 cost += 1;
12333
12334 /* ??? Compares pair with jump/setcc. */
12335 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12336 cost = 0;
12337
d1f87653 12338 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 12339 if (insn_type == TYPE_FMOV
e075ae69
RH
12340 && get_attr_memory (insn) == MEMORY_STORE
12341 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12342 cost += 1;
12343 break;
a269a03c 12344
e075ae69 12345 case PROCESSOR_PENTIUMPRO:
6ad48e84 12346 memory = get_attr_memory (insn);
e075ae69
RH
12347
12348 /* INT->FP conversion is expensive. */
12349 if (get_attr_fp_int_src (dep_insn))
12350 cost += 5;
12351
12352 /* There is one cycle extra latency between an FP op and a store. */
12353 if (insn_type == TYPE_FMOV
12354 && (set = single_set (dep_insn)) != NULL_RTX
12355 && (set2 = single_set (insn)) != NULL_RTX
12356 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12357 && GET_CODE (SET_DEST (set2)) == MEM)
12358 cost += 1;
6ad48e84
JH
12359
12360 /* Show ability of reorder buffer to hide latency of load by executing
12361 in parallel with previous instruction in case
12362 previous instruction is not needed to compute the address. */
12363 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12364 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12365 {
6ad48e84
JH
12366 /* Claim moves to take one cycle, as core can issue one load
12367 at time and the next load can start cycle later. */
12368 if (dep_insn_type == TYPE_IMOV
12369 || dep_insn_type == TYPE_FMOV)
12370 cost = 1;
12371 else if (cost > 1)
12372 cost--;
12373 }
e075ae69 12374 break;
a269a03c 12375
e075ae69 12376 case PROCESSOR_K6:
6ad48e84 12377 memory = get_attr_memory (insn);
8695f61e 12378
e075ae69
RH
12379 /* The esp dependency is resolved before the instruction is really
12380 finished. */
12381 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12382 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12383 return 1;
a269a03c 12384
e075ae69
RH
12385 /* INT->FP conversion is expensive. */
12386 if (get_attr_fp_int_src (dep_insn))
12387 cost += 5;
6ad48e84
JH
12388
12389 /* Show ability of reorder buffer to hide latency of load by executing
12390 in parallel with previous instruction in case
12391 previous instruction is not needed to compute the address. */
12392 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12393 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12394 {
6ad48e84
JH
12395 /* Claim moves to take one cycle, as core can issue one load
12396 at time and the next load can start cycle later. */
12397 if (dep_insn_type == TYPE_IMOV
12398 || dep_insn_type == TYPE_FMOV)
12399 cost = 1;
12400 else if (cost > 2)
12401 cost -= 2;
12402 else
12403 cost = 1;
12404 }
a14003ee 12405 break;
e075ae69 12406
309ada50 12407 case PROCESSOR_ATHLON:
4977bab6 12408 case PROCESSOR_K8:
6ad48e84 12409 memory = get_attr_memory (insn);
6ad48e84 12410
6ad48e84
JH
12411 /* Show ability of reorder buffer to hide latency of load by executing
12412 in parallel with previous instruction in case
12413 previous instruction is not needed to compute the address. */
12414 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12415 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12416 {
26f74aa3
JH
12417 enum attr_unit unit = get_attr_unit (insn);
12418 int loadcost = 3;
12419
12420 /* Because of the difference between the length of integer and
12421 floating unit pipeline preparation stages, the memory operands
b96a374d 12422 for floating point are cheaper.
26f74aa3 12423
c51e6d85 12424 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
12425 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12426 loadcost = 3;
12427 else
12428 loadcost = TARGET_ATHLON ? 2 : 0;
12429
12430 if (cost >= loadcost)
12431 cost -= loadcost;
6ad48e84
JH
12432 else
12433 cost = 0;
12434 }
309ada50 12435
a269a03c 12436 default:
a269a03c
JC
12437 break;
12438 }
12439
12440 return cost;
12441}
0a726ef1 12442
9b690711
RH
12443/* How many alternative schedules to try. This should be as wide as the
12444 scheduling freedom in the DFA, but no wider. Making this value too
12445 large results extra work for the scheduler. */
12446
12447static int
b96a374d 12448ia32_multipass_dfa_lookahead (void)
9b690711 12449{
9e555526 12450 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 12451 return 2;
56bab446 12452
8695f61e
SB
12453 if (ix86_tune == PROCESSOR_PENTIUMPRO
12454 || ix86_tune == PROCESSOR_K6)
56bab446
SB
12455 return 1;
12456
9b690711 12457 else
56bab446 12458 return 0;
9b690711
RH
12459}
12460
0e4970d7 12461\f
a7180f70
BS
12462/* Compute the alignment given to a constant that is being placed in memory.
12463 EXP is the constant and ALIGN is the alignment that the object would
12464 ordinarily have.
12465 The value of this function is used instead of that alignment to align
12466 the object. */
12467
12468int
b96a374d 12469ix86_constant_alignment (tree exp, int align)
a7180f70
BS
12470{
12471 if (TREE_CODE (exp) == REAL_CST)
12472 {
12473 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12474 return 64;
12475 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12476 return 128;
12477 }
4137ba7a
JJ
12478 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12479 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12480 return BITS_PER_WORD;
a7180f70
BS
12481
12482 return align;
12483}
12484
12485/* Compute the alignment for a static variable.
12486 TYPE is the data type, and ALIGN is the alignment that
12487 the object would ordinarily have. The value of this function is used
12488 instead of that alignment to align the object. */
12489
12490int
b96a374d 12491ix86_data_alignment (tree type, int align)
a7180f70
BS
12492{
12493 if (AGGREGATE_TYPE_P (type)
12494 && TYPE_SIZE (type)
12495 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12496 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12497 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12498 return 256;
12499
0d7d98ee
JH
12500 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12501 to 16byte boundary. */
12502 if (TARGET_64BIT)
12503 {
12504 if (AGGREGATE_TYPE_P (type)
12505 && TYPE_SIZE (type)
12506 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12507 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12508 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12509 return 128;
12510 }
12511
a7180f70
BS
12512 if (TREE_CODE (type) == ARRAY_TYPE)
12513 {
12514 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12515 return 64;
12516 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12517 return 128;
12518 }
12519 else if (TREE_CODE (type) == COMPLEX_TYPE)
12520 {
0f290768 12521
a7180f70
BS
12522 if (TYPE_MODE (type) == DCmode && align < 64)
12523 return 64;
12524 if (TYPE_MODE (type) == XCmode && align < 128)
12525 return 128;
12526 }
12527 else if ((TREE_CODE (type) == RECORD_TYPE
12528 || TREE_CODE (type) == UNION_TYPE
12529 || TREE_CODE (type) == QUAL_UNION_TYPE)
12530 && TYPE_FIELDS (type))
12531 {
12532 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12533 return 64;
12534 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12535 return 128;
12536 }
12537 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12538 || TREE_CODE (type) == INTEGER_TYPE)
12539 {
12540 if (TYPE_MODE (type) == DFmode && align < 64)
12541 return 64;
12542 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12543 return 128;
12544 }
12545
12546 return align;
12547}
12548
12549/* Compute the alignment for a local variable.
12550 TYPE is the data type, and ALIGN is the alignment that
12551 the object would ordinarily have. The value of this macro is used
12552 instead of that alignment to align the object. */
12553
12554int
b96a374d 12555ix86_local_alignment (tree type, int align)
a7180f70 12556{
0d7d98ee
JH
12557 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12558 to 16byte boundary. */
12559 if (TARGET_64BIT)
12560 {
12561 if (AGGREGATE_TYPE_P (type)
12562 && TYPE_SIZE (type)
12563 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12564 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12565 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12566 return 128;
12567 }
a7180f70
BS
12568 if (TREE_CODE (type) == ARRAY_TYPE)
12569 {
12570 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12571 return 64;
12572 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12573 return 128;
12574 }
12575 else if (TREE_CODE (type) == COMPLEX_TYPE)
12576 {
12577 if (TYPE_MODE (type) == DCmode && align < 64)
12578 return 64;
12579 if (TYPE_MODE (type) == XCmode && align < 128)
12580 return 128;
12581 }
12582 else if ((TREE_CODE (type) == RECORD_TYPE
12583 || TREE_CODE (type) == UNION_TYPE
12584 || TREE_CODE (type) == QUAL_UNION_TYPE)
12585 && TYPE_FIELDS (type))
12586 {
12587 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12588 return 64;
12589 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12590 return 128;
12591 }
12592 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12593 || TREE_CODE (type) == INTEGER_TYPE)
12594 {
0f290768 12595
a7180f70
BS
12596 if (TYPE_MODE (type) == DFmode && align < 64)
12597 return 64;
12598 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12599 return 128;
12600 }
12601 return align;
12602}
0ed08620
JH
12603\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
   TRAMP is the address of the trampoline storage being filled in.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 is "mov $imm32, %ecx"; CXT (the static chain) is the
	 immediate stored at offset 1.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 is "jmp rel32"; DISP computed above is its displacement.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      /* Running byte offset of the next instruction to emit.  */
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 41 bb: "movl $imm32, %r11d" (zero-extends into %r11) --
	     6 bytes instead of 10.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb: "movabs $imm64, %r11".  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (49 ba: "movabs $imm64,
	 %r10").  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 (49 ff e3: "jmp *%r11").  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: the emitted bytes must fit in the storage the
	 target reserves for a trampoline.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some systems need a runtime call to make the trampoline's stack
     page executable / flush caches.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 12667\f
6e34d3a3
JM
/* Register the machine builtin NAME (expanding via CODE, with function
   type TYPE) through the language hook, but only when some bit of MASK
   is enabled in target_flags; builtins whose MASK includes MASK_64BIT
   are additionally restricted to TARGET_64BIT compilations.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
				 NULL, NULL_TREE);		\
} while (0)
bd793c65 12675
bd793c65
BS
/* Describes one ix86 machine builtin: which target flags enable it, the
   insn used to expand it, its user-visible name, and (for comparison
   builtins) the RTX comparison code to apply.  */
struct builtin_description
{
  /* target_flags bits required for the builtin to be registered
     (tested by def_builtin above).  */
  const unsigned int mask;
  /* Insn code used to expand the builtin.  */
  const enum insn_code icode;
  /* User-visible builtin name, or 0 for entries expanded specially.  */
  const char *const name;
  /* The IX86_BUILTIN_* code identifying this builtin.  */
  const enum ix86_builtins code;
  /* RTX comparison code for comparison builtins; 0 otherwise.  */
  const enum rtx_code comparison;
  /* Extra per-entry flag; in the tables below it is nonzero on the
     "greater" comparisons (e.g. cmpgtps paired with LT) -- presumably
     an operand-swap marker; TODO confirm against the expanders.  */
  const unsigned int flag;
};
12685
/* Scalar ordered/unordered comparison builtins backed by the SSE
   COMISS/UCOMISS and SSE2 COMISD/UCOMISD insns; each entry records the
   RTX comparison code the builtin tests.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
12713
8b60264b 12714static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12715{
12716 /* SSE */
37f22004
L
12717 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12718 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12719 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12720 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12721 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12722 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12723 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12724 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12725
12726 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12727 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12728 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12729 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12730 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12731 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12732 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12733 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12734 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12735 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12736 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12737 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12738 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12739 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12740 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12741 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12742 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12743 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12744 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12745 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12746
12747 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12748 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12749 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12750 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12751
12752 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12753 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12754 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12755 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12756
12757 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12758 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12759 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12760 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12761 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12762
12763 /* MMX */
eeb06b1b
BS
12764 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12765 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12766 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12767 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12768 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12769 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12770 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12771 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12772
12773 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12774 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12775 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12776 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12777 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12778 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12779 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12781
12782 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12783 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12785
12786 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12789 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12790
37f22004
L
12791 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12792 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12793
12794 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12795 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12797 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12798 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12799 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12800
37f22004
L
12801 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12803 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12804 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12805
12806 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12809 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12812
12813 /* Special. */
eeb06b1b
BS
12814 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12815 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12816 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12817
37f22004
L
12818 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12820 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12821
12822 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12823 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12824 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12825 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12826 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12827 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12828
12829 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12830 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12831 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12832 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12834 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12835
12836 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12837 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12839 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12840
37f22004 12841 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12842 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12843
12844 /* SSE2 */
12845 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12853
12854 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12855 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12856 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12857 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12858 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12859 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12860 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12861 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12862 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12863 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12864 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12865 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12866 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12867 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12868 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12869 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12870 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12871 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12872 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12873 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12874
12875 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12879
1877be45
JH
12880 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12884
12885 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12888
12889 /* SSE2 MMX */
12890 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12893 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12894 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12897 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12898
12899 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12900 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12901 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12902 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12903 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12904 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12905 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12906 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12907
12908 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
fbe5eb6d 12910
916b60b7
BS
12911 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12915
12916 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12918
12919 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12925
12926 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12930
12931 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12934 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12935 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12938 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12939
916b60b7
BS
12940 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12943
12944 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12946
9e9fb0ce
JB
12947 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12949
916b60b7
BS
12950 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12956
12957 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12963
12964 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12968
12969 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12970
fbe5eb6d 12971 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 12972 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 12973 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
12974 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12975
9e200aaf
KC
12976 /* SSE3 MMX */
12977 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12978 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12979 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12980 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12981 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12982 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
12983};
12984
8b60264b 12985static const struct builtin_description bdesc_1arg[] =
bd793c65 12986{
37f22004
L
12987 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12988 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12989
37f22004
L
12990 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12992 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12993
37f22004
L
12994 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12995 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12996 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12997 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12998 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12999 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
13000
13001 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 13004 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
13005
13006 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13007
13008 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 13010
fbe5eb6d
BS
13011 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 13016
fbe5eb6d 13017 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 13018
fbe5eb6d
BS
13019 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
13021 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13022 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
13023
13024 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
13026 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13027
22c7c85e
L
13028 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13029
9e200aaf
KC
13030 /* SSE3 */
13031 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13032 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13033 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
13034};
13035
f6155fda 13036void
b96a374d 13037ix86_init_builtins (void)
f6155fda
SS
13038{
13039 if (TARGET_MMX)
13040 ix86_init_mmx_sse_builtins ();
13041}
13042
13043/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
13044 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13045 builtins. */
e37af218 13046static void
b96a374d 13047ix86_init_mmx_sse_builtins (void)
bd793c65 13048{
8b60264b 13049 const struct builtin_description * d;
77ebd435 13050 size_t i;
bd793c65 13051
4a5eab38
PB
13052 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13053 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13054 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13055 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13056 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13057 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13058 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13059 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13060 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13061 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13062
bd793c65 13063 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
13064 tree pcchar_type_node = build_pointer_type (
13065 build_type_variant (char_type_node, 1, 0));
bd793c65 13066 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
13067 tree pcfloat_type_node = build_pointer_type (
13068 build_type_variant (float_type_node, 1, 0));
bd793c65 13069 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13070 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13071 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13072
13073 /* Comparisons. */
13074 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13075 = build_function_type_list (integer_type_node,
13076 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13077 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13078 = build_function_type_list (V4SI_type_node,
13079 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13080 /* MMX/SSE/integer conversions. */
bd793c65 13081 tree int_ftype_v4sf
b4de2f7d
AH
13082 = build_function_type_list (integer_type_node,
13083 V4SF_type_node, NULL_TREE);
453ee231
JH
13084 tree int64_ftype_v4sf
13085 = build_function_type_list (long_long_integer_type_node,
13086 V4SF_type_node, NULL_TREE);
bd793c65 13087 tree int_ftype_v8qi
b4de2f7d 13088 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13089 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13090 = build_function_type_list (V4SF_type_node,
13091 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13092 tree v4sf_ftype_v4sf_int64
13093 = build_function_type_list (V4SF_type_node,
13094 V4SF_type_node, long_long_integer_type_node,
13095 NULL_TREE);
bd793c65 13096 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13097 = build_function_type_list (V4SF_type_node,
13098 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13099 tree int_ftype_v4hi_int
b4de2f7d
AH
13100 = build_function_type_list (integer_type_node,
13101 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13102 tree v4hi_ftype_v4hi_int_int
e7a60f56 13103 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13104 integer_type_node, integer_type_node,
13105 NULL_TREE);
bd793c65
BS
13106 /* Miscellaneous. */
13107 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13108 = build_function_type_list (V8QI_type_node,
13109 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13110 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13111 = build_function_type_list (V4HI_type_node,
13112 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13113 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13114 = build_function_type_list (V4SF_type_node,
13115 V4SF_type_node, V4SF_type_node,
13116 integer_type_node, NULL_TREE);
bd793c65 13117 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13118 = build_function_type_list (V2SI_type_node,
13119 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13120 tree v4hi_ftype_v4hi_int
b4de2f7d 13121 = build_function_type_list (V4HI_type_node,
e7a60f56 13122 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13123 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13124 = build_function_type_list (V4HI_type_node,
13125 V4HI_type_node, long_long_unsigned_type_node,
13126 NULL_TREE);
bd793c65 13127 tree v2si_ftype_v2si_di
b4de2f7d
AH
13128 = build_function_type_list (V2SI_type_node,
13129 V2SI_type_node, long_long_unsigned_type_node,
13130 NULL_TREE);
bd793c65 13131 tree void_ftype_void
b4de2f7d 13132 = build_function_type (void_type_node, void_list_node);
bd793c65 13133 tree void_ftype_unsigned
b4de2f7d 13134 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13135 tree void_ftype_unsigned_unsigned
13136 = build_function_type_list (void_type_node, unsigned_type_node,
13137 unsigned_type_node, NULL_TREE);
13138 tree void_ftype_pcvoid_unsigned_unsigned
13139 = build_function_type_list (void_type_node, const_ptr_type_node,
13140 unsigned_type_node, unsigned_type_node,
13141 NULL_TREE);
bd793c65 13142 tree unsigned_ftype_void
b4de2f7d 13143 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13144 tree di_ftype_void
b4de2f7d 13145 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13146 tree v4sf_ftype_void
b4de2f7d 13147 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13148 tree v2si_ftype_v4sf
b4de2f7d 13149 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13150 /* Loads/stores. */
bd793c65 13151 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13152 = build_function_type_list (void_type_node,
13153 V8QI_type_node, V8QI_type_node,
13154 pchar_type_node, NULL_TREE);
068f5dea
JH
13155 tree v4sf_ftype_pcfloat
13156 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13157 /* @@@ the type is bogus */
13158 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13159 = build_function_type_list (V4SF_type_node,
f8ca7923 13160 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13161 tree void_ftype_pv2si_v4sf
b4de2f7d 13162 = build_function_type_list (void_type_node,
f8ca7923 13163 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13164 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13165 = build_function_type_list (void_type_node,
13166 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13167 tree void_ftype_pdi_di
b4de2f7d
AH
13168 = build_function_type_list (void_type_node,
13169 pdi_type_node, long_long_unsigned_type_node,
13170 NULL_TREE);
916b60b7 13171 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13172 = build_function_type_list (void_type_node,
13173 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13174 /* Normal vector unops. */
13175 tree v4sf_ftype_v4sf
b4de2f7d 13176 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13177
bd793c65
BS
13178 /* Normal vector binops. */
13179 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13180 = build_function_type_list (V4SF_type_node,
13181 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13182 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13183 = build_function_type_list (V8QI_type_node,
13184 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13185 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13186 = build_function_type_list (V4HI_type_node,
13187 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13188 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13189 = build_function_type_list (V2SI_type_node,
13190 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13191 tree di_ftype_di_di
b4de2f7d
AH
13192 = build_function_type_list (long_long_unsigned_type_node,
13193 long_long_unsigned_type_node,
13194 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13195
47f339cf 13196 tree v2si_ftype_v2sf
ae3aa00d 13197 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13198 tree v2sf_ftype_v2si
b4de2f7d 13199 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13200 tree v2si_ftype_v2si
b4de2f7d 13201 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13202 tree v2sf_ftype_v2sf
b4de2f7d 13203 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13204 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13205 = build_function_type_list (V2SF_type_node,
13206 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13207 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13208 = build_function_type_list (V2SI_type_node,
13209 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13210 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13211 tree pcint_type_node = build_pointer_type (
13212 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13213 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13214 tree pcdouble_type_node = build_pointer_type (
13215 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13216 tree int_ftype_v2df_v2df
b4de2f7d
AH
13217 = build_function_type_list (integer_type_node,
13218 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13219
13220 tree ti_ftype_void
b4de2f7d 13221 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13222 tree v2di_ftype_void
13223 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13224 tree ti_ftype_ti_ti
b4de2f7d
AH
13225 = build_function_type_list (intTI_type_node,
13226 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13227 tree void_ftype_pcvoid
13228 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13229 tree v2di_ftype_di
b4de2f7d
AH
13230 = build_function_type_list (V2DI_type_node,
13231 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13232 tree di_ftype_v2di
13233 = build_function_type_list (long_long_unsigned_type_node,
13234 V2DI_type_node, NULL_TREE);
fbe5eb6d 13235 tree v4sf_ftype_v4si
b4de2f7d 13236 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13237 tree v4si_ftype_v4sf
b4de2f7d 13238 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13239 tree v2df_ftype_v4si
b4de2f7d 13240 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13241 tree v4si_ftype_v2df
b4de2f7d 13242 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13243 tree v2si_ftype_v2df
b4de2f7d 13244 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13245 tree v4sf_ftype_v2df
b4de2f7d 13246 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13247 tree v2df_ftype_v2si
b4de2f7d 13248 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13249 tree v2df_ftype_v4sf
b4de2f7d 13250 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13251 tree int_ftype_v2df
b4de2f7d 13252 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13253 tree int64_ftype_v2df
13254 = build_function_type_list (long_long_integer_type_node,
b96a374d 13255 V2DF_type_node, NULL_TREE);
fbe5eb6d 13256 tree v2df_ftype_v2df_int
b4de2f7d
AH
13257 = build_function_type_list (V2DF_type_node,
13258 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13259 tree v2df_ftype_v2df_int64
13260 = build_function_type_list (V2DF_type_node,
13261 V2DF_type_node, long_long_integer_type_node,
13262 NULL_TREE);
fbe5eb6d 13263 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13264 = build_function_type_list (V4SF_type_node,
13265 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13266 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13267 = build_function_type_list (V2DF_type_node,
13268 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13269 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13270 = build_function_type_list (V2DF_type_node,
13271 V2DF_type_node, V2DF_type_node,
13272 integer_type_node,
13273 NULL_TREE);
fbe5eb6d 13274 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13275 = build_function_type_list (V2DF_type_node,
13276 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13277 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13278 = build_function_type_list (void_type_node,
13279 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13280 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13281 = build_function_type_list (void_type_node,
13282 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13283 tree void_ftype_pint_int
b4de2f7d
AH
13284 = build_function_type_list (void_type_node,
13285 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13286 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13287 = build_function_type_list (void_type_node,
13288 V16QI_type_node, V16QI_type_node,
13289 pchar_type_node, NULL_TREE);
068f5dea
JH
13290 tree v2df_ftype_pcdouble
13291 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13292 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13293 = build_function_type_list (V2DF_type_node,
13294 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13295 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13296 = build_function_type_list (V16QI_type_node,
13297 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13298 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13299 = build_function_type_list (V8HI_type_node,
13300 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13301 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13302 = build_function_type_list (V4SI_type_node,
13303 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13304 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13305 = build_function_type_list (V2DI_type_node,
13306 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13307 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13308 = build_function_type_list (V2DI_type_node,
13309 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13310 tree v2df_ftype_v2df
b4de2f7d 13311 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13312 tree v2df_ftype_double
b4de2f7d 13313 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13314 tree v2df_ftype_double_double
b4de2f7d
AH
13315 = build_function_type_list (V2DF_type_node,
13316 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13317 tree int_ftype_v8hi_int
b4de2f7d
AH
13318 = build_function_type_list (integer_type_node,
13319 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13320 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13321 = build_function_type_list (V8HI_type_node,
13322 V8HI_type_node, integer_type_node,
13323 integer_type_node, NULL_TREE);
916b60b7 13324 tree v2di_ftype_v2di_int
b4de2f7d
AH
13325 = build_function_type_list (V2DI_type_node,
13326 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13327 tree v4si_ftype_v4si_int
b4de2f7d
AH
13328 = build_function_type_list (V4SI_type_node,
13329 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13330 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13331 = build_function_type_list (V8HI_type_node,
13332 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13333 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13334 = build_function_type_list (V8HI_type_node,
13335 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13336 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13337 = build_function_type_list (V4SI_type_node,
13338 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13339 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13340 = build_function_type_list (V4SI_type_node,
13341 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13342 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13343 = build_function_type_list (long_long_unsigned_type_node,
13344 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
13345 tree di_ftype_v2si_v2si
13346 = build_function_type_list (long_long_unsigned_type_node,
13347 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 13348 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13349 = build_function_type_list (V2DI_type_node,
13350 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
13351 tree v2di_ftype_v4si_v4si
13352 = build_function_type_list (V2DI_type_node,
13353 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 13354 tree int_ftype_v16qi
b4de2f7d 13355 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13356 tree v16qi_ftype_pcchar
13357 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13358 tree void_ftype_pchar_v16qi
13359 = build_function_type_list (void_type_node,
13360 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13361 tree v4si_ftype_pcint
13362 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13363 tree void_ftype_pcint_v4si
f02e1358 13364 = build_function_type_list (void_type_node,
068f5dea 13365 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13366 tree v2di_ftype_v2di
13367 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13368
f8a1ebc6
JH
13369 tree float80_type;
13370 tree float128_type;
13371
13372 /* The __float80 type. */
13373 if (TYPE_MODE (long_double_type_node) == XFmode)
13374 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13375 "__float80");
13376 else
13377 {
13378 /* The __float80 type. */
13379 float80_type = make_node (REAL_TYPE);
13380 TYPE_PRECISION (float80_type) = 96;
13381 layout_type (float80_type);
13382 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13383 }
13384
13385 float128_type = make_node (REAL_TYPE);
13386 TYPE_PRECISION (float128_type) = 128;
13387 layout_type (float128_type);
13388 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13389
bd793c65
BS
13390 /* Add all builtins that are more or less simple operations on two
13391 operands. */
ca7558fc 13392 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13393 {
13394 /* Use one of the operands; the target can have a different mode for
13395 mask-generating compares. */
13396 enum machine_mode mode;
13397 tree type;
13398
13399 if (d->name == 0)
13400 continue;
13401 mode = insn_data[d->icode].operand[1].mode;
13402
bd793c65
BS
13403 switch (mode)
13404 {
fbe5eb6d
BS
13405 case V16QImode:
13406 type = v16qi_ftype_v16qi_v16qi;
13407 break;
13408 case V8HImode:
13409 type = v8hi_ftype_v8hi_v8hi;
13410 break;
13411 case V4SImode:
13412 type = v4si_ftype_v4si_v4si;
13413 break;
13414 case V2DImode:
13415 type = v2di_ftype_v2di_v2di;
13416 break;
13417 case V2DFmode:
13418 type = v2df_ftype_v2df_v2df;
13419 break;
13420 case TImode:
13421 type = ti_ftype_ti_ti;
13422 break;
bd793c65
BS
13423 case V4SFmode:
13424 type = v4sf_ftype_v4sf_v4sf;
13425 break;
13426 case V8QImode:
13427 type = v8qi_ftype_v8qi_v8qi;
13428 break;
13429 case V4HImode:
13430 type = v4hi_ftype_v4hi_v4hi;
13431 break;
13432 case V2SImode:
13433 type = v2si_ftype_v2si_v2si;
13434 break;
bd793c65
BS
13435 case DImode:
13436 type = di_ftype_di_di;
13437 break;
13438
13439 default:
13440 abort ();
13441 }
0f290768 13442
bd793c65
BS
13443 /* Override for comparisons. */
13444 if (d->icode == CODE_FOR_maskcmpv4sf3
13445 || d->icode == CODE_FOR_maskncmpv4sf3
13446 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13447 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13448 type = v4si_ftype_v4sf_v4sf;
13449
fbe5eb6d
BS
13450 if (d->icode == CODE_FOR_maskcmpv2df3
13451 || d->icode == CODE_FOR_maskncmpv2df3
13452 || d->icode == CODE_FOR_vmmaskcmpv2df3
13453 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13454 type = v2di_ftype_v2df_v2df;
13455
eeb06b1b 13456 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13457 }
13458
13459 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13460 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13461 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13462 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13463 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13464 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13465
13466 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13467 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13468 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13469
13470 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13471 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13472
13473 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13474 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13475
bd793c65 13476 /* comi/ucomi insns. */
ca7558fc 13477 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13478 if (d->mask == MASK_SSE2)
13479 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13480 else
13481 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13482
1255c85c
BS
13483 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13484 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13485 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13486
37f22004
L
13487 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13488 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13489 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13490 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13491 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13492 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13493 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13494 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13495 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13496 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13497 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13498
13499 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13500 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13501
13502 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13503
13504 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13505 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13506 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13507 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13508 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13509 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13510
13511 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13512 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13513 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13514 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13515
13516 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13517 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13518 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13519 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13520
13521 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13522
13523 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13524
13525 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13526 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13527 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13528 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13529 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13530 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13531
13532 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13533
47f339cf
BS
13534 /* Original 3DNow! */
13535 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13536 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13537 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13538 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13539 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13540 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13541 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13542 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13543 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13544 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13545 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13546 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13547 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13552 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13553 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13554 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13555
13556 /* 3DNow! extension as used in the Athlon CPU. */
13557 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13558 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13559 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13560 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13561 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13562 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13563
37f22004 13564 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13565
13566 /* SSE2 */
13567 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13569
13570 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13572 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13573
068f5dea
JH
13574 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13577 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13580
13581 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13585
13586 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13587 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13588 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13590 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13591
13592 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13595 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13596
13597 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13599
13600 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13601
13602 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13603 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13604
13605 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13610
13611 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13612
13613 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13615 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13616 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13617
13618 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13621
13622 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13623 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13624 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13626
13627 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13630 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13632 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13634
068f5dea 13635 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13636 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13638
068f5dea
JH
13639 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13642 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13644 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13645 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13646
37f22004 13647 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13648
9e9fb0ce
JB
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13651
916b60b7
BS
13652 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13655
13656 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13659
13660 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13662
ab3146fd 13663 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13664 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13667
ab3146fd 13668 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13669 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13672
13673 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13675
13676 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13677
13678 /* Prescott New Instructions. */
9e200aaf 13679 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13680 void_ftype_pcvoid_unsigned_unsigned,
13681 IX86_BUILTIN_MONITOR);
9e200aaf 13682 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13683 void_ftype_unsigned_unsigned,
13684 IX86_BUILTIN_MWAIT);
9e200aaf 13685 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13686 v4sf_ftype_v4sf,
13687 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13688 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13689 v4sf_ftype_v4sf,
13690 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13691 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13692 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13693 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13694 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13695 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13696 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13697}
13698
13699/* Errors in the source file can cause expand_expr to return const0_rtx
13700 where we expect a vector. To avoid crashing, use one of the vector
13701 clear instructions. */
13702static rtx
b96a374d 13703safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
13704{
13705 if (x != const0_rtx)
13706 return x;
13707 x = gen_reg_rtx (mode);
13708
47f339cf 13709 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13710 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13711 : gen_rtx_SUBREG (DImode, x, 0)));
13712 else
e37af218 13713 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13714 : gen_rtx_SUBREG (V4SFmode, x, 0),
13715 CONST0_RTX (V4SFmode)));
bd793c65
BS
13716 return x;
13717}
13718
13719/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13720
13721static rtx
b96a374d 13722ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13723{
13724 rtx pat;
13725 tree arg0 = TREE_VALUE (arglist);
13726 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13727 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13728 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13729 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13730 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13731 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13732
13733 if (VECTOR_MODE_P (mode0))
13734 op0 = safe_vector_operand (op0, mode0);
13735 if (VECTOR_MODE_P (mode1))
13736 op1 = safe_vector_operand (op1, mode1);
13737
13738 if (! target
13739 || GET_MODE (target) != tmode
13740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13741 target = gen_reg_rtx (tmode);
13742
d9deed68
JH
13743 if (GET_MODE (op1) == SImode && mode1 == TImode)
13744 {
13745 rtx x = gen_reg_rtx (V4SImode);
13746 emit_insn (gen_sse2_loadd (x, op1));
13747 op1 = gen_lowpart (TImode, x);
13748 }
13749
bd793c65
BS
13750 /* In case the insn wants input operands in modes different from
13751 the result, abort. */
ebe75517
JH
13752 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13753 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13754 abort ();
13755
13756 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13757 op0 = copy_to_mode_reg (mode0, op0);
13758 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13759 op1 = copy_to_mode_reg (mode1, op1);
13760
59bef189
RH
13761 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13762 yet one of the two must not be a memory. This is normally enforced
13763 by expanders, but we didn't bother to create one here. */
13764 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13765 op0 = copy_to_mode_reg (mode0, op0);
13766
bd793c65
BS
13767 pat = GEN_FCN (icode) (target, op0, op1);
13768 if (! pat)
13769 return 0;
13770 emit_insn (pat);
13771 return target;
13772}
13773
13774/* Subroutine of ix86_expand_builtin to take care of stores. */
13775
13776static rtx
b96a374d 13777ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13778{
13779 rtx pat;
13780 tree arg0 = TREE_VALUE (arglist);
13781 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13782 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13783 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13784 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13785 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13786
13787 if (VECTOR_MODE_P (mode1))
13788 op1 = safe_vector_operand (op1, mode1);
13789
13790 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13791 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13792
bd793c65
BS
13793 pat = GEN_FCN (icode) (op0, op1);
13794 if (pat)
13795 emit_insn (pat);
13796 return 0;
13797}
13798
13799/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13800
13801static rtx
b96a374d
AJ
13802ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13803 rtx target, int do_load)
bd793c65
BS
13804{
13805 rtx pat;
13806 tree arg0 = TREE_VALUE (arglist);
13807 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13808 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13809 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13810
13811 if (! target
13812 || GET_MODE (target) != tmode
13813 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13814 target = gen_reg_rtx (tmode);
13815 if (do_load)
13816 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13817 else
13818 {
13819 if (VECTOR_MODE_P (mode0))
13820 op0 = safe_vector_operand (op0, mode0);
13821
13822 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13823 op0 = copy_to_mode_reg (mode0, op0);
13824 }
13825
13826 pat = GEN_FCN (icode) (target, op0);
13827 if (! pat)
13828 return 0;
13829 emit_insn (pat);
13830 return target;
13831}
13832
13833/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13834 sqrtss, rsqrtss, rcpss. */
13835
13836static rtx
b96a374d 13837ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13838{
13839 rtx pat;
13840 tree arg0 = TREE_VALUE (arglist);
59bef189 13841 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13842 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13843 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13844
13845 if (! target
13846 || GET_MODE (target) != tmode
13847 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13848 target = gen_reg_rtx (tmode);
13849
13850 if (VECTOR_MODE_P (mode0))
13851 op0 = safe_vector_operand (op0, mode0);
13852
13853 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13854 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13855
59bef189
RH
13856 op1 = op0;
13857 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13858 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13859
59bef189 13860 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13861 if (! pat)
13862 return 0;
13863 emit_insn (pat);
13864 return target;
13865}
13866
13867/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13868
13869static rtx
b96a374d
AJ
13870ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13871 rtx target)
bd793c65
BS
13872{
13873 rtx pat;
13874 tree arg0 = TREE_VALUE (arglist);
13875 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13876 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13877 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13878 rtx op2;
13879 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13880 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13881 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13882 enum rtx_code comparison = d->comparison;
13883
13884 if (VECTOR_MODE_P (mode0))
13885 op0 = safe_vector_operand (op0, mode0);
13886 if (VECTOR_MODE_P (mode1))
13887 op1 = safe_vector_operand (op1, mode1);
13888
13889 /* Swap operands if we have a comparison that isn't available in
13890 hardware. */
13891 if (d->flag)
13892 {
21e1b5f1
BS
13893 rtx tmp = gen_reg_rtx (mode1);
13894 emit_move_insn (tmp, op1);
bd793c65 13895 op1 = op0;
21e1b5f1 13896 op0 = tmp;
bd793c65 13897 }
21e1b5f1
BS
13898
13899 if (! target
13900 || GET_MODE (target) != tmode
13901 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13902 target = gen_reg_rtx (tmode);
13903
13904 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13905 op0 = copy_to_mode_reg (mode0, op0);
13906 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13907 op1 = copy_to_mode_reg (mode1, op1);
13908
13909 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13910 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13911 if (! pat)
13912 return 0;
13913 emit_insn (pat);
13914 return target;
13915}
13916
13917/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13918
13919static rtx
b96a374d
AJ
13920ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13921 rtx target)
bd793c65
BS
13922{
13923 rtx pat;
13924 tree arg0 = TREE_VALUE (arglist);
13925 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13926 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13927 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13928 rtx op2;
13929 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13930 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13931 enum rtx_code comparison = d->comparison;
13932
13933 if (VECTOR_MODE_P (mode0))
13934 op0 = safe_vector_operand (op0, mode0);
13935 if (VECTOR_MODE_P (mode1))
13936 op1 = safe_vector_operand (op1, mode1);
13937
13938 /* Swap operands if we have a comparison that isn't available in
13939 hardware. */
13940 if (d->flag)
13941 {
13942 rtx tmp = op1;
13943 op1 = op0;
13944 op0 = tmp;
bd793c65
BS
13945 }
13946
13947 target = gen_reg_rtx (SImode);
13948 emit_move_insn (target, const0_rtx);
13949 target = gen_rtx_SUBREG (QImode, target, 0);
13950
13951 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13952 op0 = copy_to_mode_reg (mode0, op0);
13953 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13954 op1 = copy_to_mode_reg (mode1, op1);
13955
13956 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13957 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13958 if (! pat)
13959 return 0;
13960 emit_insn (pat);
29628f27
BS
13961 emit_insn (gen_rtx_SET (VOIDmode,
13962 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13963 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13964 SET_DEST (pat),
29628f27 13965 const0_rtx)));
bd793c65 13966
6f1a6c5b 13967 return SUBREG_REG (target);
bd793c65
BS
13968}
13969
13970/* Expand an expression EXP that calls a built-in function,
13971 with result going to TARGET if that's convenient
13972 (and in mode MODE if that's convenient).
13973 SUBTARGET may be used as the target for computing one of EXP's operands.
13974 IGNORE is nonzero if the value is to be ignored. */
13975
13976rtx
b96a374d
AJ
13977ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13978 enum machine_mode mode ATTRIBUTE_UNUSED,
13979 int ignore ATTRIBUTE_UNUSED)
bd793c65 13980{
8b60264b 13981 const struct builtin_description *d;
77ebd435 13982 size_t i;
bd793c65
BS
13983 enum insn_code icode;
13984 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13985 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13986 tree arg0, arg1, arg2;
bd793c65
BS
13987 rtx op0, op1, op2, pat;
13988 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13989 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
13990
13991 switch (fcode)
13992 {
13993 case IX86_BUILTIN_EMMS:
13994 emit_insn (gen_emms ());
13995 return 0;
13996
13997 case IX86_BUILTIN_SFENCE:
13998 emit_insn (gen_sfence ());
13999 return 0;
14000
bd793c65 14001 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
14002 case IX86_BUILTIN_PEXTRW128:
14003 icode = (fcode == IX86_BUILTIN_PEXTRW
14004 ? CODE_FOR_mmx_pextrw
14005 : CODE_FOR_sse2_pextrw);
bd793c65
BS
14006 arg0 = TREE_VALUE (arglist);
14007 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14008 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14009 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14010 tmode = insn_data[icode].operand[0].mode;
14011 mode0 = insn_data[icode].operand[1].mode;
14012 mode1 = insn_data[icode].operand[2].mode;
14013
14014 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14015 op0 = copy_to_mode_reg (mode0, op0);
14016 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14017 {
ebe75517
JH
14018 error ("selector must be an integer constant in the range 0..%i",
14019 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 14020 return gen_reg_rtx (tmode);
bd793c65
BS
14021 }
14022 if (target == 0
14023 || GET_MODE (target) != tmode
14024 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14025 target = gen_reg_rtx (tmode);
14026 pat = GEN_FCN (icode) (target, op0, op1);
14027 if (! pat)
14028 return 0;
14029 emit_insn (pat);
14030 return target;
14031
14032 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
14033 case IX86_BUILTIN_PINSRW128:
14034 icode = (fcode == IX86_BUILTIN_PINSRW
14035 ? CODE_FOR_mmx_pinsrw
14036 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
14037 arg0 = TREE_VALUE (arglist);
14038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14043 tmode = insn_data[icode].operand[0].mode;
14044 mode0 = insn_data[icode].operand[1].mode;
14045 mode1 = insn_data[icode].operand[2].mode;
14046 mode2 = insn_data[icode].operand[3].mode;
14047
14048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14049 op0 = copy_to_mode_reg (mode0, op0);
14050 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14051 op1 = copy_to_mode_reg (mode1, op1);
14052 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14053 {
ebe75517
JH
14054 error ("selector must be an integer constant in the range 0..%i",
14055 fcode == IX86_BUILTIN_PINSRW ? 15:255);
bd793c65
BS
14056 return const0_rtx;
14057 }
14058 if (target == 0
14059 || GET_MODE (target) != tmode
14060 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14061 target = gen_reg_rtx (tmode);
14062 pat = GEN_FCN (icode) (target, op0, op1, op2);
14063 if (! pat)
14064 return 0;
14065 emit_insn (pat);
14066 return target;
14067
14068 case IX86_BUILTIN_MASKMOVQ:
077084dd 14069 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
14070 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14071 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
14072 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14073 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
14074 /* Note the arg order is different from the operand order. */
14075 arg1 = TREE_VALUE (arglist);
14076 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14077 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14078 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14079 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14080 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14081 mode0 = insn_data[icode].operand[0].mode;
14082 mode1 = insn_data[icode].operand[1].mode;
14083 mode2 = insn_data[icode].operand[2].mode;
14084
5c464583 14085 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14086 op0 = copy_to_mode_reg (mode0, op0);
14087 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14088 op1 = copy_to_mode_reg (mode1, op1);
14089 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14090 op2 = copy_to_mode_reg (mode2, op2);
14091 pat = GEN_FCN (icode) (op0, op1, op2);
14092 if (! pat)
14093 return 0;
14094 emit_insn (pat);
14095 return 0;
14096
14097 case IX86_BUILTIN_SQRTSS:
14098 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14099 case IX86_BUILTIN_RSQRTSS:
14100 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14101 case IX86_BUILTIN_RCPSS:
14102 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14103
14104 case IX86_BUILTIN_LOADAPS:
14105 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14106
14107 case IX86_BUILTIN_LOADUPS:
14108 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14109
14110 case IX86_BUILTIN_STOREAPS:
e37af218 14111 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14112
bd793c65 14113 case IX86_BUILTIN_STOREUPS:
e37af218 14114 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14115
14116 case IX86_BUILTIN_LOADSS:
14117 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14118
14119 case IX86_BUILTIN_STORESS:
e37af218 14120 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14121
0f290768 14122 case IX86_BUILTIN_LOADHPS:
bd793c65 14123 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14124 case IX86_BUILTIN_LOADHPD:
14125 case IX86_BUILTIN_LOADLPD:
14126 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14127 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14128 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
997404de 14129 : CODE_FOR_sse2_movsd);
bd793c65
BS
14130 arg0 = TREE_VALUE (arglist);
14131 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14132 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14133 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14134 tmode = insn_data[icode].operand[0].mode;
14135 mode0 = insn_data[icode].operand[1].mode;
14136 mode1 = insn_data[icode].operand[2].mode;
14137
14138 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14139 op0 = copy_to_mode_reg (mode0, op0);
14140 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14141 if (target == 0
14142 || GET_MODE (target) != tmode
14143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14144 target = gen_reg_rtx (tmode);
14145 pat = GEN_FCN (icode) (target, op0, op1);
14146 if (! pat)
14147 return 0;
14148 emit_insn (pat);
14149 return target;
0f290768 14150
bd793c65
BS
14151 case IX86_BUILTIN_STOREHPS:
14152 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14153 case IX86_BUILTIN_STOREHPD:
14154 case IX86_BUILTIN_STORELPD:
14155 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14156 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14157 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
997404de 14158 : CODE_FOR_sse2_movsd);
bd793c65
BS
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 mode0 = insn_data[icode].operand[1].mode;
14164 mode1 = insn_data[icode].operand[2].mode;
14165
14166 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14167 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14168 op1 = copy_to_mode_reg (mode1, op1);
14169
14170 pat = GEN_FCN (icode) (op0, op0, op1);
14171 if (! pat)
14172 return 0;
14173 emit_insn (pat);
14174 return 0;
14175
14176 case IX86_BUILTIN_MOVNTPS:
e37af218 14177 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14178 case IX86_BUILTIN_MOVNTQ:
e37af218 14179 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14180
14181 case IX86_BUILTIN_LDMXCSR:
14182 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14183 target = assign_386_stack_local (SImode, 0);
14184 emit_move_insn (target, op0);
14185 emit_insn (gen_ldmxcsr (target));
14186 return 0;
14187
14188 case IX86_BUILTIN_STMXCSR:
14189 target = assign_386_stack_local (SImode, 0);
14190 emit_insn (gen_stmxcsr (target));
14191 return copy_to_mode_reg (SImode, target);
14192
bd793c65 14193 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14194 case IX86_BUILTIN_SHUFPD:
14195 icode = (fcode == IX86_BUILTIN_SHUFPS
14196 ? CODE_FOR_sse_shufps
14197 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14198 arg0 = TREE_VALUE (arglist);
14199 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14200 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14201 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14202 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14203 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14204 tmode = insn_data[icode].operand[0].mode;
14205 mode0 = insn_data[icode].operand[1].mode;
14206 mode1 = insn_data[icode].operand[2].mode;
14207 mode2 = insn_data[icode].operand[3].mode;
14208
14209 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14210 op0 = copy_to_mode_reg (mode0, op0);
14211 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14212 op1 = copy_to_mode_reg (mode1, op1);
14213 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14214 {
14215 /* @@@ better error message */
14216 error ("mask must be an immediate");
6f1a6c5b 14217 return gen_reg_rtx (tmode);
bd793c65
BS
14218 }
14219 if (target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14223 pat = GEN_FCN (icode) (target, op0, op1, op2);
14224 if (! pat)
14225 return 0;
14226 emit_insn (pat);
14227 return target;
14228
14229 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14230 case IX86_BUILTIN_PSHUFD:
14231 case IX86_BUILTIN_PSHUFHW:
14232 case IX86_BUILTIN_PSHUFLW:
14233 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14234 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14235 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14236 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14237 arg0 = TREE_VALUE (arglist);
14238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14239 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14240 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14241 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14242 mode1 = insn_data[icode].operand[1].mode;
14243 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14244
29628f27
BS
14245 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14246 op0 = copy_to_mode_reg (mode1, op0);
14247 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14248 {
14249 /* @@@ better error message */
14250 error ("mask must be an immediate");
14251 return const0_rtx;
14252 }
14253 if (target == 0
14254 || GET_MODE (target) != tmode
14255 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14256 target = gen_reg_rtx (tmode);
29628f27 14257 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14258 if (! pat)
14259 return 0;
14260 emit_insn (pat);
14261 return target;
14262
ab3146fd
ZD
14263 case IX86_BUILTIN_PSLLDQI128:
14264 case IX86_BUILTIN_PSRLDQI128:
14265 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14266 : CODE_FOR_sse2_lshrti3);
14267 arg0 = TREE_VALUE (arglist);
14268 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14269 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14270 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14271 tmode = insn_data[icode].operand[0].mode;
14272 mode1 = insn_data[icode].operand[1].mode;
14273 mode2 = insn_data[icode].operand[2].mode;
14274
14275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14276 {
14277 op0 = copy_to_reg (op0);
14278 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14279 }
14280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14281 {
14282 error ("shift must be an immediate");
14283 return const0_rtx;
14284 }
14285 target = gen_reg_rtx (V2DImode);
14286 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14287 if (! pat)
14288 return 0;
14289 emit_insn (pat);
14290 return target;
14291
47f339cf
BS
14292 case IX86_BUILTIN_FEMMS:
14293 emit_insn (gen_femms ());
14294 return NULL_RTX;
14295
14296 case IX86_BUILTIN_PAVGUSB:
14297 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14298
14299 case IX86_BUILTIN_PF2ID:
14300 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14301
14302 case IX86_BUILTIN_PFACC:
14303 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14304
14305 case IX86_BUILTIN_PFADD:
14306 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14307
14308 case IX86_BUILTIN_PFCMPEQ:
14309 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14310
14311 case IX86_BUILTIN_PFCMPGE:
14312 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14313
14314 case IX86_BUILTIN_PFCMPGT:
14315 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14316
14317 case IX86_BUILTIN_PFMAX:
14318 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14319
14320 case IX86_BUILTIN_PFMIN:
14321 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14322
14323 case IX86_BUILTIN_PFMUL:
14324 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14325
14326 case IX86_BUILTIN_PFRCP:
14327 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14328
14329 case IX86_BUILTIN_PFRCPIT1:
14330 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14331
14332 case IX86_BUILTIN_PFRCPIT2:
14333 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14334
14335 case IX86_BUILTIN_PFRSQIT1:
14336 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14337
14338 case IX86_BUILTIN_PFRSQRT:
14339 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14340
14341 case IX86_BUILTIN_PFSUB:
14342 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14343
14344 case IX86_BUILTIN_PFSUBR:
14345 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14346
14347 case IX86_BUILTIN_PI2FD:
14348 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14349
14350 case IX86_BUILTIN_PMULHRW:
14351 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14352
47f339cf
BS
14353 case IX86_BUILTIN_PF2IW:
14354 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14355
14356 case IX86_BUILTIN_PFNACC:
14357 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14358
14359 case IX86_BUILTIN_PFPNACC:
14360 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14361
14362 case IX86_BUILTIN_PI2FW:
14363 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14364
14365 case IX86_BUILTIN_PSWAPDSI:
14366 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14367
14368 case IX86_BUILTIN_PSWAPDSF:
14369 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14370
e37af218
RH
14371 case IX86_BUILTIN_SSE_ZERO:
14372 target = gen_reg_rtx (V4SFmode);
4977bab6 14373 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14374 return target;
14375
bd793c65
BS
14376 case IX86_BUILTIN_MMX_ZERO:
14377 target = gen_reg_rtx (DImode);
14378 emit_insn (gen_mmx_clrdi (target));
14379 return target;
14380
f02e1358
JH
14381 case IX86_BUILTIN_CLRTI:
14382 target = gen_reg_rtx (V2DImode);
14383 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14384 return target;
14385
14386
fbe5eb6d
BS
14387 case IX86_BUILTIN_SQRTSD:
14388 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14389 case IX86_BUILTIN_LOADAPD:
14390 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14391 case IX86_BUILTIN_LOADUPD:
14392 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14393
14394 case IX86_BUILTIN_STOREAPD:
14395 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14396 case IX86_BUILTIN_STOREUPD:
14397 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14398
14399 case IX86_BUILTIN_LOADSD:
14400 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14401
14402 case IX86_BUILTIN_STORESD:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14404
14405 case IX86_BUILTIN_SETPD1:
14406 target = assign_386_stack_local (DFmode, 0);
14407 arg0 = TREE_VALUE (arglist);
14408 emit_move_insn (adjust_address (target, DFmode, 0),
14409 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14410 op0 = gen_reg_rtx (V2DFmode);
14411 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 14412 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
14413 return op0;
14414
14415 case IX86_BUILTIN_SETPD:
14416 target = assign_386_stack_local (V2DFmode, 0);
14417 arg0 = TREE_VALUE (arglist);
14418 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14419 emit_move_insn (adjust_address (target, DFmode, 0),
14420 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14421 emit_move_insn (adjust_address (target, DFmode, 8),
14422 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14423 op0 = gen_reg_rtx (V2DFmode);
14424 emit_insn (gen_sse2_movapd (op0, target));
14425 return op0;
14426
14427 case IX86_BUILTIN_LOADRPD:
14428 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14429 gen_reg_rtx (V2DFmode), 1);
60c81c89 14430 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
14431 return target;
14432
14433 case IX86_BUILTIN_LOADPD1:
14434 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14435 gen_reg_rtx (V2DFmode), 1);
14436 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14437 return target;
14438
14439 case IX86_BUILTIN_STOREPD1:
14440 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14441 case IX86_BUILTIN_STORERPD:
14442 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14443
48126a97
JH
14444 case IX86_BUILTIN_CLRPD:
14445 target = gen_reg_rtx (V2DFmode);
14446 emit_insn (gen_sse_clrv2df (target));
14447 return target;
14448
fbe5eb6d
BS
14449 case IX86_BUILTIN_MFENCE:
14450 emit_insn (gen_sse2_mfence ());
14451 return 0;
14452 case IX86_BUILTIN_LFENCE:
14453 emit_insn (gen_sse2_lfence ());
14454 return 0;
14455
14456 case IX86_BUILTIN_CLFLUSH:
14457 arg0 = TREE_VALUE (arglist);
14458 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14459 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14460 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14461 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14462
14463 emit_insn (gen_sse2_clflush (op0));
14464 return 0;
14465
14466 case IX86_BUILTIN_MOVNTPD:
14467 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14468 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14469 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14470 case IX86_BUILTIN_MOVNTI:
14471 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14472
f02e1358
JH
14473 case IX86_BUILTIN_LOADDQA:
14474 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14475 case IX86_BUILTIN_LOADDQU:
14476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14477 case IX86_BUILTIN_LOADD:
14478 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14479
14480 case IX86_BUILTIN_STOREDQA:
14481 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14482 case IX86_BUILTIN_STOREDQU:
14483 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14484 case IX86_BUILTIN_STORED:
14485 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14486
22c7c85e
L
14487 case IX86_BUILTIN_MONITOR:
14488 arg0 = TREE_VALUE (arglist);
14489 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14490 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14491 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14492 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14493 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14494 if (!REG_P (op0))
14495 op0 = copy_to_mode_reg (SImode, op0);
14496 if (!REG_P (op1))
14497 op1 = copy_to_mode_reg (SImode, op1);
14498 if (!REG_P (op2))
14499 op2 = copy_to_mode_reg (SImode, op2);
14500 emit_insn (gen_monitor (op0, op1, op2));
14501 return 0;
14502
14503 case IX86_BUILTIN_MWAIT:
14504 arg0 = TREE_VALUE (arglist);
14505 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14506 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14507 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14508 if (!REG_P (op0))
14509 op0 = copy_to_mode_reg (SImode, op0);
14510 if (!REG_P (op1))
14511 op1 = copy_to_mode_reg (SImode, op1);
14512 emit_insn (gen_mwait (op0, op1));
14513 return 0;
14514
14515 case IX86_BUILTIN_LOADDDUP:
14516 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14517
14518 case IX86_BUILTIN_LDDQU:
14519 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14520 1);
14521
bd793c65
BS
14522 default:
14523 break;
14524 }
14525
ca7558fc 14526 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14527 if (d->code == fcode)
14528 {
14529 /* Compares are treated specially. */
14530 if (d->icode == CODE_FOR_maskcmpv4sf3
14531 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14532 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14533 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14534 || d->icode == CODE_FOR_maskcmpv2df3
14535 || d->icode == CODE_FOR_vmmaskcmpv2df3
14536 || d->icode == CODE_FOR_maskncmpv2df3
14537 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14538 return ix86_expand_sse_compare (d, arglist, target);
14539
14540 return ix86_expand_binop_builtin (d->icode, arglist, target);
14541 }
14542
ca7558fc 14543 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14544 if (d->code == fcode)
14545 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14546
ca7558fc 14547 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14548 if (d->code == fcode)
14549 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14550
bd793c65
BS
14551 /* @@@ Should really do something sensible here. */
14552 return 0;
bd793c65 14553}
4211a8fb
JH
14554
14555/* Store OPERAND to the memory after reload is completed. This means
f710504c 14556 that we can't easily use assign_stack_local. */
4211a8fb 14557rtx
b96a374d 14558ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14559{
898d374d 14560 rtx result;
4211a8fb
JH
14561 if (!reload_completed)
14562 abort ();
a5b378d6 14563 if (TARGET_RED_ZONE)
898d374d
JH
14564 {
14565 result = gen_rtx_MEM (mode,
14566 gen_rtx_PLUS (Pmode,
14567 stack_pointer_rtx,
14568 GEN_INT (-RED_ZONE_SIZE)));
14569 emit_move_insn (result, operand);
14570 }
a5b378d6 14571 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14572 {
898d374d 14573 switch (mode)
4211a8fb 14574 {
898d374d
JH
14575 case HImode:
14576 case SImode:
14577 operand = gen_lowpart (DImode, operand);
5efb1046 14578 /* FALLTHRU */
898d374d 14579 case DImode:
4211a8fb 14580 emit_insn (
898d374d
JH
14581 gen_rtx_SET (VOIDmode,
14582 gen_rtx_MEM (DImode,
14583 gen_rtx_PRE_DEC (DImode,
14584 stack_pointer_rtx)),
14585 operand));
14586 break;
14587 default:
14588 abort ();
14589 }
14590 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14591 }
14592 else
14593 {
14594 switch (mode)
14595 {
14596 case DImode:
14597 {
14598 rtx operands[2];
14599 split_di (&operand, 1, operands, operands + 1);
14600 emit_insn (
14601 gen_rtx_SET (VOIDmode,
14602 gen_rtx_MEM (SImode,
14603 gen_rtx_PRE_DEC (Pmode,
14604 stack_pointer_rtx)),
14605 operands[1]));
14606 emit_insn (
14607 gen_rtx_SET (VOIDmode,
14608 gen_rtx_MEM (SImode,
14609 gen_rtx_PRE_DEC (Pmode,
14610 stack_pointer_rtx)),
14611 operands[0]));
14612 }
14613 break;
14614 case HImode:
14615 /* It is better to store HImodes as SImodes. */
14616 if (!TARGET_PARTIAL_REG_STALL)
14617 operand = gen_lowpart (SImode, operand);
5efb1046 14618 /* FALLTHRU */
898d374d 14619 case SImode:
4211a8fb 14620 emit_insn (
898d374d
JH
14621 gen_rtx_SET (VOIDmode,
14622 gen_rtx_MEM (GET_MODE (operand),
14623 gen_rtx_PRE_DEC (SImode,
14624 stack_pointer_rtx)),
14625 operand));
14626 break;
14627 default:
14628 abort ();
4211a8fb 14629 }
898d374d 14630 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14631 }
898d374d 14632 return result;
4211a8fb
JH
14633}
14634
14635/* Free operand from the memory. */
14636void
b96a374d 14637ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14638{
a5b378d6 14639 if (!TARGET_RED_ZONE)
898d374d
JH
14640 {
14641 int size;
14642
14643 if (mode == DImode || TARGET_64BIT)
14644 size = 8;
14645 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14646 size = 2;
14647 else
14648 size = 4;
14649 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14650 to pop or add instruction if registers are available. */
14651 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14652 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14653 GEN_INT (size))));
14654 }
4211a8fb 14655}
a946dd00 14656
f84aa48a
JH
14657/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14658 QImode must go into class Q_REGS.
14659 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14660 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14661enum reg_class
b96a374d 14662ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14663{
1877be45
JH
14664 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14665 return NO_REGS;
f84aa48a
JH
14666 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14667 {
14668 /* SSE can't load any constant directly yet. */
14669 if (SSE_CLASS_P (class))
14670 return NO_REGS;
14671 /* Floats can load 0 and 1. */
14672 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14673 {
14674 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14675 if (MAYBE_SSE_CLASS_P (class))
14676 return (reg_class_subset_p (class, GENERAL_REGS)
14677 ? GENERAL_REGS : FLOAT_REGS);
14678 else
14679 return class;
14680 }
14681 /* General regs can load everything. */
14682 if (reg_class_subset_p (class, GENERAL_REGS))
14683 return GENERAL_REGS;
14684 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14685 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14686 return NO_REGS;
14687 }
14688 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14689 return NO_REGS;
14690 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14691 return Q_REGS;
14692 return class;
14693}
14694
14695/* If we are copying between general and FP registers, we need a memory
14696 location. The same is true for SSE and MMX registers.
14697
14698 The macro can't work reliably when one of the CLASSES is class containing
14699 registers from multiple units (SSE, MMX, integer). We avoid this by never
14700 combining those units in single alternative in the machine description.
14701 Ensure that this constraint holds to avoid unexpected surprises.
14702
14703 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14704 enforce these sanity checks. */
14705int
b96a374d
AJ
14706ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14707 enum machine_mode mode, int strict)
f84aa48a
JH
14708{
14709 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14710 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14711 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14712 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14713 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14714 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14715 {
14716 if (strict)
14717 abort ();
14718 else
14719 return 1;
14720 }
14721 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8f62128d
JH
14722 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14723 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14724 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14725 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
f84aa48a
JH
14726}
14727/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14728 one in class CLASS2.
f84aa48a
JH
14729
14730 It is not required that the cost always equal 2 when FROM is the same as TO;
14731 on some machines it is expensive to move between registers if they are not
14732 general registers. */
14733int
b96a374d
AJ
14734ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14735 enum reg_class class2)
f84aa48a
JH
14736{
14737 /* In case we require secondary memory, compute cost of the store followed
b96a374d 14738 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
14739 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14740
f84aa48a
JH
14741 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14742 {
d631b80a
RH
14743 int cost = 1;
14744
14745 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14746 MEMORY_MOVE_COST (mode, class1, 1));
14747 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14748 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14749
d631b80a
RH
14750 /* In case of copying from general_purpose_register we may emit multiple
14751 stores followed by single load causing memory size mismatch stall.
d1f87653 14752 Count this as arbitrarily high cost of 20. */
62415523 14753 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14754 cost += 20;
14755
14756 /* In the case of FP/MMX moves, the registers actually overlap, and we
14757 have to switch modes in order to treat them differently. */
14758 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14759 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14760 cost += 20;
14761
14762 return cost;
f84aa48a 14763 }
d631b80a 14764
92d0fb09 14765 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14766 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14767 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14768 return ix86_cost->mmxsse_to_integer;
14769 if (MAYBE_FLOAT_CLASS_P (class1))
14770 return ix86_cost->fp_move;
14771 if (MAYBE_SSE_CLASS_P (class1))
14772 return ix86_cost->sse_move;
14773 if (MAYBE_MMX_CLASS_P (class1))
14774 return ix86_cost->mmx_move;
f84aa48a
JH
14775 return 2;
14776}
14777
a946dd00
JH
14778/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14779int
b96a374d 14780ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14781{
14782 /* Flags and only flags can only hold CCmode values. */
14783 if (CC_REGNO_P (regno))
14784 return GET_MODE_CLASS (mode) == MODE_CC;
14785 if (GET_MODE_CLASS (mode) == MODE_CC
14786 || GET_MODE_CLASS (mode) == MODE_RANDOM
14787 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14788 return 0;
14789 if (FP_REGNO_P (regno))
14790 return VALID_FP_MODE_P (mode);
14791 if (SSE_REGNO_P (regno))
a67a3220 14792 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
a946dd00 14793 if (MMX_REGNO_P (regno))
a67a3220
JH
14794 return (TARGET_MMX
14795 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
a946dd00
JH
14796 /* We handle both integer and floats in the general purpose registers.
14797 In future we should be able to handle vector modes as well. */
14798 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14799 return 0;
14800 /* Take care for QImode values - they can be in non-QI regs, but then
14801 they do cause partial register stalls. */
d2836273 14802 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14803 return 1;
14804 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14805}
fa79946e
JH
14806
14807/* Return the cost of moving data of mode M between a
14808 register and memory. A value of 2 is the default; this cost is
14809 relative to those in `REGISTER_MOVE_COST'.
14810
14811 If moving between registers and memory is more expensive than
14812 between two registers, you should define this macro to express the
a4f31c00
AJ
14813 relative cost.
14814
fa79946e
JH
14815 Model also increased moving costs of QImode registers in non
14816 Q_REGS classes.
14817 */
14818int
b96a374d 14819ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14820{
14821 if (FLOAT_CLASS_P (class))
14822 {
14823 int index;
14824 switch (mode)
14825 {
14826 case SFmode:
14827 index = 0;
14828 break;
14829 case DFmode:
14830 index = 1;
14831 break;
14832 case XFmode:
fa79946e
JH
14833 index = 2;
14834 break;
14835 default:
14836 return 100;
14837 }
14838 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14839 }
14840 if (SSE_CLASS_P (class))
14841 {
14842 int index;
14843 switch (GET_MODE_SIZE (mode))
14844 {
14845 case 4:
14846 index = 0;
14847 break;
14848 case 8:
14849 index = 1;
14850 break;
14851 case 16:
14852 index = 2;
14853 break;
14854 default:
14855 return 100;
14856 }
14857 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14858 }
14859 if (MMX_CLASS_P (class))
14860 {
14861 int index;
14862 switch (GET_MODE_SIZE (mode))
14863 {
14864 case 4:
14865 index = 0;
14866 break;
14867 case 8:
14868 index = 1;
14869 break;
14870 default:
14871 return 100;
14872 }
14873 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14874 }
14875 switch (GET_MODE_SIZE (mode))
14876 {
14877 case 1:
14878 if (in)
14879 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14880 : ix86_cost->movzbl_load);
14881 else
14882 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14883 : ix86_cost->int_store[0] + 4);
14884 break;
14885 case 2:
14886 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14887 default:
14888 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14889 if (mode == TFmode)
14890 mode = XFmode;
3bb7e126 14891 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14892 * (((int) GET_MODE_SIZE (mode)
14893 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14894 }
14895}
0ecf09f9 14896
3c50106f
RH
14897/* Compute a (partial) cost for rtx X. Return true if the complete
14898 cost has been computed, and false if subexpressions should be
14899 scanned. In either case, *TOTAL contains the cost result. */
14900
14901static bool
b96a374d 14902ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14903{
14904 enum machine_mode mode = GET_MODE (x);
14905
14906 switch (code)
14907 {
14908 case CONST_INT:
14909 case CONST:
14910 case LABEL_REF:
14911 case SYMBOL_REF:
14912 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14913 *total = 3;
14914 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14915 *total = 2;
3504dad3
JH
14916 else if (flag_pic && SYMBOLIC_CONST (x)
14917 && (!TARGET_64BIT
14918 || (!GET_CODE (x) != LABEL_REF
14919 && (GET_CODE (x) != SYMBOL_REF
12969f45 14920 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14921 *total = 1;
14922 else
14923 *total = 0;
14924 return true;
14925
14926 case CONST_DOUBLE:
14927 if (mode == VOIDmode)
14928 *total = 0;
14929 else
14930 switch (standard_80387_constant_p (x))
14931 {
14932 case 1: /* 0.0 */
14933 *total = 1;
14934 break;
881b2a96 14935 default: /* Other constants */
3c50106f
RH
14936 *total = 2;
14937 break;
881b2a96
RS
14938 case 0:
14939 case -1:
3c50106f
RH
14940 /* Start with (MEM (SYMBOL_REF)), since that's where
14941 it'll probably end up. Add a penalty for size. */
14942 *total = (COSTS_N_INSNS (1)
3504dad3 14943 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14944 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14945 break;
14946 }
14947 return true;
14948
14949 case ZERO_EXTEND:
14950 /* The zero extensions is often completely free on x86_64, so make
14951 it as cheap as possible. */
14952 if (TARGET_64BIT && mode == DImode
14953 && GET_MODE (XEXP (x, 0)) == SImode)
14954 *total = 1;
14955 else if (TARGET_ZERO_EXTEND_WITH_AND)
14956 *total = COSTS_N_INSNS (ix86_cost->add);
14957 else
14958 *total = COSTS_N_INSNS (ix86_cost->movzx);
14959 return false;
14960
14961 case SIGN_EXTEND:
14962 *total = COSTS_N_INSNS (ix86_cost->movsx);
14963 return false;
14964
14965 case ASHIFT:
14966 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14967 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14968 {
14969 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14970 if (value == 1)
14971 {
14972 *total = COSTS_N_INSNS (ix86_cost->add);
14973 return false;
14974 }
14975 if ((value == 2 || value == 3)
3c50106f
RH
14976 && ix86_cost->lea <= ix86_cost->shift_const)
14977 {
14978 *total = COSTS_N_INSNS (ix86_cost->lea);
14979 return false;
14980 }
14981 }
5efb1046 14982 /* FALLTHRU */
3c50106f
RH
14983
14984 case ROTATE:
14985 case ASHIFTRT:
14986 case LSHIFTRT:
14987 case ROTATERT:
14988 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14989 {
14990 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14991 {
14992 if (INTVAL (XEXP (x, 1)) > 32)
14993 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14994 else
14995 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14996 }
14997 else
14998 {
14999 if (GET_CODE (XEXP (x, 1)) == AND)
15000 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15001 else
15002 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15003 }
15004 }
15005 else
15006 {
15007 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15008 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15009 else
15010 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15011 }
15012 return false;
15013
15014 case MULT:
15015 if (FLOAT_MODE_P (mode))
3c50106f 15016 {
4a5eab38
PB
15017 *total = COSTS_N_INSNS (ix86_cost->fmul);
15018 return false;
3c50106f
RH
15019 }
15020 else
15021 {
4a5eab38
PB
15022 rtx op0 = XEXP (x, 0);
15023 rtx op1 = XEXP (x, 1);
15024 int nbits;
15025 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15026 {
15027 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15028 for (nbits = 0; value != 0; value &= value - 1)
15029 nbits++;
15030 }
15031 else
15032 /* This is arbitrary. */
15033 nbits = 7;
15034
15035 /* Compute costs correctly for widening multiplication. */
15036 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15037 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15038 == GET_MODE_SIZE (mode))
15039 {
15040 int is_mulwiden = 0;
15041 enum machine_mode inner_mode = GET_MODE (op0);
15042
15043 if (GET_CODE (op0) == GET_CODE (op1))
15044 is_mulwiden = 1, op1 = XEXP (op1, 0);
15045 else if (GET_CODE (op1) == CONST_INT)
15046 {
15047 if (GET_CODE (op0) == SIGN_EXTEND)
15048 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15049 == INTVAL (op1);
15050 else
15051 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15052 }
15053
15054 if (is_mulwiden)
15055 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15056 }
15057
15058 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15059 + nbits * ix86_cost->mult_bit)
15060 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15061
15062 return true;
3c50106f 15063 }
3c50106f
RH
15064
15065 case DIV:
15066 case UDIV:
15067 case MOD:
15068 case UMOD:
15069 if (FLOAT_MODE_P (mode))
15070 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15071 else
15072 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15073 return false;
15074
15075 case PLUS:
15076 if (FLOAT_MODE_P (mode))
15077 *total = COSTS_N_INSNS (ix86_cost->fadd);
e0c00392 15078 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
15079 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15080 {
15081 if (GET_CODE (XEXP (x, 0)) == PLUS
15082 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15083 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15084 && CONSTANT_P (XEXP (x, 1)))
15085 {
15086 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15087 if (val == 2 || val == 4 || val == 8)
15088 {
15089 *total = COSTS_N_INSNS (ix86_cost->lea);
15090 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15091 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15092 outer_code);
15093 *total += rtx_cost (XEXP (x, 1), outer_code);
15094 return true;
15095 }
15096 }
15097 else if (GET_CODE (XEXP (x, 0)) == MULT
15098 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15099 {
15100 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15101 if (val == 2 || val == 4 || val == 8)
15102 {
15103 *total = COSTS_N_INSNS (ix86_cost->lea);
15104 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15105 *total += rtx_cost (XEXP (x, 1), outer_code);
15106 return true;
15107 }
15108 }
15109 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15110 {
15111 *total = COSTS_N_INSNS (ix86_cost->lea);
15112 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15113 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15114 *total += rtx_cost (XEXP (x, 1), outer_code);
15115 return true;
15116 }
15117 }
5efb1046 15118 /* FALLTHRU */
3c50106f
RH
15119
15120 case MINUS:
15121 if (FLOAT_MODE_P (mode))
15122 {
15123 *total = COSTS_N_INSNS (ix86_cost->fadd);
15124 return false;
15125 }
5efb1046 15126 /* FALLTHRU */
3c50106f
RH
15127
15128 case AND:
15129 case IOR:
15130 case XOR:
15131 if (!TARGET_64BIT && mode == DImode)
15132 {
15133 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15134 + (rtx_cost (XEXP (x, 0), outer_code)
15135 << (GET_MODE (XEXP (x, 0)) != DImode))
15136 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15137 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
15138 return true;
15139 }
5efb1046 15140 /* FALLTHRU */
3c50106f
RH
15141
15142 case NEG:
15143 if (FLOAT_MODE_P (mode))
15144 {
15145 *total = COSTS_N_INSNS (ix86_cost->fchs);
15146 return false;
15147 }
5efb1046 15148 /* FALLTHRU */
3c50106f
RH
15149
15150 case NOT:
15151 if (!TARGET_64BIT && mode == DImode)
15152 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15153 else
15154 *total = COSTS_N_INSNS (ix86_cost->add);
15155 return false;
15156
15157 case FLOAT_EXTEND:
15158 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15159 *total = 0;
15160 return false;
15161
15162 case ABS:
15163 if (FLOAT_MODE_P (mode))
15164 *total = COSTS_N_INSNS (ix86_cost->fabs);
15165 return false;
15166
15167 case SQRT:
15168 if (FLOAT_MODE_P (mode))
15169 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15170 return false;
15171
74dc3e94
RH
15172 case UNSPEC:
15173 if (XINT (x, 1) == UNSPEC_TP)
15174 *total = 0;
15175 return false;
15176
3c50106f
RH
15177 default:
15178 return false;
15179 }
15180}
15181
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record for SYMBOL: a pushl of its
   address in the init section, to be consumed by the startup code.
   PRIORITY is not supported by this scheme and is ignored.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
162f023b 15192
b069de3b
SS
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name SYMB and its associated stub STUB, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int len;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  /* Derive the binder and symbol names used below.  */
  len = strlen (stub);
  binder_name = alloca (len + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, len);

  len = strlen (symb);
  symbol_name = alloca (len + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, len);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Select the section the stub body lives in.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC form: obtain our own address via call/pop, then jump
	 through the lazy pointer relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially resolving to the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
15255
162f023b
JH
15256/* Order the registers for register allocator. */
15257
15258void
b96a374d 15259x86_order_regs_for_local_alloc (void)
162f023b
JH
15260{
15261 int pos = 0;
15262 int i;
15263
15264 /* First allocate the local general purpose registers. */
15265 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15266 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15267 reg_alloc_order [pos++] = i;
15268
15269 /* Global general purpose registers. */
15270 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15271 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15272 reg_alloc_order [pos++] = i;
15273
15274 /* x87 registers come first in case we are doing FP math
15275 using them. */
15276 if (!TARGET_SSE_MATH)
15277 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15278 reg_alloc_order [pos++] = i;
fce5a9f2 15279
162f023b
JH
15280 /* SSE registers. */
15281 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15282 reg_alloc_order [pos++] = i;
15283 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15284 reg_alloc_order [pos++] = i;
15285
d1f87653 15286 /* x87 registers. */
162f023b
JH
15287 if (TARGET_SSE_MATH)
15288 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15289 reg_alloc_order [pos++] = i;
15290
15291 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15292 reg_alloc_order [pos++] = i;
15293
15294 /* Initialize the rest of array as we do not allocate some registers
15295 at all. */
15296 while (pos < FIRST_PSEUDO_REGISTER)
15297 reg_alloc_order [pos++] = 0;
15298}
194734e9 15299
4977bab6
ZW
15300#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15301#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15302#endif
15303
fe77449a
DR
15304/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15305 struct attribute_spec.handler. */
15306static tree
b96a374d
AJ
15307ix86_handle_struct_attribute (tree *node, tree name,
15308 tree args ATTRIBUTE_UNUSED,
15309 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
15310{
15311 tree *type = NULL;
15312 if (DECL_P (*node))
15313 {
15314 if (TREE_CODE (*node) == TYPE_DECL)
15315 type = &TREE_TYPE (*node);
15316 }
15317 else
15318 type = node;
15319
15320 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15321 || TREE_CODE (*type) == UNION_TYPE)))
15322 {
15323 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15324 *no_add_attrs = true;
15325 }
15326
15327 else if ((is_attribute_p ("ms_struct", name)
15328 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15329 || ((is_attribute_p ("gcc_struct", name)
15330 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15331 {
15332 warning ("`%s' incompatible attribute ignored",
15333 IDENTIFIER_POINTER (name));
15334 *no_add_attrs = true;
15335 }
15336
15337 return NULL_TREE;
15338}
15339
4977bab6 15340static bool
b96a374d 15341ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15342{
fe77449a 15343 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15344 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15345 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15346}
15347
483ab821
MM
15348/* Returns an expression indicating where the this parameter is
15349 located on entry to the FUNCTION. */
15350
15351static rtx
b96a374d 15352x86_this_parameter (tree function)
483ab821
MM
15353{
15354 tree type = TREE_TYPE (function);
15355
3961e8fe
RH
15356 if (TARGET_64BIT)
15357 {
61f71b34 15358 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
3961e8fe
RH
15359 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15360 }
15361
e767b5be 15362 if (ix86_function_regparm (type, function) > 0)
483ab821
MM
15363 {
15364 tree parm;
15365
15366 parm = TYPE_ARG_TYPES (type);
15367 /* Figure out whether or not the function has a variable number of
15368 arguments. */
3961e8fe 15369 for (; parm; parm = TREE_CHAIN (parm))
483ab821
MM
15370 if (TREE_VALUE (parm) == void_type_node)
15371 break;
e767b5be 15372 /* If not, the this parameter is in the first argument. */
483ab821 15373 if (parm)
e767b5be
JH
15374 {
15375 int regno = 0;
15376 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15377 regno = 2;
02e02343 15378 return gen_rtx_REG (SImode, regno);
e767b5be 15379 }
483ab821
MM
15380 }
15381
61f71b34 15382 if (aggregate_value_p (TREE_TYPE (type), type))
483ab821
MM
15383 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15384 else
15385 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15386}
15387
3961e8fe
RH
15388/* Determine whether x86_output_mi_thunk can succeed. */
15389
15390static bool
b96a374d
AJ
15391x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15392 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15393 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
15394{
15395 /* 64-bit can handle anything. */
15396 if (TARGET_64BIT)
15397 return true;
15398
15399 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15400 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
15401 return true;
15402
15403 /* Need a free register for vcall_offset. */
15404 if (vcall_offset)
15405 return false;
15406
15407 /* Need a free register for GOT references. */
15408 if (flag_pic && !(*targetm.binds_local_p) (function))
15409 return false;
15410
15411 /* Otherwise ok. */
15412 return true;
15413}
15414
15415/* Output the assembler code for a thunk function. THUNK_DECL is the
15416 declaration for the thunk function itself, FUNCTION is the decl for
15417 the target function. DELTA is an immediate constant offset to be
272d0bee 15418 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 15419 *(*this + vcall_offset) should be added to THIS. */
483ab821 15420
c590b625 15421static void
b96a374d
AJ
15422x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15423 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15424 HOST_WIDE_INT vcall_offset, tree function)
194734e9 15425{
194734e9 15426 rtx xops[3];
3961e8fe
RH
15427 rtx this = x86_this_parameter (function);
15428 rtx this_reg, tmp;
194734e9 15429
3961e8fe
RH
15430 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15431 pull it in now and let DELTA benefit. */
15432 if (REG_P (this))
15433 this_reg = this;
15434 else if (vcall_offset)
15435 {
15436 /* Put the this parameter into %eax. */
15437 xops[0] = this;
15438 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15439 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15440 }
15441 else
15442 this_reg = NULL_RTX;
15443
15444 /* Adjust the this parameter by a fixed constant. */
15445 if (delta)
194734e9 15446 {
483ab821 15447 xops[0] = GEN_INT (delta);
3961e8fe
RH
15448 xops[1] = this_reg ? this_reg : this;
15449 if (TARGET_64BIT)
194734e9 15450 {
3961e8fe
RH
15451 if (!x86_64_general_operand (xops[0], DImode))
15452 {
15453 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15454 xops[1] = tmp;
15455 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15456 xops[0] = tmp;
15457 xops[1] = this;
15458 }
15459 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
15460 }
15461 else
3961e8fe 15462 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 15463 }
3961e8fe
RH
15464
15465 /* Adjust the this parameter by a value stored in the vtable. */
15466 if (vcall_offset)
194734e9 15467 {
3961e8fe
RH
15468 if (TARGET_64BIT)
15469 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15470 else
e767b5be
JH
15471 {
15472 int tmp_regno = 2 /* ECX */;
15473 if (lookup_attribute ("fastcall",
15474 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15475 tmp_regno = 0 /* EAX */;
15476 tmp = gen_rtx_REG (SImode, tmp_regno);
15477 }
483ab821 15478
3961e8fe
RH
15479 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15480 xops[1] = tmp;
15481 if (TARGET_64BIT)
15482 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15483 else
15484 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 15485
3961e8fe
RH
15486 /* Adjust the this parameter. */
15487 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15488 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15489 {
15490 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15491 xops[0] = GEN_INT (vcall_offset);
15492 xops[1] = tmp2;
15493 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15494 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 15495 }
3961e8fe
RH
15496 xops[1] = this_reg;
15497 if (TARGET_64BIT)
15498 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15499 else
15500 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15501 }
194734e9 15502
3961e8fe
RH
15503 /* If necessary, drop THIS back to its stack slot. */
15504 if (this_reg && this_reg != this)
15505 {
15506 xops[0] = this_reg;
15507 xops[1] = this;
15508 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15509 }
194734e9 15510
89ce1c8f 15511 xops[0] = XEXP (DECL_RTL (function), 0);
3961e8fe
RH
15512 if (TARGET_64BIT)
15513 {
15514 if (!flag_pic || (*targetm.binds_local_p) (function))
15515 output_asm_insn ("jmp\t%P0", xops);
15516 else
fcbe3b89 15517 {
89ce1c8f 15518 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
15519 tmp = gen_rtx_CONST (Pmode, tmp);
15520 tmp = gen_rtx_MEM (QImode, tmp);
15521 xops[0] = tmp;
15522 output_asm_insn ("jmp\t%A0", xops);
15523 }
3961e8fe
RH
15524 }
15525 else
15526 {
15527 if (!flag_pic || (*targetm.binds_local_p) (function))
15528 output_asm_insn ("jmp\t%P0", xops);
194734e9 15529 else
21ff35fb 15530#if TARGET_MACHO
095fa594
SH
15531 if (TARGET_MACHO)
15532 {
0f901c4c 15533 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
095fa594
SH
15534 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15535 tmp = gen_rtx_MEM (QImode, tmp);
15536 xops[0] = tmp;
15537 output_asm_insn ("jmp\t%0", xops);
15538 }
15539 else
15540#endif /* TARGET_MACHO */
194734e9 15541 {
3961e8fe
RH
15542 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15543 output_set_got (tmp);
15544
15545 xops[1] = tmp;
15546 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15547 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
15548 }
15549 }
15550}
e2500fed 15551
1bc7c5b6 15552static void
b96a374d 15553x86_file_start (void)
1bc7c5b6
ZW
15554{
15555 default_file_start ();
15556 if (X86_FILE_START_VERSION_DIRECTIVE)
15557 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15558 if (X86_FILE_START_FLTUSED)
15559 fputs ("\t.global\t__fltused\n", asm_out_file);
15560 if (ix86_asm_dialect == ASM_INTEL)
15561 fputs ("\t.intel_syntax\n", asm_out_file);
15562}
15563
e932b21b 15564int
b96a374d 15565x86_field_alignment (tree field, int computed)
e932b21b
JH
15566{
15567 enum machine_mode mode;
ad9335eb
JJ
15568 tree type = TREE_TYPE (field);
15569
15570 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15571 return computed;
ad9335eb
JJ
15572 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15573 ? get_inner_array_type (type) : type);
39e3a681
JJ
15574 if (mode == DFmode || mode == DCmode
15575 || GET_MODE_CLASS (mode) == MODE_INT
15576 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15577 return MIN (32, computed);
15578 return computed;
15579}
15580
a5fa1ecd
JH
15581/* Output assembler code to FILE to increment profiler label # LABELNO
15582 for profiling a function entry. */
15583void
b96a374d 15584x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
a5fa1ecd
JH
15585{
15586 if (TARGET_64BIT)
15587 if (flag_pic)
15588 {
15589#ifndef NO_PROFILE_COUNTERS
15590 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15591#endif
15592 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15593 }
15594 else
15595 {
15596#ifndef NO_PROFILE_COUNTERS
15597 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15598#endif
15599 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15600 }
15601 else if (flag_pic)
15602 {
15603#ifndef NO_PROFILE_COUNTERS
15604 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15605 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15606#endif
15607 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15608 }
15609 else
15610 {
15611#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 15612 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
a5fa1ecd
JH
15613 PROFILE_COUNT_REGISTER);
15614#endif
15615 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15616 }
15617}
15618
d2c49530
JH
15619/* We don't have exact information about the insn sizes, but we may assume
15620 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 15621 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
15622 99% of cases. */
15623
15624static int
b96a374d 15625min_insn_size (rtx insn)
d2c49530
JH
15626{
15627 int l = 0;
15628
15629 if (!INSN_P (insn) || !active_insn_p (insn))
15630 return 0;
15631
15632 /* Discard alignments we've emit and jump instructions. */
15633 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15634 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15635 return 0;
15636 if (GET_CODE (insn) == JUMP_INSN
15637 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15638 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15639 return 0;
15640
15641 /* Important case - calls are always 5 bytes.
15642 It is common to have many calls in the row. */
15643 if (GET_CODE (insn) == CALL_INSN
15644 && symbolic_reference_mentioned_p (PATTERN (insn))
15645 && !SIBLING_CALL_P (insn))
15646 return 5;
15647 if (get_attr_length (insn) <= 1)
15648 return 1;
15649
15650 /* For normal instructions we may rely on the sizes of addresses
15651 and the presence of symbol to require 4 bytes of encoding.
15652 This is not the case for jumps where references are PC relative. */
15653 if (GET_CODE (insn) != JUMP_INSN)
15654 {
15655 l = get_attr_length_address (insn);
15656 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15657 l = 4;
15658 }
15659 if (l)
15660 return 1+l;
15661 else
15662 return 2;
15663}
15664
c51e6d85 15665/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
d2c49530
JH
15666 window. */
15667
15668static void
be04394b 15669ix86_avoid_jump_misspredicts (void)
d2c49530
JH
15670{
15671 rtx insn, start = get_insns ();
15672 int nbytes = 0, njumps = 0;
15673 int isjump = 0;
15674
15675 /* Look for all minimal intervals of instructions containing 4 jumps.
15676 The intervals are bounded by START and INSN. NBYTES is the total
15677 size of instructions in the interval including INSN and not including
15678 START. When the NBYTES is smaller than 16 bytes, it is possible
15679 that the end of START and INSN ends up in the same 16byte page.
15680
15681 The smallest offset in the page INSN can start is the case where START
15682 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15683 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15684 */
15685 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15686 {
15687
15688 nbytes += min_insn_size (insn);
c263766c
RH
15689 if (dump_file)
15690 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
d2c49530
JH
15691 INSN_UID (insn), min_insn_size (insn));
15692 if ((GET_CODE (insn) == JUMP_INSN
15693 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15694 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15695 || GET_CODE (insn) == CALL_INSN)
15696 njumps++;
15697 else
15698 continue;
15699
15700 while (njumps > 3)
15701 {
15702 start = NEXT_INSN (start);
15703 if ((GET_CODE (start) == JUMP_INSN
15704 && GET_CODE (PATTERN (start)) != ADDR_VEC
15705 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15706 || GET_CODE (start) == CALL_INSN)
15707 njumps--, isjump = 1;
15708 else
15709 isjump = 0;
15710 nbytes -= min_insn_size (start);
15711 }
15712 if (njumps < 0)
15713 abort ();
c263766c
RH
15714 if (dump_file)
15715 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
15716 INSN_UID (start), INSN_UID (insn), nbytes);
15717
15718 if (njumps == 3 && isjump && nbytes < 16)
15719 {
15720 int padsize = 15 - nbytes + min_insn_size (insn);
15721
c263766c
RH
15722 if (dump_file)
15723 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15724 INSN_UID (insn), padsize);
d2c49530
JH
15725 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15726 }
15727 }
15728}
15729
be04394b 15730/* AMD Athlon works faster
d1f87653 15731 when RET is not destination of conditional jump or directly preceded
2a500b9e
JH
15732 by other jump instruction. We avoid the penalty by inserting NOP just
15733 before the RET instructions in such cases. */
18dbd950 15734static void
be04394b 15735ix86_pad_returns (void)
2a500b9e
JH
15736{
15737 edge e;
15738
2a500b9e
JH
15739 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15740 {
15741 basic_block bb = e->src;
a813c111 15742 rtx ret = BB_END (bb);
2a500b9e 15743 rtx prev;
253c7a00 15744 bool replace = false;
2a500b9e 15745
253c7a00
JH
15746 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15747 || !maybe_hot_bb_p (bb))
2a500b9e 15748 continue;
4977bab6
ZW
15749 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15750 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15751 break;
2a500b9e
JH
15752 if (prev && GET_CODE (prev) == CODE_LABEL)
15753 {
15754 edge e;
15755 for (e = bb->pred; e; e = e->pred_next)
4977bab6 15756 if (EDGE_FREQUENCY (e) && e->src->index >= 0
2a500b9e 15757 && !(e->flags & EDGE_FALLTHRU))
253c7a00 15758 replace = true;
2a500b9e 15759 }
253c7a00 15760 if (!replace)
2a500b9e 15761 {
4977bab6 15762 prev = prev_active_insn (ret);
25f57a0e
JH
15763 if (prev
15764 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15765 || GET_CODE (prev) == CALL_INSN))
253c7a00 15766 replace = true;
c51e6d85 15767 /* Empty functions get branch mispredict even when the jump destination
4977bab6
ZW
15768 is not visible to us. */
15769 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
253c7a00
JH
15770 replace = true;
15771 }
15772 if (replace)
15773 {
15774 emit_insn_before (gen_return_internal_long (), ret);
15775 delete_insn (ret);
2a500b9e 15776 }
2a500b9e 15777 }
be04394b
JH
15778}
15779
15780/* Implement machine specific optimizations. We implement padding of returns
15781 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15782static void
15783ix86_reorg (void)
15784{
15785 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15786 ix86_pad_returns ();
15787 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15788 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15789}
15790
4977bab6
ZW
15791/* Return nonzero when QImode register that must be represented via REX prefix
15792 is used. */
15793bool
b96a374d 15794x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15795{
15796 int i;
15797 extract_insn_cached (insn);
15798 for (i = 0; i < recog_data.n_operands; i++)
15799 if (REG_P (recog_data.operand[i])
15800 && REGNO (recog_data.operand[i]) >= 4)
15801 return true;
15802 return false;
15803}
15804
15805/* Return nonzero when P points to register encoded via REX prefix.
15806 Called via for_each_rtx. */
15807static int
b96a374d 15808extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15809{
15810 unsigned int regno;
15811 if (!REG_P (*p))
15812 return 0;
15813 regno = REGNO (*p);
15814 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15815}
15816
15817/* Return true when INSN mentions register that must be encoded using REX
15818 prefix. */
15819bool
b96a374d 15820x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15821{
15822 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15823}
15824
1d6ba901 15825/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
15826 optabs would emit if we didn't have TFmode patterns. */
15827
15828void
b96a374d 15829x86_emit_floatuns (rtx operands[2])
8d705469
JH
15830{
15831 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
15832 enum machine_mode mode, inmode;
15833
15834 inmode = GET_MODE (operands[1]);
15835 if (inmode != SImode
15836 && inmode != DImode)
15837 abort ();
8d705469
JH
15838
15839 out = operands[0];
1d6ba901 15840 in = force_reg (inmode, operands[1]);
8d705469
JH
15841 mode = GET_MODE (out);
15842 neglab = gen_label_rtx ();
15843 donelab = gen_label_rtx ();
15844 i1 = gen_reg_rtx (Pmode);
15845 f0 = gen_reg_rtx (mode);
15846
15847 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15848
15849 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15850 emit_jump_insn (gen_jump (donelab));
15851 emit_barrier ();
15852
15853 emit_label (neglab);
15854
15855 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15856 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15857 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15858 expand_float (f0, i0, 0);
15859 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15860
15861 emit_label (donelab);
15862}
15863
997404de
JH
/* Initialize vector TARGET via VALS, a PARALLEL of element rtxes.
   Chooses between a constant-pool load, a pool load plus a scalar
   merge, and an unpack ("rotation") sequence, depending on how many
   elements are non-constant.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Scan from the top; after the loop I is the highest index whose
     element is not a compile-time constant, or -1 if all are.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      /* Widen element 0 to a full vector so movss/movsd can merge it.  */
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Zero out slot 0 in VALS so the pool constant is loadable, then
	 patch the real element 0 back in with a scalar move.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  /* NOTE(review): other modes fall through silently, leaving
	     element 0 as zero -- presumably only V2DF/V4SF reach this
	     path; confirm against callers.  */
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	/* Promote each DFmode element to a V2DF register, then
	   interleave the two low halves.  */
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	/* Two interleave rounds: (1,3) and (0,2) first, then combine,
	   which lands elements 0..3 in their proper lanes.  */
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
15951
67dfe110
KH
15952/* Worker function for TARGET_MD_ASM_CLOBBERS.
15953
15954 We do this in the new i386 backend to maintain source compatibility
15955 with the old cc0-based compiler. */
15956
15957static tree
15958ix86_md_asm_clobbers (tree clobbers)
15959{
15960 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15961 clobbers);
15962 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15963 clobbers);
15964 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15965 clobbers);
15966 return clobbers;
15967}
15968
3c5cb3e4
KH
15969/* Worker function for REVERSE_CONDITION. */
15970
15971enum rtx_code
15972ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15973{
15974 return (mode != CCFPmode && mode != CCFPUmode
15975 ? reverse_condition (code)
15976 : reverse_condition_maybe_unordered (code));
15977}
15978
5ea9cb6e
RS
15979/* Output code to perform an x87 FP register move, from OPERANDS[1]
15980 to OPERANDS[0]. */
15981
15982const char *
15983output_387_reg_move (rtx insn, rtx *operands)
15984{
15985 if (REG_P (operands[1])
15986 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15987 {
15988 if (REGNO (operands[0]) == FIRST_STACK_REG
15989 && TARGET_USE_FFREEP)
15990 return "ffreep\t%y0";
15991 return "fstp\t%y0";
15992 }
15993 if (STACK_TOP_P (operands[0]))
15994 return "fld%z1\t%y1";
15995 return "fst\t%y0";
15996}
15997
5ae27cfa
UB
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  /* Holds the 16-bit x87 status word.  */
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* fnstsw: copy the FP status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* sahf moves the status word's high byte into EFLAGS; C2 lands
	 in PF, so the jump condition is UNORDERED on CCmode flags.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Without sahf, test the status word's high byte directly.
	 C2 is bit 10 of the status word, i.e. bit 2 (0x04) of the
	 high byte; jump if it is nonzero.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  /* Emit (set pc (if_then_else COND (label_ref LABEL) pc)).  */
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
16030
c2fcfa4f
UB
/* Output code to perform a log1p XFmode calculation.

   OP0 receives log(1 + OP1), computed with the x87 fyl2xp1/fyl2x
   instructions.  fyl2xp1 is only accurate (per the x87 manuals) for
   |x| < 1 - sqrt(2)/2, so larger inputs fall back to fyl2x on 1+x.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* Compare |OP1| against 1 - sqrt(2)/2 ~= 0.29289..., the documented
     argument bound of fyl2xp1; branch to the fallback if too large.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
	     CONST_DOUBLE_FROM_REAL_VALUE (
		REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
		XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* Small |x|: log1p(x) = ln(2) * log2(1 + x) via fyl2xp1.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  /* Large |x|: compute 1 + x explicitly, then
     log1p(x) = ln(2) * log2(1 + x) via fyl2x.  */
  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
16060
e2500fed 16061#include "gt-i386.h"
This page took 4.821351 seconds and 5 git commands to generate.