]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
PR libgcj/12016, PR libgcj/18405, PR libgcj/17738:
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2cdb3148 3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
2a2ab3f9 4
188fc5b5 5This file is part of GCC.
2a2ab3f9 6
188fc5b5 7GCC is free software; you can redistribute it and/or modify
2a2ab3f9
JVA
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
188fc5b5 12GCC is distributed in the hope that it will be useful,
2a2ab3f9
JVA
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
188fc5b5 18along with GCC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9 34#include "output.h"
8bc527af 35#include "insn-codes.h"
2a2ab3f9 36#include "insn-attr.h"
2a2ab3f9 37#include "flags.h"
a8ffcc81 38#include "except.h"
ecbc4695 39#include "function.h"
00c79232 40#include "recog.h"
ced8dd8c 41#include "expr.h"
e78d8e51 42#include "optabs.h"
f103890b 43#include "toplev.h"
e075ae69 44#include "basic-block.h"
1526a060 45#include "ggc.h"
672a6f42
NB
46#include "target.h"
47#include "target-def.h"
f1e639b1 48#include "langhooks.h"
dafc5b82 49#include "cgraph.h"
cd3ce9b4 50#include "tree-gimple.h"
2a2ab3f9 51
8dfe5673 52#ifndef CHECK_STACK_LIMIT
07933f72 53#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
54#endif
55
3c50106f
RH
56/* Return the index of MODE in the multiply and divide cost tables: 0 for QImode, 1 for HImode, 2 for SImode, 3 for DImode and 4 for any other mode. */
57#define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
2ab0437e 64/* Processor costs (relative to an add) */
fce5a9f2 65static const
2ab0437e
JH
66struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 72 0, /* cost of multiply per each bit set */
4977bab6 73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
74 3, /* cost of movsx */
75 3, /* cost of movzx */
2ab0437e
JH
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
f4365627
JH
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
4977bab6 100 1, /* Branch cost */
229b303a
RS
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
2ab0437e 107};
229b303a 108
32b5b1aa 109/* Processor costs (relative to an add) */
fce5a9f2 110static const
32b5b1aa 111struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 112 1, /* cost of an add instruction */
32b5b1aa
SC
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
4977bab6 116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 117 1, /* cost of multiply per each bit set */
4977bab6 118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
119 3, /* cost of movsx */
120 2, /* cost of movzx */
96e7ae40 121 15, /* "large" insn */
e2e52e1b 122 3, /* MOVE_RATIO */
7c6b971d 123 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
0f290768 126 Relative to reg-reg move (2). */
96e7ae40
JH
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
fa79946e
JH
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
f4365627
JH
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
4977bab6 145 1, /* Branch cost */
229b303a
RS
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
152};
153
fce5a9f2 154static const
32b5b1aa
SC
155struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
4977bab6 160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 161 1, /* cost of multiply per each bit set */
4977bab6 162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
163 3, /* cost of movsx */
164 2, /* cost of movzx */
96e7ae40 165 15, /* "large" insn */
e2e52e1b 166 3, /* MOVE_RATIO */
7c6b971d 167 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
0f290768 170 Relative to reg-reg move (2). */
96e7ae40
JH
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
fa79946e
JH
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
f4365627
JH
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
4977bab6 189 1, /* Branch cost */
229b303a
RS
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
196};
197
fce5a9f2 198static const
e5cb57e8 199struct processor_costs pentium_cost = { /* Pentium specific costs */
32b5b1aa
SC
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
856b07a1 202 4, /* variable shift costs */
e5cb57e8 203 1, /* constant shift costs */
4977bab6 204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 205 0, /* cost of multiply per each bit set */
4977bab6 206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
207 3, /* cost of movsx */
208 2, /* cost of movzx */
96e7ae40 209 8, /* "large" insn */
e2e52e1b 210 6, /* MOVE_RATIO */
7c6b971d 211 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
0f290768 214 Relative to reg-reg move (2). */
96e7ae40
JH
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
fa79946e
JH
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
f4365627
JH
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
4977bab6 233 2, /* Branch cost */
229b303a
RS
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
240};
241
fce5a9f2 242static const
856b07a1
SC
243struct processor_costs pentiumpro_cost = { /* PentiumPro specific costs */
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
e075ae69 246 1, /* variable shift costs */
856b07a1 247 1, /* constant shift costs */
4977bab6 248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 249 0, /* cost of multiply per each bit set */
4977bab6 250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
251 1, /* cost of movsx */
252 1, /* cost of movzx */
96e7ae40 253 8, /* "large" insn */
e2e52e1b 254 6, /* MOVE_RATIO */
7c6b971d 255 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
0f290768 258 Relative to reg-reg move (2). */
96e7ae40
JH
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
fa79946e
JH
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
f4365627
JH
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
4977bab6 277 2, /* Branch cost */
229b303a
RS
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
856b07a1
SC
284};
285
fce5a9f2 286static const
a269a03c
JC
287struct processor_costs k6_cost = { /* K6 specific costs */
288 1, /* cost of an add instruction */
e075ae69 289 2, /* cost of a lea instruction */
a269a03c
JC
290 1, /* variable shift costs */
291 1, /* constant shift costs */
4977bab6 292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 293 0, /* cost of multiply per each bit set */
4977bab6 294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
295 2, /* cost of movsx */
296 2, /* cost of movzx */
96e7ae40 297 8, /* "large" insn */
e2e52e1b 298 4, /* MOVE_RATIO */
7c6b971d 299 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
0f290768 302 Relative to reg-reg move (2). */
96e7ae40
JH
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
fa79946e
JH
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
f4365627
JH
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
4977bab6 321 1, /* Branch cost */
229b303a
RS
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
4f770e7b
RS
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
229b303a
RS
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
a269a03c
JC
328};
329
fce5a9f2 330static const
309ada50
JH
331struct processor_costs athlon_cost = { /* Athlon specific costs */
332 1, /* cost of an add instruction */
0b5107cf 333 2, /* cost of a lea instruction */
309ada50
JH
334 1, /* variable shift costs */
335 1, /* constant shift costs */
4977bab6 336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 337 0, /* cost of multiply per each bit set */
4977bab6 338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
339 1, /* cost of movsx */
340 1, /* cost of movzx */
309ada50 341 8, /* "large" insn */
e2e52e1b 342 9, /* MOVE_RATIO */
309ada50 343 4, /* cost for loading QImode using movzbl */
b72b1c29 344 {3, 4, 3}, /* cost of loading integer registers
309ada50 345 in QImode, HImode and SImode.
0f290768 346 Relative to reg-reg move (2). */
b72b1c29 347 {3, 4, 3}, /* cost of storing integer registers */
309ada50 348 4, /* cost of reg,reg fld/fst */
b72b1c29 349 {4, 4, 12}, /* cost of loading fp registers
309ada50 350 in SFmode, DFmode and XFmode */
b72b1c29 351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
fa79946e 352 2, /* cost of moving MMX register */
b72b1c29 353 {4, 4}, /* cost of loading MMX registers
fa79946e 354 in SImode and DImode */
b72b1c29 355 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
b72b1c29 358 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 361 in SImode, DImode and TImode */
b72b1c29 362 5, /* MMX or SSE register to integer */
f4365627
JH
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
8c1e80e9 365 5, /* Branch cost */
229b303a
RS
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
309ada50
JH
372};
373
4977bab6
ZW
374static const
375struct processor_costs k8_cost = { /* K8 specific costs */
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
8c1e80e9 409 5, /* Branch cost */
4977bab6
ZW
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416};
417
fce5a9f2 418static const
b4e89e2d
JH
419struct processor_costs pentium4_cost = { /* Pentium 4 specific costs */
420 1, /* cost of an add instruction */
e0c00392 421 3, /* cost of a lea instruction */
4977bab6
ZW
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 425 0, /* cost of multiply per each bit set */
4977bab6 426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
427 1, /* cost of movsx */
428 1, /* cost of movzx */
b4e89e2d
JH
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
f4365627
JH
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
4977bab6 453 2, /* Branch cost */
229b303a
RS
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
460};
461
89c43c0a
VM
462static const
463struct processor_costs nocona_cost = { /* Nocona specific costs */
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504};
505
8b60264b 506const struct processor_costs *ix86_cost = &pentium_cost; /* Pointer to a processor cost table; initially points at pentium_cost. */
32b5b1aa 507
a269a03c
JC
508/* Processor feature/optimization bitmasks: one bit per PROCESSOR_* value, OR-ed together below to build the x86_* masks. */
509#define m_386 (1<<PROCESSOR_I386)
510#define m_486 (1<<PROCESSOR_I486)
511#define m_PENT (1<<PROCESSOR_PENTIUM)
512#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513#define m_K6 (1<<PROCESSOR_K6)
309ada50 514#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 515#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
516#define m_K8 (1<<PROCESSOR_K8)
517#define m_ATHLON_K8 (m_K8 | m_ATHLON)
89c43c0a 518#define m_NOCONA (1<<PROCESSOR_NOCONA)
a269a03c 519
4977bab6 520const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8; /* Each x86_* constant below is a mask built from the m_* processor bits; ~ selects every processor except those listed. */
89c43c0a 521const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 522const int x86_zero_extend_with_and = m_486 | m_PENT;
89c43c0a 523const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
e075ae69 524const int x86_double_with_add = ~m_386;
a269a03c 525const int x86_use_bit_test = m_386;
4977bab6 526const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
89c43c0a 527const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
4977bab6 528const int x86_3dnow_a = m_ATHLON_K8;
89c43c0a 529const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
d20bf446
L
530/* Branch hints were put in P4 based on simulation results. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. They also increase the code size. As a result,
533 icc never generates branch hints. */
534const int x86_branch_hints = 0;
89c43c0a 535const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
e075ae69
RH
536const int x86_partial_reg_stall = m_PPRO;
537const int x86_use_loop = m_K6;
4977bab6 538const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
539const int x86_use_mov0 = m_K6;
540const int x86_use_cltd = ~(m_PENT | m_K6);
541const int x86_read_modify_write = ~m_PENT;
542const int x86_read_modify = ~(m_PENT | m_PPRO);
543const int x86_split_long_moves = m_PPRO;
4977bab6 544const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 545const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 546const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
547const int x86_qimode_math = ~(0);
548const int x86_promote_qi_regs = 0;
549const int x86_himode_math = ~(m_PPRO);
550const int x86_promote_hi_regs = m_PPRO;
89c43c0a
VM
551const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
7b50a809
JH
559const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
89c43c0a 561const int x86_decompose_lea = m_PENT4 | m_NOCONA;
495333a6 562const int x86_shift1 = ~m_486;
89c43c0a
VM
563const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
41afe4ef
RH
565/* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just
567 lower part of scalar values in proper format leaving the upper part
568 undefined. */
569const int x86_sse_split_regs = m_ATHLON_K8;
4977bab6 570const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 571const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
572const int x86_use_ffreep = m_ATHLON_K8;
573const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 574const int x86_inter_unit_moves = ~(m_ATHLON_K8);
89c43c0a 575const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
be04394b
JH
576/* Some CPU cores are not able to predict more than 4 branch instructions in
577 the 16 byte window. */
89c43c0a 578const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
03e00d30 579const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
7cacf53e 580const int x86_use_bt = m_ATHLON_K8;
a269a03c 581
d1f87653 582/* If the average insn count for a single function invocation is
6ab16dd9
JH
583 lower than this constant, emit fast (but longer) prologue and
584 epilogue code. */
4977bab6 585#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 586
5bf0ebab
RH
587/* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively; each table is initialized from the matching *_REGISTER_NAMES macro. */
588static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
591
592/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 594

e075ae69 595enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
596{
597 /* ax, dx, cx, bx */
ab408a86 598 AREG, DREG, CREG, BREG,
4c0d89b5 599 /* si, di, bp, sp */
e075ae69 600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
601 /* FP registers */
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 604 /* arg pointer */
83774849 605 NON_Q_REGS,
564d80f4 606 /* flags, fpsr, dirflag, frame */
a7180f70
BS
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, /* SSE registers */
609 SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, /* MMX registers */
3d117b30
JH
611 MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, /* extended integer registers */
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, /* extended SSE registers */
615 SSE_REGS, SSE_REGS,
4c0d89b5 616};
c572e5ba 617
3d117b30 618/* The "default" register map used in 32bit mode. One entry per hard register, grouped as labelled on each row. NOTE(review): -1 presumably marks a register with no debugger number -- confirm. */
83774849 619

0f290768 620int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
621{
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
629};
630
5bf0ebab
RH
/* Register numbers RDI, RSI, RDX, RCX, R8 and R9, in that order.
   NOTE(review): by its name this is the 64-bit integer argument register
   order -- confirm against the users of this table.  */
631static int const x86_64_int_parameter_registers[6] =
632{
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
635};
636
/* Register numbers RAX, RDX, RDI and RSI, in that order.
   NOTE(review): by its name this is the 64-bit integer return register
   order -- confirm against the users of this table.  */
637static int const x86_64_int_return_registers[4] =
638{
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
640};
53c17031 641
0f7fa3d0
JH
642/* The "default" register map used in 64bit mode. Same row layout as the 32bit map; here the extended integer and extended SSE registers have numbers as well. */
643int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
644{
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
652};
653
83774849
RH
654/* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
698 numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
707*/
/* Indexed by gcc regno.  The general, fp and flags entries hold the DWARF
   numbers listed in the comment above (e.g. gcc regno 7, %esp, maps to 4
   and gcc regno 17, %eflags, maps to 9).  */
0f290768 708int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
709{
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
717};
718
c572e5ba
JVA
719/* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. Both start out as NULL_RTX. */
721

07933f72
GS
722rtx ix86_compare_op0 = NULL_RTX;
723rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 724
7a2e09f4 725#define MAX_386_STACK_LOCALS 3
8362f420
JH
726/* Size of the register save area. */
727#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
728
729/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
730
/* One node of a singly linked list (chained through NEXT) that pairs an
   rtx with a mode and a small index N.
   NOTE(review): presumably this caches a stack slot per (mode, n) pair --
   confirm against the code that walks the list.  */
731struct stack_local_entry GTY(())
732{
733 unsigned short mode;
734 unsigned short n;
735 rtx rtl;
736 struct stack_local_entry *next; /* Next node in the list.  */
737};
738
4dd2ac2c
JH
739/* Structure describing stack frame layout.
740 Stack grows downward:
741
742 [arguments]
743 <- ARG_POINTER
744 saved pc
745
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
748 [saved regs]
749
750 [padding1] \
751 )
752 [va_arg registers] (
753 > to_allocate <- FRAME_POINTER
754 [frame] (
755 )
756 [padding2] /
757 */
/* Field names follow the layout picture in the comment above: padding1,
   va_arg_size, frame and padding2 correspond to the like-named regions
   there, and to_allocate to the span the picture brackets with
   "> to_allocate".
   NOTE(review): presumably each of these holds the size of its region --
   confirm in ix86_compute_frame_layout, which fills this structure in.  */
758struct ix86_frame
759{
760 int nregs; /* NOTE(review): presumably the number of saved registers -- confirm.  */
761 int padding1;
8362f420 762 int va_arg_size;
4dd2ac2c
JH
763 HOST_WIDE_INT frame;
764 int padding2;
765 int outgoing_arguments_size;
8362f420 766 int red_zone_size;
4dd2ac2c
JH
767
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
773
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
4dd2ac2c
JH
777};
778
c93e80a5
JH
779/* Used to enable/disable debugging features. */
780const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
781/* Code model option as passed by user. */
782const char *ix86_cmodel_string;
783/* Parsed value. */
784enum cmodel ix86_cmodel;
80f33d06
GS
785/* Asm dialect. */
786const char *ix86_asm_string;
787enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
788/* TLS dialect. */
789const char *ix86_tls_dialect_string;
790enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 791
5bf0ebab 792/* Which unit we are generating floating point math for. */
965f5423
JH
793enum fpmath_unit ix86_fpmath;
794
5bf0ebab 795/* Which cpu are we scheduling for. */
9e555526 796enum processor_type ix86_tune;
5bf0ebab
RH
797/* Which instruction set architecture to use. */
798enum processor_type ix86_arch;
c8c5cb99
SC
799
800/* Strings to hold which cpu and instruction set architecture to use. */
9e555526 801const char *ix86_tune_string; /* for -mtune=<xxx> */
9c23aa47 802const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 803const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 804
0f290768 805/* # of registers to use to pass arguments. */
e075ae69 806const char *ix86_regparm_string;
e9a25f70 807
f4365627
JH
808/* true if sse prefetch instruction is not NOOP. */
809int x86_prefetch_sse;
810
e075ae69
RH
811/* ix86_regparm_string as a number */
812int ix86_regparm;
e9a25f70
JL
813
814/* Alignment to use for loops and jumps: */
815
0f290768 816/* Power of two alignment for loops. */
e075ae69 817const char *ix86_align_loops_string;
e9a25f70 818
0f290768 819/* Power of two alignment for non-loop jumps. */
e075ae69 820const char *ix86_align_jumps_string;
e9a25f70 821
3af4bd89 822/* Power of two alignment for stack boundary in bytes. */
e075ae69 823const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
824
825/* Preferred alignment for stack boundary in bits. */
95899b34 826unsigned int ix86_preferred_stack_boundary;
3af4bd89 827
e9a25f70 828/* Values 1-5: see jump.c */
e075ae69
RH
829int ix86_branch_cost;
830const char *ix86_branch_cost_string;
e9a25f70 831
0f290768 832/* Power of two alignment for functions. */
e075ae69 833const char *ix86_align_funcs_string;
623fe810
RH
834
835/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
8fe75e43
RH
836char internal_label_prefix[16];
837int internal_label_prefix_len;
e075ae69 838\f
b96a374d
AJ
839static void output_pic_addr_const (FILE *, rtx, int);
840static void put_condition_code (enum rtx_code, enum machine_mode,
841 int, int, FILE *);
842static const char *get_some_local_dynamic_name (void);
843static int get_some_local_dynamic_name_1 (rtx *, void *);
b96a374d
AJ
844static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 rtx *);
e129d93a
ILT
847static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 enum machine_mode);
b96a374d
AJ
850static rtx get_thread_pointer (int);
851static rtx legitimize_tls_address (rtx, enum tls_model, int);
852static void get_pc_thunk_name (char [32], unsigned int);
853static rtx gen_push (rtx);
b96a374d
AJ
854static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855static int ix86_agi_dependant (rtx, rtx, enum attr_type);
b96a374d
AJ
856static struct machine_function * ix86_init_machine_status (void);
857static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858static int ix86_nsaved_regs (void);
859static void ix86_emit_save_regs (void);
860static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 861static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 862static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
863static HOST_WIDE_INT ix86_GOT_alias_set (void);
864static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 866static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
867static int ix86_issue_rate (void);
868static int ix86_adjust_cost (rtx, rtx, rtx, int);
b96a374d
AJ
869static int ia32_multipass_dfa_lookahead (void);
870static void ix86_init_mmx_sse_builtins (void);
871static rtx x86_this_parameter (tree);
872static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875static void x86_file_start (void);
876static void ix86_reorg (void);
c35d187f
RH
877static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
879static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
23a60a04 881static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
f676971a 882static bool ix86_vector_mode_supported_p (enum machine_mode);
e075ae69 883
b96a374d
AJ
884static int ix86_address_cost (rtx);
885static bool ix86_cannot_force_const_mem (rtx);
886static rtx ix86_delegitimize_address (rtx);
bd793c65
BS
887
888struct builtin_description;
b96a374d
AJ
889static rtx ix86_expand_sse_comi (const struct builtin_description *,
890 tree, rtx);
891static rtx ix86_expand_sse_compare (const struct builtin_description *,
892 tree, rtx);
893static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
894static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
895static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
896static rtx ix86_expand_store_builtin (enum insn_code, tree);
897static rtx safe_vector_operand (rtx, enum machine_mode);
b96a374d
AJ
898static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
899static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
900static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
901static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
902static int ix86_fp_comparison_cost (enum rtx_code code);
903static unsigned int ix86_select_alt_pic_regnum (void);
904static int ix86_save_reg (unsigned int, int);
905static void ix86_compute_frame_layout (struct ix86_frame *);
906static int ix86_comp_type_attributes (tree, tree);
e767b5be 907static int ix86_function_regparm (tree, tree);
91d231cb 908const struct attribute_spec ix86_attribute_table[];
b96a374d
AJ
909static bool ix86_function_ok_for_sibcall (tree, tree);
910static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
911static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
912static int ix86_value_regno (enum machine_mode);
913static bool contains_128bit_aligned_vector_p (tree);
0397ac35 914static rtx ix86_struct_value_rtx (tree, int);
b96a374d
AJ
915static bool ix86_ms_bitfield_layout_p (tree);
916static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
917static int extended_reg_mentioned_1 (rtx *, void *);
918static bool ix86_rtx_costs (rtx, int, int, int *);
919static int min_insn_size (rtx);
67dfe110 920static tree ix86_md_asm_clobbers (tree clobbers);
fe984136 921static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
8cd5a4e0
RH
922static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
923 tree, bool);
7c262518 924
7915fbaa
MM
925/* This function is only used on Solaris. */
926static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
927 ATTRIBUTE_UNUSED;
e56feed6 928
53c17031
JH
929/* Register class used for passing given 64bit part of the argument.
930 These represent classes as documented by the PS ABI, with the exception
931 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
d1f87653 932 use SF or DFmode move instead of DImode to avoid reformatting penalties.
53c17031 933
d1f87653 934 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
53c17031
JH
935 whenever possible (upper half does contain padding).
936 */
/* The enumerators are listed in the same order as the strings in
   x86_64_reg_class_name below; keep the two in step when adding a
   class.  */
937enum x86_64_reg_class
938 {
939 X86_64_NO_CLASS,
940 X86_64_INTEGER_CLASS,
941 X86_64_INTEGERSI_CLASS, /* Integer class moved in SImode; upper half is padding.  */
942 X86_64_SSE_CLASS,
943 X86_64_SSESF_CLASS, /* SSE class, moved in SFmode.  */
944 X86_64_SSEDF_CLASS, /* SSE class, moved in DFmode.  */
945 X86_64_SSEUP_CLASS,
946 X86_64_X87_CLASS,
947 X86_64_X87UP_CLASS,
499accd7 948 X86_64_COMPLEX_X87_CLASS,
53c17031
JH
949 X86_64_MEMORY_CLASS
950 };
6c4ccfd8
RH
/* Printable names for the values of enum x86_64_reg_class, listed in the
   same order as the enumerators so that a class value can be used as the
   index.  The last entry corresponds to X86_64_MEMORY_CLASS; it used to
   repeat "no", which made it indistinguishable from X86_64_NO_CLASS
   whenever the names are printed.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "memory"
};
53c17031
JH
955
956#define MAX_CLASSES 4
881b2a96 957
43f3a59d 958/* Table of constants used by fldpi, fldln2, etc.... */
881b2a96
RS
959static REAL_VALUE_TYPE ext_80387_constants_table [5];
960static bool ext_80387_constants_init = 0;
b96a374d 961static void init_ext_80387_constants (void);
672a6f42
NB
962\f
963/* Initialize the GCC target structure. */
91d231cb
JM
964#undef TARGET_ATTRIBUTE_TABLE
965#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
b2ca3702 966#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
967# undef TARGET_MERGE_DECL_ATTRIBUTES
968# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
969#endif
970
8d8e52be
JM
971#undef TARGET_COMP_TYPE_ATTRIBUTES
972#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973
f6155fda
SS
974#undef TARGET_INIT_BUILTINS
975#define TARGET_INIT_BUILTINS ix86_init_builtins
976
977#undef TARGET_EXPAND_BUILTIN
978#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979
bd09bdeb
RH
980#undef TARGET_ASM_FUNCTION_EPILOGUE
981#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 982
17b53c33
NB
983#undef TARGET_ASM_OPEN_PAREN
984#define TARGET_ASM_OPEN_PAREN ""
985#undef TARGET_ASM_CLOSE_PAREN
986#define TARGET_ASM_CLOSE_PAREN ""
987
301d03af
RS
988#undef TARGET_ASM_ALIGNED_HI_OP
989#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990#undef TARGET_ASM_ALIGNED_SI_OP
991#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
992#ifdef ASM_QUAD
993#undef TARGET_ASM_ALIGNED_DI_OP
994#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
995#endif
996
997#undef TARGET_ASM_UNALIGNED_HI_OP
998#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999#undef TARGET_ASM_UNALIGNED_SI_OP
1000#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001#undef TARGET_ASM_UNALIGNED_DI_OP
1002#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003
c237e94a
ZW
1004#undef TARGET_SCHED_ADJUST_COST
1005#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006#undef TARGET_SCHED_ISSUE_RATE
1007#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
9b690711
RH
1008#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
c237e94a 1011
4977bab6
ZW
1012#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1014
f996902d
RH
1015#ifdef HAVE_AS_TLS
1016#undef TARGET_HAVE_TLS
1017#define TARGET_HAVE_TLS true
1018#endif
3a04ff64
RH
1019#undef TARGET_CANNOT_FORCE_CONST_MEM
1020#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 1021
7daebb7a 1022#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1023#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1024
4977bab6
ZW
1025#undef TARGET_MS_BITFIELD_LAYOUT_P
1026#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027
c590b625
RH
1028#undef TARGET_ASM_OUTPUT_MI_THUNK
1029#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1030#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1032
1bc7c5b6
ZW
1033#undef TARGET_ASM_FILE_START
1034#define TARGET_ASM_FILE_START x86_file_start
1035
3c50106f
RH
1036#undef TARGET_RTX_COSTS
1037#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1038#undef TARGET_ADDRESS_COST
1039#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1040
e129d93a
ILT
1041#undef TARGET_FIXED_CONDITION_CODE_REGS
1042#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043#undef TARGET_CC_MODES_COMPATIBLE
1044#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045
18dbd950
RS
1046#undef TARGET_MACHINE_DEPENDENT_REORG
1047#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048
c35d187f
RH
1049#undef TARGET_BUILD_BUILTIN_VA_LIST
1050#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051
67dfe110
KH
1052#undef TARGET_MD_ASM_CLOBBERS
1053#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054
9184f892
KH
1055#undef TARGET_PROMOTE_PROTOTYPES
1056#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
0397ac35
RH
1057#undef TARGET_STRUCT_VALUE_RTX
1058#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
a0524eb3
KH
1059#undef TARGET_SETUP_INCOMING_VARARGS
1060#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
fe984136
RH
1061#undef TARGET_MUST_PASS_IN_STACK
1062#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
8cd5a4e0
RH
1063#undef TARGET_PASS_BY_REFERENCE
1064#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
a0524eb3 1065
cd3ce9b4
JM
1066#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068
f676971a
EC
1069#undef TARGET_VECTOR_MODE_SUPPORTED_P
1070#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1071
07a43492
DJ
1072#ifdef SUBTARGET_INSERT_ATTRIBUTES
1073#undef TARGET_INSERT_ATTRIBUTES
1074#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1075#endif
1076
f6897b10 1077struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1078
e075ae69 1079\f
67c2b45f
JS
1080/* The svr4 ABI for the i386 says that records and unions are returned
1081 in memory. */
1082#ifndef DEFAULT_PCC_STRUCT_RETURN
1083#define DEFAULT_PCC_STRUCT_RETURN 1
1084#endif
1085
f5316dfe
MM
1086/* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have
1090 been parsed.
1091
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1094
1095void
b96a374d 1096override_options (void)
f5316dfe 1097{
400500c4 1098 int i;
3326f410
DJ
1099 int ix86_tune_defaulted = 0;
1100
e075ae69
RH
1101 /* Comes from final.c -- no real reason to change it. */
1102#define MAX_CODE_ALIGN 16
f5316dfe 1103
c8c5cb99
SC
1104 static struct ptt
1105 {
8b60264b
KG
1106 const struct processor_costs *cost; /* Processor costs */
1107 const int target_enable; /* Target flags to enable. */
1108 const int target_disable; /* Target flags to disable. */
1109 const int align_loop; /* Default alignments. */
2cca7283 1110 const int align_loop_max_skip;
8b60264b 1111 const int align_jump;
2cca7283 1112 const int align_jump_max_skip;
8b60264b 1113 const int align_func;
e075ae69 1114 }
0f290768 1115 const processor_target_table[PROCESSOR_max] =
e075ae69 1116 {
4977bab6
ZW
1117 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1124 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1126 };
1127
f4365627 1128 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1129 static struct pta
1130 {
8b60264b
KG
1131 const char *const name; /* processor name or nickname. */
1132 const enum processor_type processor;
0dd0e980
JH
1133 const enum pta_flags
1134 {
1135 PTA_SSE = 1,
1136 PTA_SSE2 = 2,
5bbeea44
JH
1137 PTA_SSE3 = 4,
1138 PTA_MMX = 8,
1139 PTA_PREFETCH_SSE = 16,
1140 PTA_3DNOW = 32,
4977bab6
ZW
1141 PTA_3DNOW_A = 64,
1142 PTA_64BIT = 128
0dd0e980 1143 } flags;
e075ae69 1144 }
0f290768 1145 const processor_alias_table[] =
e075ae69 1146 {
0dd0e980
JH
1147 {"i386", PROCESSOR_I386, 0},
1148 {"i486", PROCESSOR_I486, 0},
1149 {"i586", PROCESSOR_PENTIUM, 0},
1150 {"pentium", PROCESSOR_PENTIUM, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1152 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1153 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1155 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1156 {"i686", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1159 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1160 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1162 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1165 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1166 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1167 | PTA_MMX | PTA_PREFETCH_SSE},
1168 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1169 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1170 {"k6", PROCESSOR_K6, PTA_MMX},
1171 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1173 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1174 | PTA_3DNOW_A},
f4365627 1175 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1176 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1177 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1178 | PTA_3DNOW_A | PTA_SSE},
f4365627 1179 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1180 | PTA_3DNOW_A | PTA_SSE},
f4365627 1181 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1182 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1183 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1184 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1185 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1187 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1189 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1190 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1192 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1193 };
c8c5cb99 1194
ca7558fc 1195 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1196
554707bd
DJ
1197#ifdef SUBTARGET_OVERRIDE_OPTIONS
1198 SUBTARGET_OVERRIDE_OPTIONS;
1199#endif
1200
41ed2237 1201 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1202 in case they weren't overwritten by command line options. */
55ba61f3
JH
1203 if (TARGET_64BIT)
1204 {
1205 if (flag_omit_frame_pointer == 2)
1206 flag_omit_frame_pointer = 1;
1207 if (flag_asynchronous_unwind_tables == 2)
1208 flag_asynchronous_unwind_tables = 1;
1209 if (flag_pcc_struct_return == 2)
1210 flag_pcc_struct_return = 0;
1211 }
1212 else
1213 {
1214 if (flag_omit_frame_pointer == 2)
1215 flag_omit_frame_pointer = 0;
1216 if (flag_asynchronous_unwind_tables == 2)
1217 flag_asynchronous_unwind_tables = 0;
1218 if (flag_pcc_struct_return == 2)
7c712dcc 1219 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1220 }
1221
9e555526
RH
1222 if (!ix86_tune_string && ix86_arch_string)
1223 ix86_tune_string = ix86_arch_string;
1224 if (!ix86_tune_string)
3326f410
DJ
1225 {
1226 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1227 ix86_tune_defaulted = 1;
1228 }
f4365627 1229 if (!ix86_arch_string)
3fec9fa9 1230 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1231
6189a572
JH
1232 if (ix86_cmodel_string != 0)
1233 {
1234 if (!strcmp (ix86_cmodel_string, "small"))
1235 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1236 else if (flag_pic)
c725bd79 1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1238 else if (!strcmp (ix86_cmodel_string, "32"))
1239 ix86_cmodel = CM_32;
1240 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1241 ix86_cmodel = CM_KERNEL;
1242 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1243 ix86_cmodel = CM_MEDIUM;
1244 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1245 ix86_cmodel = CM_LARGE;
1246 else
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1248 }
1249 else
1250 {
1251 ix86_cmodel = CM_32;
1252 if (TARGET_64BIT)
1253 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1254 }
c93e80a5
JH
1255 if (ix86_asm_string != 0)
1256 {
1257 if (!strcmp (ix86_asm_string, "intel"))
1258 ix86_asm_dialect = ASM_INTEL;
1259 else if (!strcmp (ix86_asm_string, "att"))
1260 ix86_asm_dialect = ASM_ATT;
1261 else
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1263 }
6189a572 1264 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
9e637a26 1265 error ("code model %qs not supported in the %s bit mode",
6189a572
JH
1266 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1267 if (ix86_cmodel == CM_LARGE)
9e637a26 1268 sorry ("code model %<large%> not supported yet");
0c2dc519 1269 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1270 sorry ("%i-bit mode not compiled in",
0c2dc519 1271 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1272
f4365627
JH
1273 for (i = 0; i < pta_size; i++)
1274 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1275 {
1276 ix86_arch = processor_alias_table[i].processor;
1277 /* Default cpu tuning to the architecture. */
9e555526 1278 ix86_tune = ix86_arch;
f4365627 1279 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1280 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1281 target_flags |= MASK_MMX;
1282 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1283 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1284 target_flags |= MASK_3DNOW;
1285 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1286 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1287 target_flags |= MASK_3DNOW_A;
1288 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1289 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1290 target_flags |= MASK_SSE;
1291 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1292 && !(target_flags_explicit & MASK_SSE2))
f4365627 1293 target_flags |= MASK_SSE2;
5bbeea44
JH
1294 if (processor_alias_table[i].flags & PTA_SSE3
1295 && !(target_flags_explicit & MASK_SSE3))
1296 target_flags |= MASK_SSE3;
f4365627
JH
1297 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1298 x86_prefetch_sse = true;
6716ecbc
JM
1299 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1300 error ("CPU you selected does not support x86-64 "
1301 "instruction set");
1302 break;
1303 }
1304
1305 if (i == pta_size)
1306 error ("bad value (%s) for -march= switch", ix86_arch_string);
1307
1308 for (i = 0; i < pta_size; i++)
1309 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1310 {
1311 ix86_tune = processor_alias_table[i].processor;
4977bab6 1312 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
1313 {
1314 if (ix86_tune_defaulted)
1315 {
1316 ix86_tune_string = "x86-64";
1317 for (i = 0; i < pta_size; i++)
1318 if (! strcmp (ix86_tune_string,
1319 processor_alias_table[i].name))
1320 break;
1321 ix86_tune = processor_alias_table[i].processor;
1322 }
1323 else
1324 error ("CPU you selected does not support x86-64 "
1325 "instruction set");
1326 }
c618c6ec
JJ
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1333 x86_prefetch_sse = true;
f4365627
JH
1334 break;
1335 }
f4365627 1336 if (i == pta_size)
9e555526 1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1338
2ab0437e
JH
1339 if (optimize_size)
1340 ix86_cost = &size_cost;
1341 else
9e555526
RH
1342 ix86_cost = processor_target_table[ix86_tune].cost;
1343 target_flags |= processor_target_table[ix86_tune].target_enable;
1344 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1345
36edd3cc
BS
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status = ix86_init_machine_status;
fce5a9f2 1348
0f290768 1349 /* Validate -mregparm= value. */
e075ae69 1350 if (ix86_regparm_string)
b08de47e 1351 {
400500c4
RK
1352 i = atoi (ix86_regparm_string);
1353 if (i < 0 || i > REGPARM_MAX)
1354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1355 else
1356 ix86_regparm = i;
b08de47e 1357 }
0d7d98ee
JH
1358 else
1359 if (TARGET_64BIT)
1360 ix86_regparm = REGPARM_MAX;
b08de47e 1361
3e18fdf6 1362 /* If the user has provided any of the -malign-* options,
a4f31c00 1363 warn and use that value only if -falign-* is not set.
3e18fdf6 1364 Remove this code in GCC 3.2 or later. */
e075ae69 1365 if (ix86_align_loops_string)
b08de47e 1366 {
3e18fdf6
GK
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops == 0)
1369 {
1370 i = atoi (ix86_align_loops_string);
1371 if (i < 0 || i > MAX_CODE_ALIGN)
1372 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1373 else
1374 align_loops = 1 << i;
1375 }
b08de47e 1376 }
3af4bd89 1377
e075ae69 1378 if (ix86_align_jumps_string)
b08de47e 1379 {
3e18fdf6
GK
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps == 0)
1382 {
1383 i = atoi (ix86_align_jumps_string);
1384 if (i < 0 || i > MAX_CODE_ALIGN)
1385 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1386 else
1387 align_jumps = 1 << i;
1388 }
b08de47e 1389 }
b08de47e 1390
e075ae69 1391 if (ix86_align_funcs_string)
b08de47e 1392 {
3e18fdf6
GK
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions == 0)
1395 {
1396 i = atoi (ix86_align_funcs_string);
1397 if (i < 0 || i > MAX_CODE_ALIGN)
1398 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1399 else
1400 align_functions = 1 << i;
1401 }
b08de47e 1402 }
3af4bd89 1403
3e18fdf6 1404 /* Default align_* from the processor table. */
3e18fdf6 1405 if (align_loops == 0)
2cca7283 1406 {
9e555526
RH
1407 align_loops = processor_target_table[ix86_tune].align_loop;
1408 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1409 }
3e18fdf6 1410 if (align_jumps == 0)
2cca7283 1411 {
9e555526
RH
1412 align_jumps = processor_target_table[ix86_tune].align_jump;
1413 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1414 }
3e18fdf6 1415 if (align_functions == 0)
2cca7283 1416 {
9e555526 1417 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1418 }
3e18fdf6 1419
e4c0478d 1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1425 ? TARGET_64BIT ? 128 : 32
fbb83b43 1426 : 128);
e075ae69 1427 if (ix86_preferred_stack_boundary_string)
3af4bd89 1428 {
400500c4 1429 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1430 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1432 TARGET_64BIT ? 4 : 2);
400500c4
RK
1433 else
1434 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1435 }
77a989d1 1436
0f290768 1437 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1438 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1439 if (ix86_branch_cost_string)
804a8ee0 1440 {
400500c4
RK
1441 i = atoi (ix86_branch_cost_string);
1442 if (i < 0 || i > 5)
1443 error ("-mbranch-cost=%d is not between 0 and 5", i);
1444 else
1445 ix86_branch_cost = i;
804a8ee0 1446 }
804a8ee0 1447
f996902d
RH
1448 if (ix86_tls_dialect_string)
1449 {
1450 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1451 ix86_tls_dialect = TLS_DIALECT_GNU;
1452 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1453 ix86_tls_dialect = TLS_DIALECT_SUN;
1454 else
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string);
1457 }
1458
e9a25f70 1459 /* Keep nonleaf frame pointers. */
14c473b9
RS
1460 if (flag_omit_frame_pointer)
1461 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1462 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1463 flag_omit_frame_pointer = 1;
e075ae69
RH
1464
1465 /* If we're doing fast math, we don't care about comparison order
1466 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1467 if (flag_unsafe_math_optimizations)
e075ae69
RH
1468 target_flags &= ~MASK_IEEE_FP;
1469
30c99a84
RH
1470 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1471 since the insns won't need emulation. */
9690a821 1472 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
30c99a84
RH
1473 target_flags &= ~MASK_NO_FANCY_MATH_387;
1474
ba2baa55
RS
1475 /* Likewise, if the target doesn't have a 387, or we've specified
1476 software floating point, don't use 387 inline intrinsics. */
1477 if (!TARGET_80387)
1478 target_flags |= MASK_NO_FANCY_MATH_387;
1479
9e200aaf
KC
1480 /* Turn on SSE2 builtins for -msse3. */
1481 if (TARGET_SSE3)
22c7c85e
L
1482 target_flags |= MASK_SSE2;
1483
1484 /* Turn on SSE builtins for -msse2. */
1485 if (TARGET_SSE2)
1486 target_flags |= MASK_SSE;
1487
a5370cf0
RH
1488 /* Turn on MMX builtins for -msse. */
1489 if (TARGET_SSE)
1490 {
1491 target_flags |= MASK_MMX & ~target_flags_explicit;
1492 x86_prefetch_sse = true;
1493 }
1494
1495 /* Turn on MMX builtins for 3Dnow. */
1496 if (TARGET_3DNOW)
1497 target_flags |= MASK_MMX;
1498
14f73b5a
JH
1499 if (TARGET_64BIT)
1500 {
1501 if (TARGET_ALIGN_DOUBLE)
c725bd79 1502 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1503 if (TARGET_RTD)
c725bd79 1504 error ("-mrtd calling convention not supported in the 64bit mode");
a5370cf0
RH
1505
1506 /* Enable by default the SSE and MMX builtins. Do allow the user to
1507 explicitly disable any of these. In particular, disabling SSE and
1508 MMX for kernel code is extremely useful. */
1509 target_flags
1510 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1511 & ~target_flags_explicit);
1512
1513 if (TARGET_SSE)
1514 ix86_fpmath = FPMATH_SSE;
14f73b5a 1515 }
965f5423 1516 else
a5b378d6
JH
1517 {
1518 ix86_fpmath = FPMATH_387;
1519 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1520 when the programmer takes care to keep the stack from being destroyed. */
1521 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1522 target_flags |= MASK_NO_RED_ZONE;
1523 }
965f5423
JH
1524
1525 if (ix86_fpmath_string != 0)
1526 {
1527 if (! strcmp (ix86_fpmath_string, "387"))
1528 ix86_fpmath = FPMATH_387;
1529 else if (! strcmp (ix86_fpmath_string, "sse"))
1530 {
1531 if (!TARGET_SSE)
1532 {
1533 warning ("SSE instruction set disabled, using 387 arithmetics");
1534 ix86_fpmath = FPMATH_387;
1535 }
1536 else
1537 ix86_fpmath = FPMATH_SSE;
1538 }
1539 else if (! strcmp (ix86_fpmath_string, "387,sse")
1540 || ! strcmp (ix86_fpmath_string, "sse,387"))
1541 {
1542 if (!TARGET_SSE)
1543 {
1544 warning ("SSE instruction set disabled, using 387 arithmetics");
1545 ix86_fpmath = FPMATH_387;
1546 }
1547 else if (!TARGET_80387)
1548 {
1549 warning ("387 instruction set disabled, using SSE arithmetics");
1550 ix86_fpmath = FPMATH_SSE;
1551 }
1552 else
1553 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1554 }
fce5a9f2 1555 else
965f5423
JH
1556 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1557 }
14f73b5a 1558
ec0641f6
RS
1559 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1560 if (! (ix86_fpmath & FPMATH_387))
1561 target_flags |= MASK_NO_FANCY_MATH_387;
1562
9e555526 1563 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1564 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1565 && !optimize_size)
1566 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1567
1568 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1569 {
1570 char *p;
1571 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1572 p = strchr (internal_label_prefix, 'X');
1573 internal_label_prefix_len = p - internal_label_prefix;
1574 *p = '\0';
1575 }
a5370cf0
RH
1576
1577 /* When scheduling description is not available, disable scheduler pass
1578 so it won't slow down the compilation and make x87 code slower. */
ad7b96a9
JH
1579 if (!TARGET_SCHEDULE)
1580 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
f5316dfe
MM
1581}
1582\f
32b5b1aa 1583void
b96a374d 1584optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1585{
e9a25f70
JL
1586 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1587 make the problem with not enough registers even worse. */
32b5b1aa
SC
1588#ifdef INSN_SCHEDULING
1589 if (level > 1)
1590 flag_schedule_insns = 0;
1591#endif
55ba61f3
JH
1592
1593 /* The default values of these switches depend on the TARGET_64BIT
1594 that is not known at this moment. Mark these values with 2 and
1595 let user the to override these. In case there is no command line option
1596 specifying them, we will set the defaults in override_options. */
1597 if (optimize >= 1)
1598 flag_omit_frame_pointer = 2;
1599 flag_pcc_struct_return = 2;
1600 flag_asynchronous_unwind_tables = 2;
4f514514
JM
1601#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1602 SUBTARGET_OPTIMIZATION_OPTIONS;
1603#endif
32b5b1aa 1604}
b08de47e 1605\f
91d231cb
JM
1606/* Table of valid machine attributes. */
1607const struct attribute_spec ix86_attribute_table[] =
b08de47e 1608{
91d231cb 1609 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1610 /* Stdcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
91d231cb 1612 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1613 /* Fastcall attribute says callee is responsible for popping arguments
1614 if they are not variable. */
1615 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1616 /* Cdecl attribute says the callee is a normal C declaration */
1617 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1618 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1619 passed in registers. */
91d231cb 1620 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
b2ca3702
MM
1621#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1622 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1623 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3da1eb0b 1624 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1625#endif
fe77449a
DR
1626 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1627 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
07a43492
DJ
1628#ifdef SUBTARGET_ATTRIBUTE_TABLE
1629 SUBTARGET_ATTRIBUTE_TABLE,
1630#endif
91d231cb
JM
1631 { NULL, 0, 0, false, false, false, NULL }
1632};
1633
5fbf0217
EB
1634/* Decide whether we can make a sibling call to a function. DECL is the
1635 declaration of the function being targeted by the call and EXP is the
1636 CALL_EXPR representing the call. */
4977bab6
ZW
1637
1638static bool
b96a374d 1639ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1640{
1641 /* If we are generating position-independent code, we cannot sibcall
1642 optimize any indirect call, or a direct call to a global function,
1643 as the PLT requires %ebx be live. */
1644 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1645 return false;
1646
1647 /* If we are returning floats on the 80387 register stack, we cannot
1648 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1649 function that does or, conversely, from a function that does return
1650 a float to a function that doesn't; the necessary stack adjustment
1651 would not be executed. */
4977bab6 1652 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1653 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1654 return false;
1655
1656 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1657 register for the address of the target function. Make sure that all
4977bab6
ZW
1658 such registers are not used for passing parameters. */
1659 if (!decl && !TARGET_64BIT)
1660 {
e767b5be 1661 tree type;
4977bab6
ZW
1662
1663 /* We're looking at the CALL_EXPR, we need the type of the function. */
1664 type = TREE_OPERAND (exp, 0); /* pointer expression */
1665 type = TREE_TYPE (type); /* pointer type */
1666 type = TREE_TYPE (type); /* function type */
1667
e767b5be 1668 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1669 {
1670 /* ??? Need to count the actual number of registers to be used,
1671 not the possible number of registers. Fix later. */
1672 return false;
1673 }
1674 }
1675
1676 /* Otherwise okay. That also includes certain types of indirect calls. */
1677 return true;
1678}
1679
e91f04de 1680/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1681 arguments as in struct attribute_spec.handler. */
1682static tree
b96a374d
AJ
1683ix86_handle_cdecl_attribute (tree *node, tree name,
1684 tree args ATTRIBUTE_UNUSED,
1685 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1686{
1687 if (TREE_CODE (*node) != FUNCTION_TYPE
1688 && TREE_CODE (*node) != METHOD_TYPE
1689 && TREE_CODE (*node) != FIELD_DECL
1690 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1691 {
9e637a26 1692 warning ("%qs attribute only applies to functions",
91d231cb
JM
1693 IDENTIFIER_POINTER (name));
1694 *no_add_attrs = true;
1695 }
e91f04de
CH
1696 else
1697 {
1698 if (is_attribute_p ("fastcall", name))
1699 {
1700 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1701 {
1702 error ("fastcall and stdcall attributes are not compatible");
1703 }
1704 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1705 {
1706 error ("fastcall and regparm attributes are not compatible");
1707 }
1708 }
1709 else if (is_attribute_p ("stdcall", name))
1710 {
1711 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1712 {
1713 error ("fastcall and stdcall attributes are not compatible");
1714 }
1715 }
1716 }
b08de47e 1717
91d231cb
JM
1718 if (TARGET_64BIT)
1719 {
9e637a26 1720 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
91d231cb
JM
1721 *no_add_attrs = true;
1722 }
b08de47e 1723
91d231cb
JM
1724 return NULL_TREE;
1725}
b08de47e 1726
91d231cb
JM
1727/* Handle a "regparm" attribute;
1728 arguments as in struct attribute_spec.handler. */
1729static tree
b96a374d
AJ
1730ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1731 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1732{
1733 if (TREE_CODE (*node) != FUNCTION_TYPE
1734 && TREE_CODE (*node) != METHOD_TYPE
1735 && TREE_CODE (*node) != FIELD_DECL
1736 && TREE_CODE (*node) != TYPE_DECL)
1737 {
9e637a26 1738 warning ("%qs attribute only applies to functions",
91d231cb
JM
1739 IDENTIFIER_POINTER (name));
1740 *no_add_attrs = true;
1741 }
1742 else
1743 {
1744 tree cst;
b08de47e 1745
91d231cb
JM
1746 cst = TREE_VALUE (args);
1747 if (TREE_CODE (cst) != INTEGER_CST)
1748 {
9e637a26 1749 warning ("%qs attribute requires an integer constant argument",
91d231cb
JM
1750 IDENTIFIER_POINTER (name));
1751 *no_add_attrs = true;
1752 }
1753 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1754 {
9e637a26 1755 warning ("argument to %qs attribute larger than %d",
91d231cb
JM
1756 IDENTIFIER_POINTER (name), REGPARM_MAX);
1757 *no_add_attrs = true;
1758 }
e91f04de
CH
1759
1760 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1761 {
1762 error ("fastcall and regparm attributes are not compatible");
1763 }
b08de47e
MM
1764 }
1765
91d231cb 1766 return NULL_TREE;
b08de47e
MM
1767}
1768
1769/* Return 0 if the attributes for two types are incompatible, 1 if they
1770 are compatible, and 2 if they are nearly compatible (which causes a
1771 warning to be generated). */
1772
8d8e52be 1773static int
b96a374d 1774ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1775{
0f290768 1776 /* Check for mismatch of non-default calling convention. */
27c38fbe 1777 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1778
1779 if (TREE_CODE (type1) != FUNCTION_TYPE)
1780 return 1;
1781
b96a374d 1782 /* Check for mismatched fastcall types */
e91f04de
CH
1783 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1784 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1785 return 0;
e91f04de 1786
afcfe58c 1787 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1788 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1789 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1790 return 0;
1791 if (ix86_function_regparm (type1, NULL)
1792 != ix86_function_regparm (type2, NULL))
afcfe58c 1793 return 0;
b08de47e
MM
1794 return 1;
1795}
b08de47e 1796\f
e767b5be
JH
1797/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1798 DECL may be NULL when calling function indirectly
839a4992 1799 or considering a libcall. */
483ab821
MM
1800
1801static int
e767b5be 1802ix86_function_regparm (tree type, tree decl)
483ab821
MM
1803{
1804 tree attr;
e767b5be
JH
1805 int regparm = ix86_regparm;
1806 bool user_convention = false;
483ab821 1807
e767b5be
JH
1808 if (!TARGET_64BIT)
1809 {
1810 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1811 if (attr)
1812 {
1813 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1814 user_convention = true;
1815 }
1816
1817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1818 {
1819 regparm = 2;
1820 user_convention = true;
1821 }
1822
1823 /* Use register calling convention for local functions when possible. */
1824 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1825 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1826 {
1827 struct cgraph_local_info *i = cgraph_local_info (decl);
1828 if (i && i->local)
1829 {
1830 /* We can't use regparm(3) for nested functions as these use
1831 static chain pointer in third argument. */
1832 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1833 regparm = 2;
1834 else
1835 regparm = 3;
1836 }
1837 }
1838 }
1839 return regparm;
483ab821
MM
1840}
1841
f676971a 1842/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
1843 ix86_expand_prologue to determine if we need special help before
1844 calling allocate_stack_worker. */
1845
1846static bool
1847ix86_eax_live_at_start_p (void)
1848{
1849 /* Cheat. Don't bother working forward from ix86_function_regparm
1850 to the function type to whether an actual argument is located in
1851 eax. Instead just look at cfg info, which is still close enough
1852 to correct at this point. This gives false positives for broken
1853 functions that might use uninitialized data that happens to be
1854 allocated in eax, but who cares? */
1855 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1856}
1857
b08de47e
MM
1858/* Value is the number of bytes of arguments automatically
1859 popped when returning from a subroutine call.
1860 FUNDECL is the declaration node of the function (as a tree),
1861 FUNTYPE is the data type of the function (as a tree),
1862 or for a library call it is an identifier node for the subroutine name.
1863 SIZE is the number of bytes of arguments passed on the stack.
1864
1865 On the 80386, the RTD insn may be used to pop them if the number
1866 of args is fixed, but if the number is variable then the caller
1867 must pop them all. RTD can't be used for library calls now
1868 because the library is compiled with the Unix compiler.
1869 Use of RTD is a selectable option, since it is incompatible with
1870 standard Unix calling sequences. If the option is not selected,
1871 the caller must always pop the args.
1872
1873 The attribute stdcall is equivalent to RTD on a per module basis. */
1874
1875int
b96a374d 1876ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1877{
3345ee7d 1878 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1879
43f3a59d 1880 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1881 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1882
43f3a59d
KH
1883 /* Stdcall and fastcall functions will pop the stack if not
1884 variable args. */
e91f04de
CH
1885 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1886 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1887 rtd = 1;
79325812 1888
698cdd84
SC
1889 if (rtd
1890 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1891 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1892 == void_type_node)))
698cdd84
SC
1893 return size;
1894 }
79325812 1895
232b8f52 1896 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1897 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff
JB
1898 && !TARGET_64BIT
1899 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 1900 {
e767b5be 1901 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1902
1903 if (!nregs)
1904 return GET_MODE_SIZE (Pmode);
1905 }
1906
1907 return 0;
b08de47e 1908}
b08de47e
MM
1909\f
1910/* Argument support functions. */
1911
53c17031
JH
1912/* Return true when register may be used to pass function parameters. */
1913bool
b96a374d 1914ix86_function_arg_regno_p (int regno)
53c17031
JH
1915{
1916 int i;
1917 if (!TARGET_64BIT)
0333394e
JJ
1918 return (regno < REGPARM_MAX
1919 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1920 if (SSE_REGNO_P (regno) && TARGET_SSE)
1921 return true;
1922 /* RAX is used as hidden argument to va_arg functions. */
1923 if (!regno)
1924 return true;
1925 for (i = 0; i < REGPARM_MAX; i++)
1926 if (regno == x86_64_int_parameter_registers[i])
1927 return true;
1928 return false;
1929}
1930
fe984136
RH
1931/* Return if we do not know how to pass TYPE solely in registers. */
1932
1933static bool
1934ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1935{
1936 if (must_pass_in_stack_var_size_or_pad (mode, type))
1937 return true;
dcbca208
RH
1938
1939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1940 The layout_type routine is crafty and tries to trick us into passing
1941 currently unsupported vector types on the stack by using TImode. */
1942 return (!TARGET_64BIT && mode == TImode
1943 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
1944}
1945
b08de47e
MM
1946/* Initialize a variable CUM of type CUMULATIVE_ARGS
1947 for a call to a function whose data type is FNTYPE.
1948 For a library call, FNTYPE is 0. */
1949
1950void
b96a374d
AJ
1951init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1952 tree fntype, /* tree ptr for function decl */
1953 rtx libname, /* SYMBOL_REF of library name or 0 */
1954 tree fndecl)
b08de47e
MM
1955{
1956 static CUMULATIVE_ARGS zero_cum;
1957 tree param, next_param;
1958
1959 if (TARGET_DEBUG_ARG)
1960 {
1961 fprintf (stderr, "\ninit_cumulative_args (");
1962 if (fntype)
e9a25f70
JL
1963 fprintf (stderr, "fntype code = %s, ret code = %s",
1964 tree_code_name[(int) TREE_CODE (fntype)],
1965 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1966 else
1967 fprintf (stderr, "no fntype");
1968
1969 if (libname)
1970 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1971 }
1972
1973 *cum = zero_cum;
1974
1975 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1976 if (fntype)
1977 cum->nregs = ix86_function_regparm (fntype, fndecl);
1978 else
1979 cum->nregs = ix86_regparm;
78fbfc4b
JB
1980 if (TARGET_SSE)
1981 cum->sse_nregs = SSE_REGPARM_MAX;
1982 if (TARGET_MMX)
1983 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
1984 cum->warn_sse = true;
1985 cum->warn_mmx = true;
53c17031 1986 cum->maybe_vaarg = false;
b08de47e 1987
e91f04de
CH
1988 /* Use ecx and edx registers if function has fastcall attribute */
1989 if (fntype && !TARGET_64BIT)
1990 {
1991 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1992 {
1993 cum->nregs = 2;
1994 cum->fastcall = 1;
1995 }
1996 }
1997
b08de47e
MM
1998 /* Determine if this function has variable arguments. This is
1999 indicated by the last argument being 'void_type_mode' if there
2000 are no variable arguments. If there are variable arguments, then
78fbfc4b 2001 we won't pass anything in registers in 32-bit mode. */
b08de47e 2002
78fbfc4b 2003 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
b08de47e
MM
2004 {
2005 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 2006 param != 0; param = next_param)
b08de47e
MM
2007 {
2008 next_param = TREE_CHAIN (param);
e9a25f70 2009 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
2010 {
2011 if (!TARGET_64BIT)
e91f04de
CH
2012 {
2013 cum->nregs = 0;
e1be55d0
JH
2014 cum->sse_nregs = 0;
2015 cum->mmx_nregs = 0;
2016 cum->warn_sse = 0;
2017 cum->warn_mmx = 0;
e91f04de
CH
2018 cum->fastcall = 0;
2019 }
53c17031
JH
2020 cum->maybe_vaarg = true;
2021 }
b08de47e
MM
2022 }
2023 }
53c17031
JH
2024 if ((!fntype && !libname)
2025 || (fntype && !TYPE_ARG_TYPES (fntype)))
2026 cum->maybe_vaarg = 1;
b08de47e
MM
2027
2028 if (TARGET_DEBUG_ARG)
2029 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2030
2031 return;
2032}
2033
6c4ccfd8
RH
2034/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2035 But in the case of vector types, it is some vector mode.
2036
2037 When we have only some of our vector isa extensions enabled, then there
2038 are some modes for which vector_mode_supported_p is false. For these
2039 modes, the generic vector support in gcc will choose some non-vector mode
2040 in order to implement the type. By computing the natural mode, we'll
2041 select the proper ABI location for the operand and not depend on whatever
2042 the middle-end decides to do with these vector types. */
2043
2044static enum machine_mode
2045type_natural_mode (tree type)
2046{
2047 enum machine_mode mode = TYPE_MODE (type);
2048
2049 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2050 {
2051 HOST_WIDE_INT size = int_size_in_bytes (type);
2052 if ((size == 8 || size == 16)
2053 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2054 && TYPE_VECTOR_SUBPARTS (type) > 1)
2055 {
2056 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2057
2058 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2059 mode = MIN_MODE_VECTOR_FLOAT;
2060 else
2061 mode = MIN_MODE_VECTOR_INT;
2062
2063 /* Get the mode which has this inner mode and number of units. */
2064 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2065 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2066 && GET_MODE_INNER (mode) == innermode)
2067 return mode;
2068
2069 abort ();
2070 }
2071 }
2072
2073 return mode;
2074}
2075
2076/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2077 this may not agree with the mode that the type system has chosen for the
2078 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2079 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2080
2081static rtx
2082gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2083 unsigned int regno)
2084{
2085 rtx tmp;
2086
2087 if (orig_mode != BLKmode)
2088 tmp = gen_rtx_REG (orig_mode, regno);
2089 else
2090 {
2091 tmp = gen_rtx_REG (mode, regno);
2092 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2093 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2094 }
2095
2096 return tmp;
2097}
2098
d1f87653 2099/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 2100 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
2101 class and assign registers accordingly. */
2102
2103/* Return the union class of CLASS1 and CLASS2.
2104 See the x86-64 PS ABI for details. */
2105
2106static enum x86_64_reg_class
b96a374d 2107merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
2108{
2109 /* Rule #1: If both classes are equal, this is the resulting class. */
2110 if (class1 == class2)
2111 return class1;
2112
2113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2114 the other class. */
2115 if (class1 == X86_64_NO_CLASS)
2116 return class2;
2117 if (class2 == X86_64_NO_CLASS)
2118 return class1;
2119
2120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2121 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2122 return X86_64_MEMORY_CLASS;
2123
2124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2125 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2126 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2127 return X86_64_INTEGERSI_CLASS;
2128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2130 return X86_64_INTEGER_CLASS;
2131
499accd7
JB
2132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2133 MEMORY is used. */
2134 if (class1 == X86_64_X87_CLASS
2135 || class1 == X86_64_X87UP_CLASS
2136 || class1 == X86_64_COMPLEX_X87_CLASS
2137 || class2 == X86_64_X87_CLASS
2138 || class2 == X86_64_X87UP_CLASS
2139 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
2140 return X86_64_MEMORY_CLASS;
2141
2142 /* Rule #6: Otherwise class SSE is used. */
2143 return X86_64_SSE_CLASS;
2144}
2145
2146/* Classify the argument of type TYPE and mode MODE.
2147 CLASSES will be filled by the register class used to pass each word
2148 of the operand. The number of words is returned. In case the parameter
2149 should be passed in memory, 0 is returned. As a special case for zero
2150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2151
2152 BIT_OFFSET is used internally for handling records and specifies the
2153 offset of this operand within the enclosing aggregate, in bits, taken
     modulo 256 to avoid overflow cases. The recursive calls below pass
     the offset of the base or field plus the incoming BIT_OFFSET.
2154
2155 See the x86-64 PS ABI for details.
2156*/
2157
2158static int
b96a374d
AJ
2159classify_argument (enum machine_mode mode, tree type,
2160 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2161{
/* BYTES is the size of the operand; WORDS is the number of words it
   spans once its starting offset within the first 64-bit word is
   included. */
296e4ae8 2162 HOST_WIDE_INT bytes =
53c17031 2163 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2164 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2165
c60ee6f5
JH
2166 /* Variable sized entities are always passed/returned in memory. */
2167 if (bytes < 0)
2168 return 0;
2169
/* Anything the target hook insists on passing on the stack is memory
   class as well. */
dafc5b82 2170 if (mode != VOIDmode
fe984136 2171 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
2172 return 0;
2173
53c17031
JH
2174 if (type && AGGREGATE_TYPE_P (type))
2175 {
2176 int i;
2177 tree field;
2178 enum x86_64_reg_class subclasses[MAX_CLASSES];
2179
2180 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2181 if (bytes > 16)
2182 return 0;
2183
2184 for (i = 0; i < words; i++)
2185 classes[i] = X86_64_NO_CLASS;
2186
2187 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2188 signalize memory class, so handle it as special case. */
2189 if (!words)
2190 {
2191 classes[0] = X86_64_NO_CLASS;
2192 return 1;
2193 }
2194
2195 /* Classify each field of record and merge classes. */
2196 if (TREE_CODE (type) == RECORD_TYPE)
2197 {
91ea38f9 2198 /* For classes first merge in the field of the subclasses. */
fa743e8c 2199 if (TYPE_BINFO (type))
91ea38f9 2200 {
fa743e8c 2201 tree binfo, base_binfo;
e8112eac 2202 int basenum;
91ea38f9 2203
e8112eac
ZK
2204 for (binfo = TYPE_BINFO (type), basenum = 0;
2205 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2206 {
91ea38f9 2207 int num;
fa743e8c
NS
2208 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2209 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2210
/* NOTE(review): the offset passed here is (offset + bit_offset) % 256,
   whereas the corresponding call in the union branch below passes
   (offset + (bit_offset % 64)) % 256 -- confirm which is intended. */
2211 num = classify_argument (TYPE_MODE (type),
2212 type, subclasses,
2213 (offset + bit_offset) % 256);
2214 if (!num)
2215 return 0;
/* Merge the classes of the base into the words it occupies. */
2216 for (i = 0; i < num; i++)
2217 {
db01f480 2218 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2219 classes[i + pos] =
2220 merge_classes (subclasses[i], classes[i + pos]);
2221 }
2222 }
2223 }
43f3a59d 2224 /* And now merge the fields of structure. */
53c17031
JH
2225 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2226 {
2227 if (TREE_CODE (field) == FIELD_DECL)
2228 {
2229 int num;
2230
2231 /* Bitfields are always classified as integer. Handle them
2232 early, since later code would consider them to be
2233 misaligned integers. */
/* NOTE(review): the word range below is computed from
   int_bit_position (field) alone; unlike the non-bitfield branch it
   does not add bit_offset -- confirm this is correct for bitfields in
   nested records. */
2234 if (DECL_BIT_FIELD (field))
2235 {
2236 for (i = int_bit_position (field) / 8 / 8;
2237 i < (int_bit_position (field)
2238 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2239 + 63) / 8 / 8; i++)
53c17031
JH
2240 classes[i] =
2241 merge_classes (X86_64_INTEGER_CLASS,
2242 classes[i]);
2243 }
2244 else
2245 {
2246 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2247 TREE_TYPE (field), subclasses,
2248 (int_bit_position (field)
2249 + bit_offset) % 256);
2250 if (!num)
2251 return 0;
/* Merge the classes of the field into the words it occupies. */
2252 for (i = 0; i < num; i++)
2253 {
2254 int pos =
db01f480 2255 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2256 classes[i + pos] =
2257 merge_classes (subclasses[i], classes[i + pos]);
2258 }
2259 }
2260 }
2261 }
2262 }
2263 /* Arrays are handled as small records. */
2264 else if (TREE_CODE (type) == ARRAY_TYPE)
2265 {
2266 int num;
/* Classify one element, then repeat its classes across all the words
   of the array. */
2267 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2268 TREE_TYPE (type), subclasses, bit_offset);
2269 if (!num)
2270 return 0;
2271
2272 /* The partial classes are now full classes. */
2273 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2274 subclasses[0] = X86_64_SSE_CLASS;
2275 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2276 subclasses[0] = X86_64_INTEGER_CLASS;
2277
2278 for (i = 0; i < words; i++)
2279 classes[i] = subclasses[i % num];
2280 }
2281 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2282 else if (TREE_CODE (type) == UNION_TYPE
2283 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2284 {
91ea38f9 2285 /* For classes first merge in the field of the subclasses. */
fa743e8c 2286 if (TYPE_BINFO (type))
91ea38f9 2287 {
fa743e8c 2288 tree binfo, base_binfo;
e8112eac 2289 int basenum;
91ea38f9 2290
e8112eac
ZK
2291 for (binfo = TYPE_BINFO (type), basenum = 0;
2292 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2293 {
91ea38f9 2294 int num;
fa743e8c
NS
2295 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2296 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2297
2298 num = classify_argument (TYPE_MODE (type),
2299 type, subclasses,
db01f480 2300 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2301 if (!num)
2302 return 0;
2303 for (i = 0; i < num; i++)
2304 {
c16576e6 2305 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2306 classes[i + pos] =
2307 merge_classes (subclasses[i], classes[i + pos]);
2308 }
2309 }
2310 }
/* Every member starts at the union's own offset, so each one is
   classified with the unchanged BIT_OFFSET and merged from word 0. */
53c17031
JH
2311 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2312 {
2313 if (TREE_CODE (field) == FIELD_DECL)
2314 {
2315 int num;
2316 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2317 TREE_TYPE (field), subclasses,
2318 bit_offset);
2319 if (!num)
2320 return 0;
2321 for (i = 0; i < num; i++)
2322 classes[i] = merge_classes (subclasses[i], classes[i]);
2323 }
2324 }
2325 }
2326 else
2327 abort ();
2328
2329 /* Final merger cleanup. */
2330 for (i = 0; i < words; i++)
2331 {
2332 /* If one class is MEMORY, everything should be passed in
2333 memory. */
2334 if (classes[i] == X86_64_MEMORY_CLASS)
2335 return 0;
2336
d6a7951f 2337 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2338 X86_64_SSE_CLASS. */
2339 if (classes[i] == X86_64_SSEUP_CLASS
2340 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2341 classes[i] = X86_64_SSE_CLASS;
2342
d6a7951f 2343 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2344 if (classes[i] == X86_64_X87UP_CLASS
2345 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2346 classes[i] = X86_64_SSE_CLASS;
2347 }
2348 return words;
2349 }
2350
2351 /* Compute alignment needed. We align all types to natural boundaries with
2352 exception of XFmode that is aligned to 64bits. */
/* NOTE(review): the code below uses 128 for XFmode and 256 (halved to
   128 for complex modes) for XCmode, which does not match the "64bits"
   stated in the comment above -- confirm which is intended. */
2353 if (mode != VOIDmode && mode != BLKmode)
2354 {
2355 int mode_alignment = GET_MODE_BITSIZE (mode);
2356
2357 if (mode == XFmode)
2358 mode_alignment = 128;
2359 else if (mode == XCmode)
2360 mode_alignment = 256;
2c6b27c3
JH
2361 if (COMPLEX_MODE_P (mode))
2362 mode_alignment /= 2;
f5143c46 2363 /* Misaligned fields are always returned in memory. */
53c17031
JH
2364 if (bit_offset % mode_alignment)
2365 return 0;
2366 }
2367
9e9fb0ce
JB
2368 /* For V1xx modes (a vector whose single element fills it), just use
     the base mode. */
2369 if (VECTOR_MODE_P (mode)
2370 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2371 mode = GET_MODE_INNER (mode);
2372
53c17031
JH
2373 /* Classification of atomic types. */
2374 switch (mode)
2375 {
2376 case DImode:
2377 case SImode:
2378 case HImode:
2379 case QImode:
2380 case CSImode:
2381 case CHImode:
2382 case CQImode:
/* A value that ends within the first 32 bits gets the INTEGERSI
   class; anything else is a full INTEGER. */
2383 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2384 classes[0] = X86_64_INTEGERSI_CLASS;
2385 else
2386 classes[0] = X86_64_INTEGER_CLASS;
2387 return 1;
2388 case CDImode:
2389 case TImode:
2390 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2391 return 2;
2392 case CTImode:
9e9fb0ce 2393 return 0;
53c17031
JH
2394 case SFmode:
/* A float at the start of a 64-bit word gets the SSESF class; at any
   other offset it gets the SSE class. */
2395 if (!(bit_offset % 64))
2396 classes[0] = X86_64_SSESF_CLASS;
2397 else
2398 classes[0] = X86_64_SSE_CLASS;
2399 return 1;
2400 case DFmode:
2401 classes[0] = X86_64_SSEDF_CLASS;
2402 return 1;
f8a1ebc6 2403 case XFmode:
53c17031
JH
2404 classes[0] = X86_64_X87_CLASS;
2405 classes[1] = X86_64_X87UP_CLASS;
2406 return 2;
f8a1ebc6 2407 case TFmode:
9e9fb0ce
JB
2408 classes[0] = X86_64_SSE_CLASS;
2409 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
2410 return 2;
2411 case SCmode:
2412 classes[0] = X86_64_SSE_CLASS;
2413 return 1;
9e9fb0ce
JB
2414 case DCmode:
2415 classes[0] = X86_64_SSEDF_CLASS;
2416 classes[1] = X86_64_SSEDF_CLASS;
2417 return 2;
2418 case XCmode:
499accd7
JB
2419 classes[0] = X86_64_COMPLEX_X87_CLASS;
2420 return 1;
9e9fb0ce 2421 case TCmode:
499accd7 2422 /* This mode is larger than 16 bytes. */
9e9fb0ce 2423 return 0;
e95d6b23
JH
2424 case V4SFmode:
2425 case V4SImode:
495333a6
JH
2426 case V16QImode:
2427 case V8HImode:
2428 case V2DFmode:
2429 case V2DImode:
e95d6b23
JH
2430 classes[0] = X86_64_SSE_CLASS;
2431 classes[1] = X86_64_SSEUP_CLASS;
2432 return 2;
2433 case V2SFmode:
2434 case V2SImode:
2435 case V4HImode:
2436 case V8QImode:
9e9fb0ce
JB
2437 classes[0] = X86_64_SSE_CLASS;
2438 return 1;
53c17031 2439 case BLKmode:
e95d6b23 2440 case VOIDmode:
53c17031
JH
2441 return 0;
2442 default:
/* Remaining vector modes: those over 16 bytes go to memory and those
   with an integer element mode are classified as integers. Any other
   mode, including a vector mode with a non-integer element mode,
   reaches the abort below. */
9e9fb0ce
JB
2443 if (VECTOR_MODE_P (mode))
2444 {
2445 if (bytes > 16)
2446 return 0;
2447 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2448 {
2449 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2450 classes[0] = X86_64_INTEGERSI_CLASS;
2451 else
2452 classes[0] = X86_64_INTEGER_CLASS;
2453 classes[1] = X86_64_INTEGER_CLASS;
2454 return 1 + (bytes > 8);
2455 }
2456 }
53c17031
JH
2457 abort ();
2458 }
2459}
2460
2461/* Examine the argument and return set number of register required in each
f5143c46 2462 class. Return 0 iff parameter should be passed in memory. */
53c17031 2463static int
b96a374d
AJ
2464examine_argument (enum machine_mode mode, tree type, int in_return,
2465 int *int_nregs, int *sse_nregs)
53c17031
JH
2466{
2467 enum x86_64_reg_class class[MAX_CLASSES];
2468 int n = classify_argument (mode, type, class, 0);
2469
2470 *int_nregs = 0;
2471 *sse_nregs = 0;
2472 if (!n)
2473 return 0;
2474 for (n--; n >= 0; n--)
2475 switch (class[n])
2476 {
2477 case X86_64_INTEGER_CLASS:
2478 case X86_64_INTEGERSI_CLASS:
2479 (*int_nregs)++;
2480 break;
2481 case X86_64_SSE_CLASS:
2482 case X86_64_SSESF_CLASS:
2483 case X86_64_SSEDF_CLASS:
2484 (*sse_nregs)++;
2485 break;
2486 case X86_64_NO_CLASS:
2487 case X86_64_SSEUP_CLASS:
2488 break;
2489 case X86_64_X87_CLASS:
2490 case X86_64_X87UP_CLASS:
2491 if (!in_return)
2492 return 0;
2493 break;
499accd7
JB
2494 case X86_64_COMPLEX_X87_CLASS:
2495 return in_return ? 2 : 0;
53c17031
JH
2496 case X86_64_MEMORY_CLASS:
2497 abort ();
2498 }
2499 return 1;
2500}
6c4ccfd8 2501
53c17031
JH
2502/* Construct container for the argument used by GCC interface. See
2503 FUNCTION_ARG for the detailed description. */
6c4ccfd8 2504
53c17031 2505static rtx
6c4ccfd8
RH
2506construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2507 tree type, int in_return, int nintregs, int nsseregs,
2508 const int *intreg, int sse_regno)
53c17031
JH
2509{
2510 enum machine_mode tmpmode;
2511 int bytes =
2512 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2513 enum x86_64_reg_class class[MAX_CLASSES];
2514 int n;
2515 int i;
2516 int nexps = 0;
2517 int needed_sseregs, needed_intregs;
2518 rtx exp[MAX_CLASSES];
2519 rtx ret;
2520
2521 n = classify_argument (mode, type, class, 0);
2522 if (TARGET_DEBUG_ARG)
2523 {
2524 if (!n)
2525 fprintf (stderr, "Memory class\n");
2526 else
2527 {
2528 fprintf (stderr, "Classes:");
2529 for (i = 0; i < n; i++)
2530 {
2531 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2532 }
2533 fprintf (stderr, "\n");
2534 }
2535 }
2536 if (!n)
2537 return NULL;
6c4ccfd8
RH
2538 if (!examine_argument (mode, type, in_return, &needed_intregs,
2539 &needed_sseregs))
53c17031
JH
2540 return NULL;
2541 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2542 return NULL;
2543
a5370cf0
RH
2544 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2545 some less clueful developer tries to use floating-point anyway. */
2546 if (needed_sseregs && !TARGET_SSE)
2547 {
2548 static bool issued_error;
2549 if (!issued_error)
2550 {
2551 issued_error = true;
2552 if (in_return)
2553 error ("SSE register return with SSE disabled");
2554 else
2555 error ("SSE register argument with SSE disabled");
2556 }
2557 return NULL;
2558 }
2559
53c17031
JH
2560 /* First construct simple cases. Avoid SCmode, since we want to use
2561 single register to pass this type. */
2562 if (n == 1 && mode != SCmode)
2563 switch (class[0])
2564 {
2565 case X86_64_INTEGER_CLASS:
2566 case X86_64_INTEGERSI_CLASS:
2567 return gen_rtx_REG (mode, intreg[0]);
2568 case X86_64_SSE_CLASS:
2569 case X86_64_SSESF_CLASS:
2570 case X86_64_SSEDF_CLASS:
6c4ccfd8 2571 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
53c17031 2572 case X86_64_X87_CLASS:
499accd7 2573 case X86_64_COMPLEX_X87_CLASS:
53c17031
JH
2574 return gen_rtx_REG (mode, FIRST_STACK_REG);
2575 case X86_64_NO_CLASS:
2576 /* Zero sized array, struct or class. */
2577 return NULL;
2578 default:
2579 abort ();
2580 }
2c6b27c3
JH
2581 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2582 && mode != BLKmode)
e95d6b23 2583 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2584 if (n == 2
2585 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
f8a1ebc6 2586 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
53c17031
JH
2587 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2588 && class[1] == X86_64_INTEGER_CLASS
f8a1ebc6 2589 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
2590 && intreg[0] + 1 == intreg[1])
2591 return gen_rtx_REG (mode, intreg[0]);
53c17031
JH
2592
2593 /* Otherwise figure out the entries of the PARALLEL. */
2594 for (i = 0; i < n; i++)
2595 {
2596 switch (class[i])
2597 {
2598 case X86_64_NO_CLASS:
2599 break;
2600 case X86_64_INTEGER_CLASS:
2601 case X86_64_INTEGERSI_CLASS:
d1f87653 2602 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2603 if (i * 8 + 8 > bytes)
2604 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2605 else if (class[i] == X86_64_INTEGERSI_CLASS)
2606 tmpmode = SImode;
2607 else
2608 tmpmode = DImode;
2609 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2610 if (tmpmode == BLKmode)
2611 tmpmode = DImode;
2612 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2613 gen_rtx_REG (tmpmode, *intreg),
2614 GEN_INT (i*8));
2615 intreg++;
2616 break;
2617 case X86_64_SSESF_CLASS:
2618 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2619 gen_rtx_REG (SFmode,
2620 SSE_REGNO (sse_regno)),
2621 GEN_INT (i*8));
2622 sse_regno++;
2623 break;
2624 case X86_64_SSEDF_CLASS:
2625 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2626 gen_rtx_REG (DFmode,
2627 SSE_REGNO (sse_regno)),
2628 GEN_INT (i*8));
2629 sse_regno++;
2630 break;
2631 case X86_64_SSE_CLASS:
12f5c45e
JH
2632 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2633 tmpmode = TImode;
53c17031
JH
2634 else
2635 tmpmode = DImode;
2636 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2637 gen_rtx_REG (tmpmode,
2638 SSE_REGNO (sse_regno)),
2639 GEN_INT (i*8));
12f5c45e
JH
2640 if (tmpmode == TImode)
2641 i++;
53c17031
JH
2642 sse_regno++;
2643 break;
2644 default:
2645 abort ();
2646 }
2647 }
2648 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2649 for (i = 0; i < nexps; i++)
2650 XVECEXP (ret, 0, i) = exp [i];
2651 return ret;
2652}
2653
b08de47e
MM
2654/* Update the data in CUM to advance over an argument
2655 of mode MODE and data type TYPE.
2656 (TYPE is null for libcalls where that information may not be available.) */
2657
2658void
6c4ccfd8
RH
2659function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2660 tree type, int named)
b08de47e 2661{
5ac9118e
KG
2662 int bytes =
2663 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2664 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2665
2666 if (TARGET_DEBUG_ARG)
6c4ccfd8
RH
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
53c17031 2671 if (TARGET_64BIT)
b08de47e 2672 {
53c17031
JH
2673 int int_nregs, sse_nregs;
2674 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2675 cum->words += words;
2676 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2677 {
53c17031
JH
2678 cum->nregs -= int_nregs;
2679 cum->sse_nregs -= sse_nregs;
2680 cum->regno += int_nregs;
2681 cum->sse_regno += sse_nregs;
82a127a9 2682 }
53c17031
JH
2683 else
2684 cum->words += words;
b08de47e 2685 }
a4f31c00 2686 else
82a127a9 2687 {
bcf17554
JH
2688 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2689 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2690 {
2691 cum->sse_words += words;
2692 cum->sse_nregs -= 1;
2693 cum->sse_regno += 1;
2694 if (cum->sse_nregs <= 0)
2695 {
2696 cum->sse_nregs = 0;
2697 cum->sse_regno = 0;
2698 }
2699 }
bcf17554
JH
2700 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2701 && (!type || !AGGREGATE_TYPE_P (type)))
2702 {
2703 cum->mmx_words += words;
2704 cum->mmx_nregs -= 1;
2705 cum->mmx_regno += 1;
2706 if (cum->mmx_nregs <= 0)
2707 {
2708 cum->mmx_nregs = 0;
2709 cum->mmx_regno = 0;
2710 }
2711 }
53c17031 2712 else
82a127a9 2713 {
53c17031
JH
2714 cum->words += words;
2715 cum->nregs -= words;
2716 cum->regno += words;
2717
2718 if (cum->nregs <= 0)
2719 {
2720 cum->nregs = 0;
2721 cum->regno = 0;
2722 }
82a127a9
CM
2723 }
2724 }
b08de47e
MM
2725 return;
2726}
2727
2728/* Define where to put the arguments to a function.
2729 Value is zero to push the argument on the stack,
2730 or a hard register in which to store the argument.
2731
2732 MODE is the argument's machine mode.
2733 TYPE is the data type of the argument (as a tree).
2734 This is null for libcalls where that information may
2735 not be available.
2736 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2737 the preceding args and about the function being called.
2738 NAMED is nonzero if this argument is a named parameter
2739 (otherwise it is an extra parameter matching an ellipsis). */
2740
07933f72 2741rtx
dcbca208
RH
2742function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2743 tree type, int named)
b08de47e 2744{
dcbca208
RH
2745 enum machine_mode mode = orig_mode;
2746 rtx ret = NULL_RTX;
5ac9118e
KG
2747 int bytes =
2748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2749 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2750 static bool warnedsse, warnedmmx;
b08de47e 2751
90d5887b
PB
2752 /* To simplify the code below, represent vector types with a vector mode
2753 even if MMX/SSE are not active. */
6c4ccfd8
RH
2754 if (type && TREE_CODE (type) == VECTOR_TYPE)
2755 mode = type_natural_mode (type);
90d5887b 2756
5bdc5878 2757 /* Handle a hidden AL argument containing number of registers for varargs
53c17031
JH
2758 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2759 any AL settings. */
32ee7d1d 2760 if (mode == VOIDmode)
b08de47e 2761 {
53c17031
JH
2762 if (TARGET_64BIT)
2763 return GEN_INT (cum->maybe_vaarg
2764 ? (cum->sse_nregs < 0
2765 ? SSE_REGPARM_MAX
2766 : cum->sse_regno)
2767 : -1);
2768 else
2769 return constm1_rtx;
b08de47e 2770 }
53c17031 2771 if (TARGET_64BIT)
6c4ccfd8
RH
2772 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2773 cum->sse_nregs,
53c17031
JH
2774 &x86_64_int_parameter_registers [cum->regno],
2775 cum->sse_regno);
2776 else
2777 switch (mode)
2778 {
2779 /* For now, pass fp/complex values on the stack. */
2780 default:
2781 break;
2782
2783 case BLKmode:
8d454008
RH
2784 if (bytes < 0)
2785 break;
5efb1046 2786 /* FALLTHRU */
53c17031
JH
2787 case DImode:
2788 case SImode:
2789 case HImode:
2790 case QImode:
2791 if (words <= cum->nregs)
b96a374d
AJ
2792 {
2793 int regno = cum->regno;
2794
2795 /* Fastcall allocates the first two DWORD (SImode) or
2796 smaller arguments to ECX and EDX. */
2797 if (cum->fastcall)
2798 {
2799 if (mode == BLKmode || mode == DImode)
2800 break;
2801
2802 /* ECX not EAX is the first allocated register. */
2803 if (regno == 0)
e767b5be 2804 regno = 2;
b96a374d
AJ
2805 }
2806 ret = gen_rtx_REG (mode, regno);
2807 }
53c17031
JH
2808 break;
2809 case TImode:
bcf17554
JH
2810 case V16QImode:
2811 case V8HImode:
2812 case V4SImode:
2813 case V2DImode:
2814 case V4SFmode:
2815 case V2DFmode:
2816 if (!type || !AGGREGATE_TYPE_P (type))
2817 {
78fbfc4b 2818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
bcf17554
JH
2819 {
2820 warnedsse = true;
2821 warning ("SSE vector argument without SSE enabled "
2822 "changes the ABI");
2823 }
2824 if (cum->sse_nregs)
6c4ccfd8 2825 ret = gen_reg_or_parallel (mode, orig_mode,
dcbca208 2826 cum->sse_regno + FIRST_SSE_REG);
bcf17554
JH
2827 }
2828 break;
2829 case V8QImode:
2830 case V4HImode:
2831 case V2SImode:
2832 case V2SFmode:
2833 if (!type || !AGGREGATE_TYPE_P (type))
2834 {
e1be55d0 2835 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2836 {
2837 warnedmmx = true;
2838 warning ("MMX vector argument without MMX enabled "
2839 "changes the ABI");
2840 }
2841 if (cum->mmx_nregs)
6c4ccfd8 2842 ret = gen_reg_or_parallel (mode, orig_mode,
dcbca208 2843 cum->mmx_regno + FIRST_MMX_REG);
bcf17554 2844 }
53c17031
JH
2845 break;
2846 }
b08de47e
MM
2847
2848 if (TARGET_DEBUG_ARG)
2849 {
2850 fprintf (stderr,
91ea38f9 2851 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2852 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2853
2854 if (ret)
91ea38f9 2855 print_simple_rtl (stderr, ret);
b08de47e
MM
2856 else
2857 fprintf (stderr, ", stack");
2858
2859 fprintf (stderr, " )\n");
2860 }
2861
2862 return ret;
2863}
53c17031 2864
09b2e78d
ZD
2865/* A C expression that indicates when an argument must be passed by
2866 reference. If nonzero for an argument, a copy of that argument is
2867 made in memory and a pointer to the argument is passed instead of
2868 the argument itself. The pointer is passed in whatever way is
2869 appropriate for passing a pointer to that type. */
2870
8cd5a4e0
RH
2871static bool
2872ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2873 enum machine_mode mode ATTRIBUTE_UNUSED,
2874 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2875{
2876 if (!TARGET_64BIT)
2877 return 0;
2878
2879 if (type && int_size_in_bytes (type) == -1)
2880 {
2881 if (TARGET_DEBUG_ARG)
2882 fprintf (stderr, "function_arg_pass_by_reference\n");
2883 return 1;
2884 }
2885
2886 return 0;
2887}
2888
8b978a57 2889/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 2890 ABI. Only called if TARGET_SSE. */
8b978a57 2891static bool
b96a374d 2892contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2893{
2894 enum machine_mode mode = TYPE_MODE (type);
2895 if (SSE_REG_MODE_P (mode)
2896 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2897 return true;
2898 if (TYPE_ALIGN (type) < 128)
2899 return false;
2900
2901 if (AGGREGATE_TYPE_P (type))
2902 {
2a43945f 2903 /* Walk the aggregates recursively. */
8b978a57
JH
2904 if (TREE_CODE (type) == RECORD_TYPE
2905 || TREE_CODE (type) == UNION_TYPE
2906 || TREE_CODE (type) == QUAL_UNION_TYPE)
2907 {
2908 tree field;
2909
fa743e8c 2910 if (TYPE_BINFO (type))
8b978a57 2911 {
fa743e8c 2912 tree binfo, base_binfo;
8b978a57
JH
2913 int i;
2914
fa743e8c
NS
2915 for (binfo = TYPE_BINFO (type), i = 0;
2916 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2917 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2918 return true;
8b978a57 2919 }
43f3a59d 2920 /* And now merge the fields of structure. */
8b978a57
JH
2921 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2922 {
2923 if (TREE_CODE (field) == FIELD_DECL
2924 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2925 return true;
2926 }
2927 }
2928 /* Just for use if some languages passes arrays by value. */
2929 else if (TREE_CODE (type) == ARRAY_TYPE)
2930 {
2931 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2932 return true;
2933 }
2934 else
2935 abort ();
2936 }
2937 return false;
2938}
2939
bb498ea3
AH
2940/* Gives the alignment boundary, in bits, of an argument with the
2941 specified mode and type. */
53c17031
JH
2942
2943int
b96a374d 2944ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2945{
2946 int align;
53c17031
JH
2947 if (type)
2948 align = TYPE_ALIGN (type);
2949 else
2950 align = GET_MODE_ALIGNMENT (mode);
2951 if (align < PARM_BOUNDARY)
2952 align = PARM_BOUNDARY;
8b978a57
JH
2953 if (!TARGET_64BIT)
2954 {
2955 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2956 make an exception for SSE modes since these require 128bit
b96a374d 2957 alignment.
8b978a57
JH
2958
2959 The handling here differs from field_alignment. ICC aligns MMX
2960 arguments to 4 byte boundaries, while structure fields are aligned
2961 to 8 byte boundaries. */
78fbfc4b
JB
2962 if (!TARGET_SSE)
2963 align = PARM_BOUNDARY;
2964 else if (!type)
8b978a57
JH
2965 {
2966 if (!SSE_REG_MODE_P (mode))
2967 align = PARM_BOUNDARY;
2968 }
2969 else
2970 {
2971 if (!contains_128bit_aligned_vector_p (type))
2972 align = PARM_BOUNDARY;
2973 }
8b978a57 2974 }
53c17031
JH
2975 if (align > 128)
2976 align = 128;
2977 return align;
2978}
2979
2980/* Return true if N is a possible register number of function value. */
2981bool
b96a374d 2982ix86_function_value_regno_p (int regno)
53c17031
JH
2983{
2984 if (!TARGET_64BIT)
2985 {
2986 return ((regno) == 0
2987 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2988 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2989 }
2990 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2991 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2992 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2993}
2994
2995/* Define how to find the value returned by a function.
2996 VALTYPE is the data type of the value (as a tree).
2997 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2998 otherwise, FUNC is 0. */
2999rtx
b96a374d 3000ix86_function_value (tree valtype)
53c17031
JH
3001{
3002 if (TARGET_64BIT)
3003 {
6c4ccfd8
RH
3004 rtx ret = construct_container (type_natural_mode (valtype),
3005 TYPE_MODE (valtype), valtype,
3006 1, REGPARM_MAX, SSE_REGPARM_MAX,
53c17031 3007 x86_64_int_return_registers, 0);
6c4ccfd8
RH
3008 /* For zero sized structures, construct_container return NULL, but we
3009 need to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
3010 if (!ret)
3011 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3012 return ret;
3013 }
3014 else
b069de3b
SS
3015 return gen_rtx_REG (TYPE_MODE (valtype),
3016 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
3017}
3018
f5143c46 3019/* Return false iff type is returned in memory. */
53c17031 3020int
b96a374d 3021ix86_return_in_memory (tree type)
53c17031 3022{
a30b6839
RH
3023 int needed_intregs, needed_sseregs, size;
3024 enum machine_mode mode = TYPE_MODE (type);
3025
53c17031 3026 if (TARGET_64BIT)
a30b6839
RH
3027 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3028
3029 if (mode == BLKmode)
3030 return 1;
3031
3032 size = int_size_in_bytes (type);
3033
3034 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3035 return 0;
3036
3037 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 3038 {
a30b6839
RH
3039 /* User-created vectors small enough to fit in EAX. */
3040 if (size < 8)
5e062767 3041 return 0;
a30b6839
RH
3042
3043 /* MMX/3dNow values are returned on the stack, since we've
3044 got to EMMS/FEMMS before returning. */
3045 if (size == 8)
53c17031 3046 return 1;
a30b6839 3047
0397ac35 3048 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 3049 if (size == 16)
0397ac35 3050 return (TARGET_SSE ? 0 : 1);
53c17031 3051 }
a30b6839 3052
cf2348cb 3053 if (mode == XFmode)
a30b6839 3054 return 0;
f8a1ebc6 3055
a30b6839
RH
3056 if (size > 12)
3057 return 1;
3058 return 0;
53c17031
JH
3059}
3060
0397ac35
RH
3061/* When returning SSE vector types, we have a choice of either
3062 (1) being abi incompatible with a -march switch, or
3063 (2) generating an error.
3064 Given no good solution, I think the safest thing is one warning.
3065 The user won't be able to use -Werror, but....
3066
3067 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3068 called in response to actually generating a caller or callee that
3069 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3070 via aggregate_value_p for general type probing from tree-ssa. */
3071
3072static rtx
3073ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3074{
3075 static bool warned;
3076
3077 if (!TARGET_SSE && type && !warned)
3078 {
3079 /* Look at the return type of the function, not the function type. */
3080 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3081
3082 if (mode == TImode
3083 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3084 {
3085 warned = true;
3086 warning ("SSE vector return without SSE enabled changes the ABI");
3087 }
3088 }
3089
3090 return NULL;
3091}
3092
53c17031
JH
3093/* Define how to find the value returned by a library function
3094 assuming the value has mode MODE. */
3095rtx
b96a374d 3096ix86_libcall_value (enum machine_mode mode)
53c17031
JH
3097{
3098 if (TARGET_64BIT)
3099 {
3100 switch (mode)
3101 {
f8a1ebc6
JH
3102 case SFmode:
3103 case SCmode:
3104 case DFmode:
3105 case DCmode:
9e9fb0ce 3106 case TFmode:
f8a1ebc6
JH
3107 return gen_rtx_REG (mode, FIRST_SSE_REG);
3108 case XFmode:
9e9fb0ce 3109 case XCmode:
499accd7 3110 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
f8a1ebc6
JH
3111 case TCmode:
3112 return NULL;
3113 default:
3114 return gen_rtx_REG (mode, 0);
53c17031
JH
3115 }
3116 }
3117 else
f8a1ebc6 3118 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
3119}
3120
3121/* Given a mode, return the register to use for a return value. */
3122
3123static int
b96a374d 3124ix86_value_regno (enum machine_mode mode)
b069de3b 3125{
a30b6839 3126 /* Floating point return values in %st(0). */
b069de3b
SS
3127 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3128 return FIRST_FLOAT_REG;
a30b6839
RH
3129 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3130 we prevent this case when sse is not available. */
3131 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 3132 return FIRST_SSE_REG;
a30b6839 3133 /* Everything else in %eax. */
b069de3b 3134 return 0;
53c17031 3135}
ad919812
JH
3136\f
3137/* Create the va_list data type. */
53c17031 3138
c35d187f
RH
3139static tree
3140ix86_build_builtin_va_list (void)
ad919812
JH
3141{
3142 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 3143
ad919812
JH
3144 /* For i386 we use plain pointer to argument area. */
3145 if (!TARGET_64BIT)
3146 return build_pointer_type (char_type_node);
3147
f1e639b1 3148 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
3149 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3150
fce5a9f2 3151 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 3152 unsigned_type_node);
fce5a9f2 3153 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
3154 unsigned_type_node);
3155 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3156 ptr_type_node);
3157 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3158 ptr_type_node);
3159
3160 DECL_FIELD_CONTEXT (f_gpr) = record;
3161 DECL_FIELD_CONTEXT (f_fpr) = record;
3162 DECL_FIELD_CONTEXT (f_ovf) = record;
3163 DECL_FIELD_CONTEXT (f_sav) = record;
3164
3165 TREE_CHAIN (record) = type_decl;
3166 TYPE_NAME (record) = type_decl;
3167 TYPE_FIELDS (record) = f_gpr;
3168 TREE_CHAIN (f_gpr) = f_fpr;
3169 TREE_CHAIN (f_fpr) = f_ovf;
3170 TREE_CHAIN (f_ovf) = f_sav;
3171
3172 layout_type (record);
3173
3174 /* The correct type is an array type of one element. */
3175 return build_array_type (record, build_index_type (size_zero_node));
3176}
3177
a0524eb3 3178/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 3179
a0524eb3 3180static void
b96a374d
AJ
3181ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3182 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3183 int no_rtl)
ad919812
JH
3184{
3185 CUMULATIVE_ARGS next_cum;
3186 rtx save_area = NULL_RTX, mem;
3187 rtx label;
3188 rtx label_ref;
3189 rtx tmp_reg;
3190 rtx nsse_reg;
3191 int set;
3192 tree fntype;
3193 int stdarg_p;
3194 int i;
3195
3196 if (!TARGET_64BIT)
3197 return;
3198
3199 /* Indicate to allocate space on the stack for varargs save area. */
3200 ix86_save_varrargs_registers = 1;
3201
5474eed5
JH
3202 cfun->stack_alignment_needed = 128;
3203
ad919812
JH
3204 fntype = TREE_TYPE (current_function_decl);
3205 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3206 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3207 != void_type_node));
3208
3209 /* For varargs, we do not want to skip the dummy va_dcl argument.
3210 For stdargs, we do want to skip the last named argument. */
3211 next_cum = *cum;
3212 if (stdarg_p)
3213 function_arg_advance (&next_cum, mode, type, 1);
3214
3215 if (!no_rtl)
3216 save_area = frame_pointer_rtx;
3217
3218 set = get_varargs_alias_set ();
3219
5496b36f 3220 for (i = next_cum.regno; i < ix86_regparm; i++)
ad919812
JH
3221 {
3222 mem = gen_rtx_MEM (Pmode,
3223 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3224 set_mem_alias_set (mem, set);
ad919812
JH
3225 emit_move_insn (mem, gen_rtx_REG (Pmode,
3226 x86_64_int_parameter_registers[i]));
3227 }
3228
5496b36f 3229 if (next_cum.sse_nregs)
ad919812
JH
3230 {
3231 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3232 of SSE parameter registers used to call this function. We use
ad919812
JH
3233 sse_prologue_save insn template that produces computed jump across
3234 SSE saves. We need some preparation work to get this working. */
3235
3236 label = gen_label_rtx ();
3237 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3238
3239 /* Compute address to jump to :
3240 label - 5*eax + nnamed_sse_arguments*5 */
3241 tmp_reg = gen_reg_rtx (Pmode);
3242 nsse_reg = gen_reg_rtx (Pmode);
3243 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3244 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3245 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3246 GEN_INT (4))));
3247 if (next_cum.sse_regno)
3248 emit_move_insn
3249 (nsse_reg,
3250 gen_rtx_CONST (DImode,
3251 gen_rtx_PLUS (DImode,
3252 label_ref,
3253 GEN_INT (next_cum.sse_regno * 4))));
3254 else
3255 emit_move_insn (nsse_reg, label_ref);
3256 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3257
3258 /* Compute address of memory block we save into. We always use pointer
3259 pointing 127 bytes after first byte to store - this is needed to keep
3260 instruction size limited by 4 bytes. */
3261 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3262 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3263 plus_constant (save_area,
3264 8 * REGPARM_MAX + 127)));
ad919812 3265 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3266 set_mem_alias_set (mem, set);
8ac61af7 3267 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3268
3269 /* And finally do the dirty job! */
8ac61af7
RK
3270 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3271 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3272 }
3273
3274}
3275
3276/* Implement va_start. */
3277
3278void
b96a374d 3279ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3280{
3281 HOST_WIDE_INT words, n_gpr, n_fpr;
3282 tree f_gpr, f_fpr, f_ovf, f_sav;
3283 tree gpr, fpr, ovf, sav, t;
3284
3285 /* Only 64bit target needs something special. */
3286 if (!TARGET_64BIT)
3287 {
e5faf155 3288 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3289 return;
3290 }
3291
3292 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3293 f_fpr = TREE_CHAIN (f_gpr);
3294 f_ovf = TREE_CHAIN (f_fpr);
3295 f_sav = TREE_CHAIN (f_ovf);
3296
3297 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
44de5aeb
RK
3298 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3299 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3300 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3301 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
ad919812
JH
3302
3303 /* Count number of gp and fp argument registers used. */
3304 words = current_function_args_info.words;
3305 n_gpr = current_function_args_info.regno;
3306 n_fpr = current_function_args_info.sse_regno;
3307
3308 if (TARGET_DEBUG_ARG)
3309 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3310 (int) words, (int) n_gpr, (int) n_fpr);
ad919812 3311
5496b36f
JJ
3312 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3313 build_int_cst (NULL_TREE, n_gpr * 8));
3314 TREE_SIDE_EFFECTS (t) = 1;
3315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812 3316
5496b36f
JJ
3317 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3318 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3319 TREE_SIDE_EFFECTS (t) = 1;
3320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3321
3322 /* Find the overflow area. */
3323 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3324 if (words != 0)
3325 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
7d60be94 3326 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
ad919812
JH
3327 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3328 TREE_SIDE_EFFECTS (t) = 1;
3329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3330
5496b36f
JJ
3331 /* Find the register save area.
3332 Prologue of the function save it right above stack frame. */
3333 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3334 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3335 TREE_SIDE_EFFECTS (t) = 1;
3336 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3337}
3338
3339/* Implement va_arg: build the tree that reads the next variadic argument
   of TYPE from VALIST, appending the statements that locate it to *PRE_P.

   Targets other than 64-bit are handed to std_gimplify_va_arg_expr, which
   is also the only place POST_P is used.  On 64-bit, if construct_container
   yields a register container for TYPE, code is emitted that fetches the
   value from the register save area (SAV, indexed by the GPR and FPR
   counters) when enough registers remain, and falls back to the overflow
   area (OVF) otherwise.  Without a container only the overflow path is
   emitted.

   The returned tree is an indirect reference through the computed address;
   arguments passed by reference get one additional indirection. */
cd3ce9b4 3340
23a60a04
JM
3341tree
3342ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 3343{
cd3ce9b4
JM
3344 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3345 tree f_gpr, f_fpr, f_ovf, f_sav;
3346 tree gpr, fpr, ovf, sav, t;
3347 int size, rsize;
3348 tree lab_false, lab_over = NULL_TREE;
3349 tree addr, t2;
3350 rtx container;
3351 int indirect_p = 0;
3352 tree ptrtype;
52cf10a3 3353 enum machine_mode nat_mode;
cd3ce9b4
JM
3354
3355 /* Only 64bit target needs something special. */
3356 if (!TARGET_64BIT)
23a60a04 3357 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4
JM
3358
/* Pick up the four fields of the va_list record in declaration order;
   judging by their use below they are the integer-register offset, the
   SSE-register offset, the overflow area pointer and the register save
   area pointer. */
3359 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3360 f_fpr = TREE_CHAIN (f_gpr);
3361 f_ovf = TREE_CHAIN (f_fpr);
3362 f_sav = TREE_CHAIN (f_ovf);
3363
c2433d7d 3364 valist = build_va_arg_indirect_ref (valist);
44de5aeb
RK
3365 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3366 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3367 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3368 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
cd3ce9b4 3369
08b0dc1b
RH
/* An argument passed by reference is fetched as a pointer here and
   dereferenced one more time just before returning. */
3370 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3371 if (indirect_p)
3372 type = build_pointer_type (type);
cd3ce9b4 3373 size = int_size_in_bytes (type);
cd3ce9b4
JM
/* RSIZE is SIZE expressed in words, rounded up. */
3374 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3375
52cf10a3
RH
3376 nat_mode = type_natural_mode (type);
3377 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3378 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
6c4ccfd8
RH
3379
3380 /* Pull the value out of the saved registers. */
cd3ce9b4
JM
3381
3382 addr = create_tmp_var (ptr_type_node, "addr");
3383 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3384
3385 if (container)
3386 {
3387 int needed_intregs, needed_sseregs;
e52a6df5 3388 bool need_temp;
cd3ce9b4
JM
3389 tree int_addr, sse_addr;
3390
3391 lab_false = create_artificial_label ();
3392 lab_over = create_artificial_label ();
3393
52cf10a3 3394 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
cd3ce9b4 3395
e52a6df5
JB
/* A temporary is required when CONTAINER is not a single REG and TYPE's
   alignment exceeds 64 bits while integer registers are involved, or
   exceeds 128 bits in any case. */
3396 need_temp = (!REG_P (container)
3397 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3398 || TYPE_ALIGN (type) > 128));
cd3ce9b4
JM
3399
3400 /* In case we are passing structure, verify that it is consecutive block
3401 on the register save area. If not we need to do moves. */
3402 if (!need_temp && !REG_P (container))
3403 {
3404 /* Verify that all registers are strictly consecutive. The first
   piece decides whether the SSE layout (16-byte stride starting at
   FIRST_SSE_REG) or the integer layout (8-byte stride starting at
   register 0) is checked. */
3405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3406 {
3407 int i;
3408
3409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3410 {
3411 rtx slot = XVECEXP (container, 0, i);
3412 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3413 || INTVAL (XEXP (slot, 1)) != i * 16)
3414 need_temp = 1;
3415 }
3416 }
3417 else
3418 {
3419 int i;
3420
3421 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3422 {
3423 rtx slot = XVECEXP (container, 0, i);
3424 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3425 || INTVAL (XEXP (slot, 1)) != i * 8)
3426 need_temp = 1;
3427 }
3428 }
3429 }
/* Without a temporary the value is read in place, so both source
   addresses are simply ADDR; otherwise they get variables of their own. */
3430 if (!need_temp)
3431 {
3432 int_addr = addr;
3433 sse_addr = addr;
3434 }
3435 else
3436 {
3437 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3438 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3439 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3440 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3441 }
56d76b69 3442
cd3ce9b4
JM
3443 /* First ensure that we fit completely in registers. */
3444 if (needed_intregs)
3445 {
/* Branch to LAB_FALSE once GPR is too large for NEEDED_INTREGS more
   8-byte slots to fit. */
4a90aeeb 3446 t = build_int_cst (TREE_TYPE (gpr),
7d60be94 3447 (REGPARM_MAX - needed_intregs + 1) * 8);
cd3ce9b4
JM
3448 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3449 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3450 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3451 gimplify_and_add (t, pre_p);
3452 }
3453 if (needed_sseregs)
3454 {
/* Same test for FPR; its limit is offset by the REGPARM_MAX * 8 bytes
   of integer slots and uses 16 bytes per SSE slot. */
4a90aeeb
NS
3455 t = build_int_cst (TREE_TYPE (fpr),
3456 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7d60be94 3457 + REGPARM_MAX * 8);
cd3ce9b4
JM
3458 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3459 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3460 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3461 gimplify_and_add (t, pre_p);
3462 }
3463
3464 /* Compute index to start of area used for integer regs. */
3465 if (needed_intregs)
3466 {
3467 /* int_addr = gpr + sav; */
56d76b69
RH
3468 t = fold_convert (ptr_type_node, gpr);
3469 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
cd3ce9b4
JM
3470 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3471 gimplify_and_add (t, pre_p);
3472 }
3473 if (needed_sseregs)
3474 {
3475 /* sse_addr = fpr + sav; */
56d76b69
RH
3476 t = fold_convert (ptr_type_node, fpr);
3477 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
cd3ce9b4
JM
3478 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3479 gimplify_and_add (t, pre_p);
3480 }
3481 if (need_temp)
3482 {
3483 int i;
3484 tree temp = create_tmp_var (type, "va_arg_tmp");
3485
3486 /* addr = &temp; */
3487 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3488 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3489 gimplify_and_add (t, pre_p);
f676971a 3490
cd3ce9b4
JM
/* Copy every piece of the container from its slot in the save area to
   the offset recorded for it inside the temporary. */
3491 for (i = 0; i < XVECLEN (container, 0); i++)
3492 {
3493 rtx slot = XVECEXP (container, 0, i);
3494 rtx reg = XEXP (slot, 0);
3495 enum machine_mode mode = GET_MODE (reg);
3496 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3497 tree addr_type = build_pointer_type (piece_type);
3498 tree src_addr, src;
3499 int src_offset;
3500 tree dest_addr, dest;
3501
3502 if (SSE_REGNO_P (REGNO (reg)))
3503 {
3504 src_addr = sse_addr;
3505 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3506 }
3507 else
3508 {
3509 src_addr = int_addr;
3510 src_offset = REGNO (reg) * 8;
3511 }
8fe75e43
RH
3512 src_addr = fold_convert (addr_type, src_addr);
3513 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3514 size_int (src_offset)));
c2433d7d 3515 src = build_va_arg_indirect_ref (src_addr);
e6e81735 3516
8fe75e43
RH
3517 dest_addr = fold_convert (addr_type, addr);
3518 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3519 size_int (INTVAL (XEXP (slot, 1)))));
c2433d7d 3520 dest = build_va_arg_indirect_ref (dest_addr);
3a3677ff 3521
8fe75e43
RH
3522 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3523 gimplify_and_add (t, pre_p);
3524 }
3525 }
e6e81735 3526
8fe75e43
RH
/* Advance the counters past the registers just consumed. */
3527 if (needed_intregs)
3528 {
3529 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
56d76b69 3530 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8fe75e43
RH
3531 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3532 gimplify_and_add (t, pre_p);
3533 }
3534 if (needed_sseregs)
3535 {
4a90aeeb 3536 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
56d76b69 3537 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8fe75e43
RH
3538 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3539 gimplify_and_add (t, pre_p);
3540 }
e6e81735 3541
8fe75e43
RH
/* The register path is complete: skip the overflow code, which starts
   at LAB_FALSE. */
3542 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3543 gimplify_and_add (t, pre_p);
3544
3545 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3546 append_to_statement_list (t, pre_p);
3a3677ff 3547 }
b840bfb0 3548
8fe75e43 3549 /* ... otherwise out of the overflow area. */
e9e80858 3550
8fe75e43
RH
3551 /* Care for on-stack alignment if needed. */
3552 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3553 t = ovf;
3554 else
e9e80858 3555 {
/* Round OVF up to the argument boundary (converted to bytes). */
8fe75e43 3556 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4a90aeeb 3557 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
56d76b69 3558 build_int_cst (TREE_TYPE (ovf), align - 1));
4a90aeeb 3559 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
56d76b69 3560 build_int_cst (TREE_TYPE (t), -align));
e9e80858 3561 }
8fe75e43 3562 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
e075ae69 3563
8fe75e43
RH
/* addr = (aligned) overflow pointer. */
3564 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3565 gimplify_and_add (t2, pre_p);
e075ae69 3566
/* Bump OVF past the argument, counted in whole words. */
8fe75e43 3567 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
56d76b69 3568 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
8fe75e43
RH
3569 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3570 gimplify_and_add (t, pre_p);
e075ae69 3571
8fe75e43 3572 if (container)
2a2ab3f9 3573 {
8fe75e43
RH
3574 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3575 append_to_statement_list (t, pre_p);
2a2ab3f9 3576 }
e075ae69 3577
8fe75e43
RH
3578 ptrtype = build_pointer_type (type);
3579 addr = fold_convert (ptrtype, addr);
0a726ef1 3580
/* TYPE was replaced by a pointer type for by-reference arguments, so
   those need a second dereference to reach the object itself. */
8fe75e43 3581 if (indirect_p)
c2433d7d
FCE
3582 addr = build_va_arg_indirect_ref (addr);
3583 return build_va_arg_indirect_ref (addr);
0a726ef1 3584}
8fe75e43
RH
3585\f
3586/* Return nonzero if OPNUM's MEM should be matched
3587 in movabs* patterns. */
fee2770d
RS
3588
3589int
8fe75e43 3590ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 3591{
8fe75e43 3592 rtx set, mem;
e075ae69 3593
8fe75e43
RH
3594 set = PATTERN (insn);
3595 if (GET_CODE (set) == PARALLEL)
3596 set = XVECEXP (set, 0, 0);
3597 if (GET_CODE (set) != SET)
e075ae69 3598 abort ();
8fe75e43
RH
3599 mem = XEXP (set, opnum);
3600 while (GET_CODE (mem) == SUBREG)
3601 mem = SUBREG_REG (mem);
3602 if (GET_CODE (mem) != MEM)
2247f6ed 3603 abort ();
8fe75e43 3604 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 3605}
e075ae69 3606\f
881b2a96
RS
3607/* Initialize the table of extra 80387 mathematical constants. */
3608
3609static void
b96a374d 3610init_ext_80387_constants (void)
881b2a96
RS
3611{
3612 static const char * cst[5] =
3613 {
3614 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3615 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3616 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3617 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3618 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3619 };
3620 int i;
3621
3622 for (i = 0; i < 5; i++)
3623 {
3624 real_from_string (&ext_80387_constants_table[i], cst[i]);
3625 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 3626 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 3627 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
3628 }
3629
3630 ext_80387_constants_init = 1;
3631}
3632
e075ae69 3633/* Return true if the constant is something that can be loaded with
881b2a96 3634 a special instruction. */
57dbca5e
BS
3635
3636int
b96a374d 3637standard_80387_constant_p (rtx x)
57dbca5e 3638{
2b04e52b 3639 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3640 return -1;
881b2a96 3641
2b04e52b
JH
3642 if (x == CONST0_RTX (GET_MODE (x)))
3643 return 1;
3644 if (x == CONST1_RTX (GET_MODE (x)))
3645 return 2;
881b2a96 3646
22cc69c4
RS
3647 /* For XFmode constants, try to find a special 80387 instruction when
3648 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 3649 if (GET_MODE (x) == XFmode
22cc69c4 3650 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
3651 {
3652 REAL_VALUE_TYPE r;
3653 int i;
3654
3655 if (! ext_80387_constants_init)
3656 init_ext_80387_constants ();
3657
3658 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3659 for (i = 0; i < 5; i++)
3660 if (real_identical (&r, &ext_80387_constants_table[i]))
3661 return i + 3;
3662 }
3663
e075ae69 3664 return 0;
57dbca5e
BS
3665}
3666
881b2a96
RS
3667/* Return the opcode of the special instruction to be used to load
3668 the constant X. */
3669
3670const char *
b96a374d 3671standard_80387_constant_opcode (rtx x)
881b2a96
RS
3672{
3673 switch (standard_80387_constant_p (x))
3674 {
b96a374d 3675 case 1:
881b2a96
RS
3676 return "fldz";
3677 case 2:
3678 return "fld1";
b96a374d 3679 case 3:
881b2a96
RS
3680 return "fldlg2";
3681 case 4:
3682 return "fldln2";
b96a374d 3683 case 5:
881b2a96
RS
3684 return "fldl2e";
3685 case 6:
3686 return "fldl2t";
b96a374d 3687 case 7:
881b2a96
RS
3688 return "fldpi";
3689 }
3690 abort ();
3691}
3692
3693/* Return the CONST_DOUBLE representing the 80387 constant that is
3694 loaded by the specified special instruction. The argument IDX
3695 matches the return value from standard_80387_constant_p. */
3696
3697rtx
b96a374d 3698standard_80387_constant_rtx (int idx)
881b2a96
RS
3699{
3700 int i;
3701
3702 if (! ext_80387_constants_init)
3703 init_ext_80387_constants ();
3704
3705 switch (idx)
3706 {
3707 case 3:
3708 case 4:
3709 case 5:
3710 case 6:
3711 case 7:
3712 i = idx - 3;
3713 break;
3714
3715 default:
3716 abort ();
3717 }
3718
1f48e56d 3719 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 3720 XFmode);
881b2a96
RS
3721}
3722
2b04e52b
JH
3723/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3724 */
3725int
b96a374d 3726standard_sse_constant_p (rtx x)
2b04e52b 3727{
0e67d460
JH
3728 if (x == const0_rtx)
3729 return 1;
2b04e52b
JH
3730 return (x == CONST0_RTX (GET_MODE (x)));
3731}
3732
2a2ab3f9
JVA
3733/* Returns 1 if OP contains a symbol reference */
3734
3735int
b96a374d 3736symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 3737{
8d531ab9
KH
3738 const char *fmt;
3739 int i;
2a2ab3f9
JVA
3740
3741 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3742 return 1;
3743
3744 fmt = GET_RTX_FORMAT (GET_CODE (op));
3745 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3746 {
3747 if (fmt[i] == 'E')
3748 {
8d531ab9 3749 int j;
2a2ab3f9
JVA
3750
3751 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3752 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3753 return 1;
3754 }
e9a25f70 3755
2a2ab3f9
JVA
3756 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3757 return 1;
3758 }
3759
3760 return 0;
3761}
e075ae69
RH
3762
3763/* Return 1 if it is appropriate to emit `ret' instructions in the
3764 body of a function. Do this only if the epilogue is simple, needing a
3765 couple of insns. Prior to reloading, we can't tell how many registers
3766 must be saved, so return 0 then. Return 0 if there is no frame
6e14af16 3767 marker to de-allocate. */
32b5b1aa
SC
3768
3769int
b96a374d 3770ix86_can_use_return_insn_p (void)
32b5b1aa 3771{
4dd2ac2c 3772 struct ix86_frame frame;
9a7372d6 3773
9a7372d6
RH
3774 if (! reload_completed || frame_pointer_needed)
3775 return 0;
32b5b1aa 3776
9a7372d6
RH
3777 /* Don't allow more than 32 pop, since that's all we can do
3778 with one instruction. */
3779 if (current_function_pops_args
3780 && current_function_args_size >= 32768)
e075ae69 3781 return 0;
32b5b1aa 3782
4dd2ac2c
JH
3783 ix86_compute_frame_layout (&frame);
3784 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3785}
6189a572 3786\f
6fca22eb
RH
3787/* Value should be nonzero if functions must have frame pointers.
3788 Zero means the frame pointer need not be set up (and parms may
3789 be accessed via the stack pointer) in functions that seem suitable. */
3790
3791int
b96a374d 3792ix86_frame_pointer_required (void)
6fca22eb
RH
3793{
3794 /* If we accessed previous frames, then the generated code expects
3795 to be able to access the saved ebp value in our frame. */
3796 if (cfun->machine->accesses_prev_frame)
3797 return 1;
a4f31c00 3798
6fca22eb
RH
3799 /* Several x86 os'es need a frame pointer for other reasons,
3800 usually pertaining to setjmp. */
3801 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3802 return 1;
3803
3804 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3805 the frame pointer by default. Turn it back on now if we've not
3806 got a leaf function. */
a7943381 3807 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
3808 && (!current_function_is_leaf))
3809 return 1;
3810
3811 if (current_function_profile)
6fca22eb
RH
3812 return 1;
3813
3814 return 0;
3815}
3816
3817/* Record that the current function accesses previous call frames. */
3818
3819void
b96a374d 3820ix86_setup_frame_addresses (void)
6fca22eb
RH
3821{
3822 cfun->machine->accesses_prev_frame = 1;
3823}
e075ae69 3824\f
145aacc2
RH
/* USE_HIDDEN_LINKONCE is 1 only when both HAVE_GAS_HIDDEN and
   SUPPORTS_ONE_ONLY are defined.  It selects how the pc thunks are
   named and emitted.  */
#if !defined(HAVE_GAS_HIDDEN) || !defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 0
#else
# define USE_HIDDEN_LINKONCE 1
#endif

/* Bit N is set once a pc thunk for hard register N has been requested
   by output_set_got; ix86_file_end emits a thunk for every set bit.  */
static int pic_labels_used;
e9a25f70 3832
145aacc2
RH
3833/* Fills in the label name that should be used for a pc thunk for
3834 the given register. */
3835
3836static void
b96a374d 3837get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
3838{
3839 if (USE_HIDDEN_LINKONCE)
3840 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3841 else
3842 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3843}
3844
3845
e075ae69
RH
3846/* This function generates code for -fpic that loads %ebx with
3847 the return address of the caller and then returns. */
3848
3849void
b96a374d 3850ix86_file_end (void)
e075ae69
RH
3851{
3852 rtx xops[2];
bd09bdeb 3853 int regno;
32b5b1aa 3854
bd09bdeb 3855 for (regno = 0; regno < 8; ++regno)
7c262518 3856 {
145aacc2
RH
3857 char name[32];
3858
bd09bdeb
RH
3859 if (! ((pic_labels_used >> regno) & 1))
3860 continue;
3861
145aacc2 3862 get_pc_thunk_name (name, regno);
bd09bdeb 3863
145aacc2
RH
3864 if (USE_HIDDEN_LINKONCE)
3865 {
3866 tree decl;
3867
3868 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3869 error_mark_node);
3870 TREE_PUBLIC (decl) = 1;
3871 TREE_STATIC (decl) = 1;
3872 DECL_ONE_ONLY (decl) = 1;
3873
3874 (*targetm.asm_out.unique_section) (decl, 0);
3875 named_section (decl, NULL, 0);
3876
a5fe455b
ZW
3877 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3878 fputs ("\t.hidden\t", asm_out_file);
3879 assemble_name (asm_out_file, name);
3880 fputc ('\n', asm_out_file);
3881 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
3882 }
3883 else
3884 {
3885 text_section ();
a5fe455b 3886 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 3887 }
bd09bdeb
RH
3888
3889 xops[0] = gen_rtx_REG (SImode, regno);
3890 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3892 output_asm_insn ("ret", xops);
7c262518 3893 }
3edc56a9 3894
a5fe455b
ZW
3895 if (NEED_INDICATE_EXEC_STACK)
3896 file_end_indicate_exec_stack ();
32b5b1aa 3897}
32b5b1aa 3898
c8c03509 3899/* Emit code for the SET_GOT patterns. */
32b5b1aa 3900
c8c03509 3901const char *
b96a374d 3902output_set_got (rtx dest)
c8c03509
RH
3903{
3904 rtx xops[3];
0d7d98ee 3905
c8c03509 3906 xops[0] = dest;
5fc0e5df 3907 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 3908
c8c03509 3909 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 3910 {
c8c03509
RH
3911 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3912
3913 if (!flag_pic)
3914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3915 else
3916 output_asm_insn ("call\t%a2", xops);
3917
b069de3b
SS
3918#if TARGET_MACHO
3919 /* Output the "canonical" label name ("Lxx$pb") here too. This
3920 is what will be referred to by the Mach-O PIC subsystem. */
3921 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3922#endif
4977bab6 3923 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
3924 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3925
3926 if (flag_pic)
3927 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 3928 }
e075ae69 3929 else
e5cb57e8 3930 {
145aacc2
RH
3931 char name[32];
3932 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 3933 pic_labels_used |= 1 << REGNO (dest);
f996902d 3934
145aacc2 3935 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
3936 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3937 output_asm_insn ("call\t%X2", xops);
e5cb57e8 3938 }
e5cb57e8 3939
c8c03509
RH
3940 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3941 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 3942 else if (!TARGET_MACHO)
8e9fadc3 3943 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 3944
c8c03509 3945 return "";
e9a25f70 3946}
8dfe5673 3947
0d7d98ee 3948/* Generate an "push" pattern for input ARG. */
e9a25f70 3949
e075ae69 3950static rtx
b96a374d 3951gen_push (rtx arg)
e9a25f70 3952{
c5c76735 3953 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
3954 gen_rtx_MEM (Pmode,
3955 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
3956 stack_pointer_rtx)),
3957 arg);
e9a25f70
JL
3958}
3959
bd09bdeb
RH
3960/* Return >= 0 if there is an unused call-clobbered register available
3961 for the entire function. */
3962
3963static unsigned int
b96a374d 3964ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
3965{
3966 if (current_function_is_leaf && !current_function_profile)
3967 {
3968 int i;
3969 for (i = 2; i >= 0; --i)
3970 if (!regs_ever_live[i])
3971 return i;
3972 }
3973
3974 return INVALID_REGNUM;
3975}
fce5a9f2 3976
4dd2ac2c
JH
3977/* Return 1 if we need to save REGNO. */
3978static int
b96a374d 3979ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 3980{
bd09bdeb
RH
3981 if (pic_offset_table_rtx
3982 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3983 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 3984 || current_function_profile
8c38a24f
MM
3985 || current_function_calls_eh_return
3986 || current_function_uses_const_pool))
bd09bdeb
RH
3987 {
3988 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3989 return 0;
3990 return 1;
3991 }
1020a5ab
RH
3992
3993 if (current_function_calls_eh_return && maybe_eh_return)
3994 {
3995 unsigned i;
3996 for (i = 0; ; i++)
3997 {
b531087a 3998 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
3999 if (test == INVALID_REGNUM)
4000 break;
9b690711 4001 if (test == regno)
1020a5ab
RH
4002 return 1;
4003 }
4004 }
4dd2ac2c 4005
1020a5ab
RH
4006 return (regs_ever_live[regno]
4007 && !call_used_regs[regno]
4008 && !fixed_regs[regno]
4009 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4010}
4011
0903fcab
JH
4012/* Return number of registers to be saved on the stack. */
4013
4014static int
b96a374d 4015ix86_nsaved_regs (void)
0903fcab
JH
4016{
4017 int nregs = 0;
0903fcab
JH
4018 int regno;
4019
4dd2ac2c 4020 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4021 if (ix86_save_reg (regno, true))
4dd2ac2c 4022 nregs++;
0903fcab
JH
4023 return nregs;
4024}
4025
4026/* Return the offset between two registers, one to be eliminated, and the other
4027 its replacement, at the start of a routine. */
4028
4029HOST_WIDE_INT
b96a374d 4030ix86_initial_elimination_offset (int from, int to)
0903fcab 4031{
4dd2ac2c
JH
4032 struct ix86_frame frame;
4033 ix86_compute_frame_layout (&frame);
564d80f4
JH
4034
4035 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4036 return frame.hard_frame_pointer_offset;
564d80f4
JH
4037 else if (from == FRAME_POINTER_REGNUM
4038 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4039 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4040 else
4041 {
564d80f4
JH
4042 if (to != STACK_POINTER_REGNUM)
4043 abort ();
4044 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4045 return frame.stack_pointer_offset;
564d80f4
JH
4046 else if (from != FRAME_POINTER_REGNUM)
4047 abort ();
0903fcab 4048 else
4dd2ac2c 4049 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4050 }
4051}
4052
4dd2ac2c 4053/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4054
4dd2ac2c 4055static void
b96a374d 4056ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4057{
65954bd8 4058 HOST_WIDE_INT total_size;
95899b34 4059 unsigned int stack_alignment_needed;
b19ee4bd 4060 HOST_WIDE_INT offset;
95899b34 4061 unsigned int preferred_alignment;
4dd2ac2c 4062 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4063
4dd2ac2c 4064 frame->nregs = ix86_nsaved_regs ();
564d80f4 4065 total_size = size;
65954bd8 4066
95899b34
RH
4067 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4068 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4069
d7394366
JH
4070 /* During reload iteration the amount of registers saved can change.
4071 Recompute the value as needed. Do not recompute when amount of registers
4072 didn't change as reload does mutiple calls to the function and does not
4073 expect the decision to change within single iteration. */
4074 if (!optimize_size
4075 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
4076 {
4077 int count = frame->nregs;
4078
d7394366 4079 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
4080 /* The fast prologue uses move instead of push to save registers. This
4081 is significantly longer, but also executes faster as modern hardware
4082 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 4083
d9b40e8d
JH
4084 Be careful about choosing what prologue to emit: When function takes
4085 many instructions to execute we may use slow version as well as in
4086 case function is known to be outside hot spot (this is known with
4087 feedback only). Weight the size of function by number of registers
4088 to save as it is cheap to use one or two push instructions but very
4089 slow to use many of them. */
4090 if (count)
4091 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4092 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4093 || (flag_branch_probabilities
4094 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4095 cfun->machine->use_fast_prologue_epilogue = false;
4096 else
4097 cfun->machine->use_fast_prologue_epilogue
4098 = !expensive_function_p (count);
4099 }
4100 if (TARGET_PROLOGUE_USING_MOVE
4101 && cfun->machine->use_fast_prologue_epilogue)
4102 frame->save_regs_using_mov = true;
4103 else
4104 frame->save_regs_using_mov = false;
4105
4106
9ba81eaa 4107 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4108 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4109
4110 frame->hard_frame_pointer_offset = offset;
564d80f4 4111
fcbfaa65
RK
4112 /* Do some sanity checking of stack_alignment_needed and
4113 preferred_alignment, since i386 port is the only using those features
f710504c 4114 that may break easily. */
564d80f4 4115
44affdae
JH
4116 if (size && !stack_alignment_needed)
4117 abort ();
44affdae
JH
4118 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4119 abort ();
4120 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4121 abort ();
4122 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4123 abort ();
564d80f4 4124
4dd2ac2c
JH
4125 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4126 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4127
4dd2ac2c
JH
4128 /* Register save area */
4129 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4130
8362f420
JH
4131 /* Va-arg area */
4132 if (ix86_save_varrargs_registers)
4133 {
4134 offset += X86_64_VARARGS_SIZE;
4135 frame->va_arg_size = X86_64_VARARGS_SIZE;
4136 }
4137 else
4138 frame->va_arg_size = 0;
4139
4dd2ac2c
JH
4140 /* Align start of frame for local function. */
4141 frame->padding1 = ((offset + stack_alignment_needed - 1)
4142 & -stack_alignment_needed) - offset;
f73ad30e 4143
4dd2ac2c 4144 offset += frame->padding1;
65954bd8 4145
4dd2ac2c
JH
4146 /* Frame pointer points here. */
4147 frame->frame_pointer_offset = offset;
54ff41b7 4148
4dd2ac2c 4149 offset += size;
65954bd8 4150
0b7ae565 4151 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
4152 all the function calls as dead code.
4153 Skipping is however impossible when function calls alloca. Alloca
4154 expander assumes that last current_function_outgoing_args_size
4155 of stack frame are unused. */
4156 if (ACCUMULATE_OUTGOING_ARGS
4157 && (!current_function_is_leaf || current_function_calls_alloca))
4dd2ac2c
JH
4158 {
4159 offset += current_function_outgoing_args_size;
4160 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4161 }
4162 else
4163 frame->outgoing_arguments_size = 0;
564d80f4 4164
002ff5bc
RH
4165 /* Align stack boundary. Only needed if we're calling another function
4166 or using alloca. */
4167 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4168 frame->padding2 = ((offset + preferred_alignment - 1)
4169 & -preferred_alignment) - offset;
4170 else
4171 frame->padding2 = 0;
4dd2ac2c
JH
4172
4173 offset += frame->padding2;
4174
4175 /* We've reached end of stack frame. */
4176 frame->stack_pointer_offset = offset;
4177
4178 /* Size prologue needs to allocate. */
4179 frame->to_allocate =
4180 (size + frame->padding1 + frame->padding2
8362f420 4181 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4182
b19ee4bd
JJ
4183 if ((!frame->to_allocate && frame->nregs <= 1)
4184 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
4185 frame->save_regs_using_mov = false;
4186
a5b378d6 4187 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
4188 && current_function_is_leaf)
4189 {
4190 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
4191 if (frame->save_regs_using_mov)
4192 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
4193 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4194 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4195 }
4196 else
4197 frame->red_zone_size = 0;
4198 frame->to_allocate -= frame->red_zone_size;
4199 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4200#if 0
4201 fprintf (stderr, "nregs: %i\n", frame->nregs);
4202 fprintf (stderr, "size: %i\n", size);
4203 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4204 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4205 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4206 fprintf (stderr, "padding2: %i\n", frame->padding2);
4207 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4208 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4209 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4210 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4211 frame->hard_frame_pointer_offset);
4212 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4213#endif
65954bd8
JL
4214}
4215
0903fcab
JH
4216/* Emit code to save registers in the prologue. */
4217
4218static void
b96a374d 4219ix86_emit_save_regs (void)
0903fcab 4220{
8d531ab9 4221 int regno;
0903fcab 4222 rtx insn;
0903fcab 4223
4dd2ac2c 4224 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4225 if (ix86_save_reg (regno, true))
0903fcab 4226 {
0d7d98ee 4227 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4228 RTX_FRAME_RELATED_P (insn) = 1;
4229 }
4230}
4231
c6036a37
JH
4232/* Emit code to save registers using MOV insns. First register
4233 is restored from POINTER + OFFSET. */
4234static void
b96a374d 4235ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
4236{
4237 int regno;
4238 rtx insn;
4239
4240 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4241 if (ix86_save_reg (regno, true))
4242 {
b72f00af
RK
4243 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4244 Pmode, offset),
c6036a37
JH
4245 gen_rtx_REG (Pmode, regno));
4246 RTX_FRAME_RELATED_P (insn) = 1;
4247 offset += UNITS_PER_WORD;
4248 }
4249}
4250
839a4992 4251/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
4252 The pattern exist to put a dependency on all ebp-based memory accesses.
4253 STYLE should be negative if instructions should be marked as frame related,
4254 zero if %r11 register is live and cannot be freely used and positive
4255 otherwise. */
4256
4257static void
4258pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4259{
4260 rtx insn;
4261
4262 if (! TARGET_64BIT)
4263 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4264 else if (x86_64_immediate_operand (offset, DImode))
4265 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4266 else
4267 {
4268 rtx r11;
4269 /* r11 is used by indirect sibcall return as well, set before the
4270 epilogue and used after the epilogue. ATM indirect sibcall
4271 shouldn't be used together with huge frame sizes in one
4272 function because of the frame_size check in sibcall.c. */
4273 if (style == 0)
4274 abort ();
4275 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4276 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4277 if (style < 0)
4278 RTX_FRAME_RELATED_P (insn) = 1;
4279 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4280 offset));
4281 }
4282 if (style < 0)
4283 RTX_FRAME_RELATED_P (insn) = 1;
4284}
4285
0f290768 4286/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4287
4288void
b96a374d 4289ix86_expand_prologue (void)
2a2ab3f9 4290{
564d80f4 4291 rtx insn;
bd09bdeb 4292 bool pic_reg_used;
4dd2ac2c 4293 struct ix86_frame frame;
c6036a37 4294 HOST_WIDE_INT allocate;
4dd2ac2c 4295
4977bab6 4296 ix86_compute_frame_layout (&frame);
79325812 4297
e075ae69
RH
4298 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4299 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4300
2a2ab3f9
JVA
4301 if (frame_pointer_needed)
4302 {
564d80f4 4303 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4304 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4305
564d80f4 4306 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4307 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4308 }
4309
c6036a37 4310 allocate = frame.to_allocate;
c6036a37 4311
d9b40e8d 4312 if (!frame.save_regs_using_mov)
c6036a37
JH
4313 ix86_emit_save_regs ();
4314 else
4315 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4316
d9b40e8d
JH
4317 /* When using red zone we may start register saving before allocating
4318 the stack frame saving one cycle of the prologue. */
4319 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4320 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4321 : stack_pointer_rtx,
4322 -frame.nregs * UNITS_PER_WORD);
4323
c6036a37 4324 if (allocate == 0)
8dfe5673 4325 ;
e323735c 4326 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
4327 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4328 GEN_INT (-allocate), -1);
79325812 4329 else
8dfe5673 4330 {
fe9f516f
RH
4331 /* Only valid for Win32. */
4332 rtx eax = gen_rtx_REG (SImode, 0);
4333 bool eax_live = ix86_eax_live_at_start_p ();
5fc94ac4 4334 rtx t;
e9a25f70 4335
8362f420 4336 if (TARGET_64BIT)
b1177d69 4337 abort ();
e075ae69 4338
fe9f516f
RH
4339 if (eax_live)
4340 {
4341 emit_insn (gen_push (eax));
4342 allocate -= 4;
4343 }
4344
5fc94ac4 4345 emit_move_insn (eax, GEN_INT (allocate));
98417968 4346
b1177d69
KC
4347 insn = emit_insn (gen_allocate_stack_worker (eax));
4348 RTX_FRAME_RELATED_P (insn) = 1;
5fc94ac4
RH
4349 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4350 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4351 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4352 t, REG_NOTES (insn));
fe9f516f
RH
4353
4354 if (eax_live)
4355 {
ea5f7a19
RS
4356 if (frame_pointer_needed)
4357 t = plus_constant (hard_frame_pointer_rtx,
4358 allocate
4359 - frame.to_allocate
4360 - frame.nregs * UNITS_PER_WORD);
4361 else
4362 t = plus_constant (stack_pointer_rtx, allocate);
fe9f516f
RH
4363 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4364 }
e075ae69 4365 }
fe9f516f 4366
d9b40e8d 4367 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
c6036a37
JH
4368 {
4369 if (!frame_pointer_needed || !frame.to_allocate)
4370 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4371 else
4372 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4373 -frame.nregs * UNITS_PER_WORD);
4374 }
e9a25f70 4375
bd09bdeb
RH
4376 pic_reg_used = false;
4377 if (pic_offset_table_rtx
4378 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4379 || current_function_profile))
4380 {
4381 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4382
4383 if (alt_pic_reg_used != INVALID_REGNUM)
4384 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4385
4386 pic_reg_used = true;
4387 }
4388
e9a25f70 4389 if (pic_reg_used)
c8c03509
RH
4390 {
4391 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4392
66edd3b4
RH
4393 /* Even with accurate pre-reload life analysis, we can wind up
4394 deleting all references to the pic register after reload.
4395 Consider if cross-jumping unifies two sides of a branch
d1f87653 4396 controlled by a comparison vs the only read from a global.
66edd3b4
RH
4397 In which case, allow the set_got to be deleted, though we're
4398 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4399 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4400 }
77a989d1 4401
66edd3b4
RH
4402 /* Prevent function calls from be scheduled before the call to mcount.
4403 In the pic_reg_used case, make sure that the got load isn't deleted. */
4404 if (current_function_profile)
4405 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4406}
4407
da2d1d3a
JH
4408/* Emit code to restore saved registers using MOV insns. First register
4409 is restored from POINTER + OFFSET. */
4410static void
72613dfa
JH
4411ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4412 int maybe_eh_return)
da2d1d3a
JH
4413{
4414 int regno;
72613dfa 4415 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 4416
4dd2ac2c 4417 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4418 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4419 {
72613dfa
JH
4420 /* Ensure that adjust_address won't be forced to produce pointer
4421 out of range allowed by x86-64 instruction set. */
4422 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4423 {
4424 rtx r11;
4425
4426 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4427 emit_move_insn (r11, GEN_INT (offset));
4428 emit_insn (gen_adddi3 (r11, r11, pointer));
4429 base_address = gen_rtx_MEM (Pmode, r11);
4430 offset = 0;
4431 }
4dd2ac2c 4432 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 4433 adjust_address (base_address, Pmode, offset));
4dd2ac2c 4434 offset += UNITS_PER_WORD;
da2d1d3a
JH
4435 }
4436}
4437
0f290768 4438/* Restore function stack, frame, and registers. */
e9a25f70 4439
2a2ab3f9 4440void
b96a374d 4441ix86_expand_epilogue (int style)
2a2ab3f9 4442{
1c71e60e 4443 int regno;
fdb8a883 4444 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4445 struct ix86_frame frame;
65954bd8 4446 HOST_WIDE_INT offset;
4dd2ac2c
JH
4447
4448 ix86_compute_frame_layout (&frame);
2a2ab3f9 4449
a4f31c00 4450 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4451 must be taken for the normal return case of a function using
4452 eh_return: the eax and edx registers are marked as saved, but not
4453 restored along this path. */
4454 offset = frame.nregs;
4455 if (current_function_calls_eh_return && style != 2)
4456 offset -= 2;
4457 offset *= -UNITS_PER_WORD;
2a2ab3f9 4458
fdb8a883
JW
4459 /* If we're only restoring one register and sp is not valid then
4460 using a move instruction to restore the register since it's
0f290768 4461 less work than reloading sp and popping the register.
da2d1d3a
JH
4462
4463 The default code result in stack adjustment using add/lea instruction,
4464 while this code results in LEAVE instruction (or discrete equivalent),
4465 so it is profitable in some other cases as well. Especially when there
4466 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 4467 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 4468 tuning in future. */
4dd2ac2c 4469 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4470 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 4471 && cfun->machine->use_fast_prologue_epilogue
c6036a37 4472 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4473 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4474 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
4475 && cfun->machine->use_fast_prologue_epilogue
4476 && frame.nregs == 1)
2ab0437e 4477 || current_function_calls_eh_return)
2a2ab3f9 4478 {
da2d1d3a
JH
4479 /* Restore registers. We can use ebp or esp to address the memory
4480 locations. If both are available, default to ebp, since offsets
4481 are known to be small. Only exception is esp pointing directly to the
4482 end of block of saved registers, where we may simplify addressing
4483 mode. */
4484
4dd2ac2c 4485 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4486 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4487 frame.to_allocate, style == 2);
da2d1d3a 4488 else
1020a5ab
RH
4489 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4490 offset, style == 2);
4491
4492 /* eh_return epilogues need %ecx added to the stack pointer. */
4493 if (style == 2)
4494 {
4495 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4496
1020a5ab
RH
4497 if (frame_pointer_needed)
4498 {
4499 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4500 tmp = plus_constant (tmp, UNITS_PER_WORD);
4501 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4502
4503 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4504 emit_move_insn (hard_frame_pointer_rtx, tmp);
4505
b19ee4bd
JJ
4506 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4507 const0_rtx, style);
1020a5ab
RH
4508 }
4509 else
4510 {
4511 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4512 tmp = plus_constant (tmp, (frame.to_allocate
4513 + frame.nregs * UNITS_PER_WORD));
4514 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4515 }
4516 }
4517 else if (!frame_pointer_needed)
b19ee4bd
JJ
4518 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4519 GEN_INT (frame.to_allocate
4520 + frame.nregs * UNITS_PER_WORD),
4521 style);
0f290768 4522 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
4523 else if (TARGET_USE_LEAVE || optimize_size
4524 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 4525 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4526 else
2a2ab3f9 4527 {
b19ee4bd
JJ
4528 pro_epilogue_adjust_stack (stack_pointer_rtx,
4529 hard_frame_pointer_rtx,
4530 const0_rtx, style);
8362f420
JH
4531 if (TARGET_64BIT)
4532 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4533 else
4534 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4535 }
4536 }
1c71e60e 4537 else
68f654ec 4538 {
1c71e60e
JH
4539 /* First step is to deallocate the stack frame so that we can
4540 pop the registers. */
4541 if (!sp_valid)
4542 {
4543 if (!frame_pointer_needed)
4544 abort ();
b19ee4bd
JJ
4545 pro_epilogue_adjust_stack (stack_pointer_rtx,
4546 hard_frame_pointer_rtx,
4547 GEN_INT (offset), style);
1c71e60e 4548 }
4dd2ac2c 4549 else if (frame.to_allocate)
b19ee4bd
JJ
4550 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4551 GEN_INT (frame.to_allocate), style);
1c71e60e 4552
4dd2ac2c 4553 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4554 if (ix86_save_reg (regno, false))
8362f420
JH
4555 {
4556 if (TARGET_64BIT)
4557 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4558 else
4559 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4560 }
4dd2ac2c 4561 if (frame_pointer_needed)
8362f420 4562 {
f5143c46 4563 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4564 able to grok it fast. */
4565 if (TARGET_USE_LEAVE)
4566 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4567 else if (TARGET_64BIT)
8362f420
JH
4568 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4569 else
4570 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4571 }
68f654ec 4572 }
68f654ec 4573
cbbf65e0 4574 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4575 if (style == 0)
cbbf65e0
RH
4576 return;
4577
2a2ab3f9
JVA
4578 if (current_function_pops_args && current_function_args_size)
4579 {
e075ae69 4580 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4581
b8c752c8
UD
4582 /* i386 can only pop 64K bytes. If asked to pop more, pop
4583 return address, do explicit add, and jump indirectly to the
0f290768 4584 caller. */
2a2ab3f9 4585
b8c752c8 4586 if (current_function_pops_args >= 65536)
2a2ab3f9 4587 {
e075ae69 4588 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4589
b19ee4bd 4590 /* There is no "pascal" calling convention in 64bit ABI. */
8362f420 4591 if (TARGET_64BIT)
b531087a 4592 abort ();
8362f420 4593
e075ae69
RH
4594 emit_insn (gen_popsi1 (ecx));
4595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4596 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4597 }
79325812 4598 else
e075ae69
RH
4599 emit_jump_insn (gen_return_pop_internal (popc));
4600 }
4601 else
4602 emit_jump_insn (gen_return_internal ());
4603}
bd09bdeb
RH
4604
4605/* Reset from the function's potential modifications. */
4606
4607static void
b96a374d
AJ
4608ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4609 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
4610{
4611 if (pic_offset_table_rtx)
4612 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4613}
e075ae69
RH
4614\f
4615/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4616 for an instruction. Return 0 if the structure of the address is
4617 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 4618 strictly valid, but still used for computing length of lea instruction. */
e075ae69 4619
8fe75e43 4620int
8d531ab9 4621ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69
RH
4622{
4623 rtx base = NULL_RTX;
4624 rtx index = NULL_RTX;
4625 rtx disp = NULL_RTX;
4626 HOST_WIDE_INT scale = 1;
4627 rtx scale_rtx = NULL_RTX;
b446e5a2 4628 int retval = 1;
74dc3e94 4629 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 4630
90e4e4c5 4631 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
e075ae69
RH
4632 base = addr;
4633 else if (GET_CODE (addr) == PLUS)
4634 {
74dc3e94
RH
4635 rtx addends[4], op;
4636 int n = 0, i;
e075ae69 4637
74dc3e94
RH
4638 op = addr;
4639 do
e075ae69 4640 {
74dc3e94
RH
4641 if (n >= 4)
4642 return 0;
4643 addends[n++] = XEXP (op, 1);
4644 op = XEXP (op, 0);
2a2ab3f9 4645 }
74dc3e94
RH
4646 while (GET_CODE (op) == PLUS);
4647 if (n >= 4)
4648 return 0;
4649 addends[n] = op;
4650
4651 for (i = n; i >= 0; --i)
e075ae69 4652 {
74dc3e94
RH
4653 op = addends[i];
4654 switch (GET_CODE (op))
4655 {
4656 case MULT:
4657 if (index)
4658 return 0;
4659 index = XEXP (op, 0);
4660 scale_rtx = XEXP (op, 1);
4661 break;
4662
4663 case UNSPEC:
4664 if (XINT (op, 1) == UNSPEC_TP
4665 && TARGET_TLS_DIRECT_SEG_REFS
4666 && seg == SEG_DEFAULT)
4667 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4668 else
4669 return 0;
4670 break;
4671
4672 case REG:
4673 case SUBREG:
4674 if (!base)
4675 base = op;
4676 else if (!index)
4677 index = op;
4678 else
4679 return 0;
4680 break;
4681
4682 case CONST:
4683 case CONST_INT:
4684 case SYMBOL_REF:
4685 case LABEL_REF:
4686 if (disp)
4687 return 0;
4688 disp = op;
4689 break;
4690
4691 default:
4692 return 0;
4693 }
e075ae69 4694 }
e075ae69
RH
4695 }
4696 else if (GET_CODE (addr) == MULT)
4697 {
4698 index = XEXP (addr, 0); /* index*scale */
4699 scale_rtx = XEXP (addr, 1);
4700 }
4701 else if (GET_CODE (addr) == ASHIFT)
4702 {
4703 rtx tmp;
4704
4705 /* We're called for lea too, which implements ashift on occasion. */
4706 index = XEXP (addr, 0);
4707 tmp = XEXP (addr, 1);
4708 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4709 return 0;
e075ae69
RH
4710 scale = INTVAL (tmp);
4711 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4712 return 0;
e075ae69 4713 scale = 1 << scale;
b446e5a2 4714 retval = -1;
2a2ab3f9 4715 }
2a2ab3f9 4716 else
e075ae69
RH
4717 disp = addr; /* displacement */
4718
4719 /* Extract the integral value of scale. */
4720 if (scale_rtx)
e9a25f70 4721 {
e075ae69 4722 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4723 return 0;
e075ae69 4724 scale = INTVAL (scale_rtx);
e9a25f70 4725 }
3b3c6a3f 4726
74dc3e94 4727 /* Allow arg pointer and stack pointer as index if there is not scaling. */
e075ae69 4728 if (base && index && scale == 1
74dc3e94
RH
4729 && (index == arg_pointer_rtx
4730 || index == frame_pointer_rtx
4731 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
e075ae69
RH
4732 {
4733 rtx tmp = base;
4734 base = index;
4735 index = tmp;
4736 }
4737
4738 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4739 if ((base == hard_frame_pointer_rtx
4740 || base == frame_pointer_rtx
4741 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4742 disp = const0_rtx;
4743
4744 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4745 Avoid this by transforming to [%esi+0]. */
9e555526 4746 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 4747 && base && !index && !disp
329e1d01 4748 && REG_P (base)
e075ae69
RH
4749 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4750 disp = const0_rtx;
4751
4752 /* Special case: encode reg+reg instead of reg*2. */
4753 if (!base && index && scale && scale == 2)
4754 base = index, scale = 1;
0f290768 4755
e075ae69
RH
4756 /* Special case: scaling cannot be encoded without base or displacement. */
4757 if (!base && !disp && index && scale != 1)
4758 disp = const0_rtx;
4759
4760 out->base = base;
4761 out->index = index;
4762 out->disp = disp;
4763 out->scale = scale;
74dc3e94 4764 out->seg = seg;
3b3c6a3f 4765
b446e5a2 4766 return retval;
e075ae69 4767}
01329426
JH
4768\f
4769/* Return cost of the memory address x.
4770 For i386, it is better to use a complex address than let gcc copy
4771 the address into a reg and make a new pseudo. But not if the address
4772 requires to two regs - that would mean more pseudos with longer
4773 lifetimes. */
dcefdf67 4774static int
b96a374d 4775ix86_address_cost (rtx x)
01329426
JH
4776{
4777 struct ix86_address parts;
4778 int cost = 1;
3b3c6a3f 4779
01329426
JH
4780 if (!ix86_decompose_address (x, &parts))
4781 abort ();
4782
4783 /* More complex memory references are better. */
4784 if (parts.disp && parts.disp != const0_rtx)
4785 cost--;
74dc3e94
RH
4786 if (parts.seg != SEG_DEFAULT)
4787 cost--;
01329426
JH
4788
4789 /* Attempt to minimize number of registers in the address. */
4790 if ((parts.base
4791 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4792 || (parts.index
4793 && (!REG_P (parts.index)
4794 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4795 cost++;
4796
4797 if (parts.base
4798 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4799 && parts.index
4800 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4801 && parts.base != parts.index)
4802 cost++;
4803
4804 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4805 since it's predecode logic can't detect the length of instructions
4806 and it degenerates to vector decoded. Increase cost of such
4807 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4808 to split such addresses or even refuse such addresses at all.
01329426
JH
4809
4810 Following addressing modes are affected:
4811 [base+scale*index]
4812 [scale*index+disp]
4813 [base+index]
0f290768 4814
01329426
JH
4815 The first and last case may be avoidable by explicitly coding the zero in
4816 memory address, but I don't have AMD-K6 machine handy to check this
4817 theory. */
4818
4819 if (TARGET_K6
4820 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4821 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4822 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4823 cost += 10;
0f290768 4824
01329426
JH
4825 return cost;
4826}
4827\f
b949ea8b
JW
4828/* If X is a machine specific address (i.e. a symbol or label being
4829 referenced as a displacement from the GOT implemented using an
4830 UNSPEC), then return the base term. Otherwise return X. */
4831
4832rtx
b96a374d 4833ix86_find_base_term (rtx x)
b949ea8b
JW
4834{
4835 rtx term;
4836
6eb791fc
JH
4837 if (TARGET_64BIT)
4838 {
4839 if (GET_CODE (x) != CONST)
4840 return x;
4841 term = XEXP (x, 0);
4842 if (GET_CODE (term) == PLUS
4843 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4844 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4845 term = XEXP (term, 0);
4846 if (GET_CODE (term) != UNSPEC
8ee41eaf 4847 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4848 return x;
4849
4850 term = XVECEXP (term, 0, 0);
4851
4852 if (GET_CODE (term) != SYMBOL_REF
4853 && GET_CODE (term) != LABEL_REF)
4854 return x;
4855
4856 return term;
4857 }
4858
69bd9368 4859 term = ix86_delegitimize_address (x);
b949ea8b
JW
4860
4861 if (GET_CODE (term) != SYMBOL_REF
4862 && GET_CODE (term) != LABEL_REF)
4863 return x;
4864
4865 return term;
4866}
828a4fe4
MS
4867
4868/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4869 this is used for to form addresses to local data when -fPIC is in
4870 use. */
4871
4872static bool
4873darwin_local_data_pic (rtx disp)
4874{
4875 if (GET_CODE (disp) == MINUS)
4876 {
4877 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4878 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4879 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4880 {
4881 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4882 if (! strcmp (sym_name, "<pic base>"))
4883 return true;
4884 }
4885 }
4886
4887 return false;
4888}
b949ea8b 4889\f
f996902d
RH
4890/* Determine if a given RTX is a valid constant. We already know this
4891 satisfies CONSTANT_P. */
4892
4893bool
b96a374d 4894legitimate_constant_p (rtx x)
f996902d 4895{
f996902d
RH
4896 switch (GET_CODE (x))
4897 {
f996902d 4898 case CONST:
1e19ac74 4899 x = XEXP (x, 0);
f996902d 4900
1e19ac74 4901 if (GET_CODE (x) == PLUS)
828a4fe4 4902 {
1e19ac74 4903 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
828a4fe4 4904 return false;
1e19ac74 4905 x = XEXP (x, 0);
828a4fe4
MS
4906 }
4907
1e19ac74 4908 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
4909 return true;
4910
f996902d 4911 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
4912 if (GET_CODE (x) == UNSPEC)
4913 switch (XINT (x, 1))
f996902d
RH
4914 {
4915 case UNSPEC_TPOFF:
cb0e3e3f 4916 case UNSPEC_NTPOFF:
1e19ac74 4917 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
cb0e3e3f 4918 case UNSPEC_DTPOFF:
1e19ac74 4919 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
f996902d
RH
4920 default:
4921 return false;
4922 }
1e19ac74
RH
4923
4924 /* We must have drilled down to a symbol. */
4925 if (!symbolic_operand (x, Pmode))
4926 return false;
4927 /* FALLTHRU */
4928
4929 case SYMBOL_REF:
4930 /* TLS symbols are never valid. */
4931 if (tls_symbolic_operand (x, Pmode))
4932 return false;
f996902d
RH
4933 break;
4934
4935 default:
4936 break;
4937 }
4938
4939 /* Otherwise we handle everything else in the move patterns. */
4940 return true;
4941}
4942
3a04ff64
RH
4943/* Determine if it's legal to put X into the constant pool. This
4944 is not possible for the address of thread-local symbols, which
4945 is checked above. */
4946
4947static bool
b96a374d 4948ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
4949{
4950 return !legitimate_constant_p (x);
4951}
4952
f996902d
RH
4953/* Determine if a given RTX is a valid constant address. */
4954
4955bool
b96a374d 4956constant_address_p (rtx x)
f996902d 4957{
a94f136b 4958 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
4959}
4960
4961/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 4962 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
4963 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4964
4965bool
b96a374d 4966legitimate_pic_operand_p (rtx x)
f996902d
RH
4967{
4968 rtx inner;
4969
4970 switch (GET_CODE (x))
4971 {
4972 case CONST:
4973 inner = XEXP (x, 0);
4974
4975 /* Only some unspecs are valid as "constants". */
4976 if (GET_CODE (inner) == UNSPEC)
4977 switch (XINT (inner, 1))
4978 {
4979 case UNSPEC_TPOFF:
4980 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4981 default:
4982 return false;
4983 }
5efb1046 4984 /* FALLTHRU */
f996902d
RH
4985
4986 case SYMBOL_REF:
4987 case LABEL_REF:
4988 return legitimate_pic_address_disp_p (x);
4989
4990 default:
4991 return true;
4992 }
4993}
4994
e075ae69
RH
4995/* Determine if a given CONST RTX is a valid memory displacement
4996 in PIC mode. */
0f290768 4997
59be65f6 4998int
8d531ab9 4999legitimate_pic_address_disp_p (rtx disp)
91bb873f 5000{
f996902d
RH
5001 bool saw_plus;
5002
6eb791fc
JH
5003 /* In 64bit mode we can allow direct addresses of symbols and labels
5004 when they are not dynamic symbols. */
c05dbe81
JH
5005 if (TARGET_64BIT)
5006 {
5007 /* TLS references should always be enclosed in UNSPEC. */
5008 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5009 return 0;
5010 if (GET_CODE (disp) == SYMBOL_REF
5011 && ix86_cmodel == CM_SMALL_PIC
2ae5ae57 5012 && SYMBOL_REF_LOCAL_P (disp))
c05dbe81
JH
5013 return 1;
5014 if (GET_CODE (disp) == LABEL_REF)
5015 return 1;
5016 if (GET_CODE (disp) == CONST
a132b6a8
JJ
5017 && GET_CODE (XEXP (disp, 0)) == PLUS)
5018 {
5019 rtx op0 = XEXP (XEXP (disp, 0), 0);
5020 rtx op1 = XEXP (XEXP (disp, 0), 1);
5021
5022 /* TLS references should always be enclosed in UNSPEC. */
5023 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5024 return 0;
5025 if (((GET_CODE (op0) == SYMBOL_REF
5026 && ix86_cmodel == CM_SMALL_PIC
5027 && SYMBOL_REF_LOCAL_P (op0))
5028 || GET_CODE (op0) == LABEL_REF)
5029 && GET_CODE (op1) == CONST_INT
5030 && INTVAL (op1) < 16*1024*1024
5031 && INTVAL (op1) >= -16*1024*1024)
5032 return 1;
5033 }
c05dbe81 5034 }
91bb873f
RH
5035 if (GET_CODE (disp) != CONST)
5036 return 0;
5037 disp = XEXP (disp, 0);
5038
6eb791fc
JH
5039 if (TARGET_64BIT)
5040 {
5041 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5042 of GOT tables. We should not need these anyway. */
5043 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5044 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5045 return 0;
5046
5047 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5048 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5049 return 0;
5050 return 1;
5051 }
5052
f996902d 5053 saw_plus = false;
91bb873f
RH
5054 if (GET_CODE (disp) == PLUS)
5055 {
5056 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5057 return 0;
5058 disp = XEXP (disp, 0);
f996902d 5059 saw_plus = true;
91bb873f
RH
5060 }
5061
828a4fe4
MS
5062 if (TARGET_MACHO && darwin_local_data_pic (disp))
5063 return 1;
b069de3b 5064
8ee41eaf 5065 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5066 return 0;
5067
623fe810
RH
5068 switch (XINT (disp, 1))
5069 {
8ee41eaf 5070 case UNSPEC_GOT:
f996902d
RH
5071 if (saw_plus)
5072 return false;
623fe810 5073 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5074 case UNSPEC_GOTOFF:
799b33a0
JH
5075 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5076 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5077 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5078 return false;
f996902d 5079 case UNSPEC_GOTTPOFF:
dea73790
JJ
5080 case UNSPEC_GOTNTPOFF:
5081 case UNSPEC_INDNTPOFF:
f996902d
RH
5082 if (saw_plus)
5083 return false;
5084 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5085 case UNSPEC_NTPOFF:
f996902d
RH
5086 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5087 case UNSPEC_DTPOFF:
f996902d 5088 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5089 }
fce5a9f2 5090
623fe810 5091 return 0;
91bb873f
RH
5092}
5093
e075ae69
RH
5094/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5095 memory address for an instruction. The MODE argument is the machine mode
5096 for the MEM expression that wants to use this address.
5097
5098 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5099 convert common non-canonical forms to canonical form so that they will
5100 be recognized. */
5101
3b3c6a3f 5102int
8d531ab9 5103legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
3b3c6a3f 5104{
e075ae69
RH
5105 struct ix86_address parts;
5106 rtx base, index, disp;
5107 HOST_WIDE_INT scale;
5108 const char *reason = NULL;
5109 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5110
5111 if (TARGET_DEBUG_ADDR)
5112 {
5113 fprintf (stderr,
e9a25f70 5114 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5115 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5116 debug_rtx (addr);
5117 }
5118
b446e5a2 5119 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5120 {
e075ae69 5121 reason = "decomposition failed";
50e60bc3 5122 goto report_error;
3b3c6a3f
MM
5123 }
5124
e075ae69
RH
5125 base = parts.base;
5126 index = parts.index;
5127 disp = parts.disp;
5128 scale = parts.scale;
91f0226f 5129
e075ae69 5130 /* Validate base register.
e9a25f70
JL
5131
5132 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5133 is one word out of a two word structure, which is represented internally
5134 as a DImode int. */
e9a25f70 5135
3b3c6a3f
MM
5136 if (base)
5137 {
e075ae69
RH
5138 reason_rtx = base;
5139
90e4e4c5 5140 if (GET_CODE (base) != REG)
3b3c6a3f 5141 {
e075ae69 5142 reason = "base is not a register";
50e60bc3 5143 goto report_error;
3b3c6a3f
MM
5144 }
5145
c954bd01
RH
5146 if (GET_MODE (base) != Pmode)
5147 {
e075ae69 5148 reason = "base is not in Pmode";
50e60bc3 5149 goto report_error;
c954bd01
RH
5150 }
5151
90e4e4c5
RH
5152 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5153 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 5154 {
e075ae69 5155 reason = "base is not valid";
50e60bc3 5156 goto report_error;
3b3c6a3f
MM
5157 }
5158 }
5159
e075ae69 5160 /* Validate index register.
e9a25f70
JL
5161
5162 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5163 is one word out of a two word structure, which is represented internally
5164 as a DImode int. */
e075ae69
RH
5165
5166 if (index)
3b3c6a3f 5167 {
e075ae69
RH
5168 reason_rtx = index;
5169
90e4e4c5 5170 if (GET_CODE (index) != REG)
3b3c6a3f 5171 {
e075ae69 5172 reason = "index is not a register";
50e60bc3 5173 goto report_error;
3b3c6a3f
MM
5174 }
5175
e075ae69 5176 if (GET_MODE (index) != Pmode)
c954bd01 5177 {
e075ae69 5178 reason = "index is not in Pmode";
50e60bc3 5179 goto report_error;
c954bd01
RH
5180 }
5181
90e4e4c5
RH
5182 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5183 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 5184 {
e075ae69 5185 reason = "index is not valid";
50e60bc3 5186 goto report_error;
3b3c6a3f
MM
5187 }
5188 }
3b3c6a3f 5189
e075ae69
RH
5190 /* Validate scale factor. */
5191 if (scale != 1)
3b3c6a3f 5192 {
e075ae69
RH
5193 reason_rtx = GEN_INT (scale);
5194 if (!index)
3b3c6a3f 5195 {
e075ae69 5196 reason = "scale without index";
50e60bc3 5197 goto report_error;
3b3c6a3f
MM
5198 }
5199
e075ae69 5200 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5201 {
e075ae69 5202 reason = "scale is not a valid multiplier";
50e60bc3 5203 goto report_error;
3b3c6a3f
MM
5204 }
5205 }
5206
91bb873f 5207 /* Validate displacement. */
3b3c6a3f
MM
5208 if (disp)
5209 {
e075ae69
RH
5210 reason_rtx = disp;
5211
f996902d
RH
5212 if (GET_CODE (disp) == CONST
5213 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5214 switch (XINT (XEXP (disp, 0), 1))
5215 {
5216 case UNSPEC_GOT:
5217 case UNSPEC_GOTOFF:
5218 case UNSPEC_GOTPCREL:
5219 if (!flag_pic)
5220 abort ();
5221 goto is_legitimate_pic;
5222
5223 case UNSPEC_GOTTPOFF:
dea73790
JJ
5224 case UNSPEC_GOTNTPOFF:
5225 case UNSPEC_INDNTPOFF:
f996902d
RH
5226 case UNSPEC_NTPOFF:
5227 case UNSPEC_DTPOFF:
5228 break;
5229
5230 default:
5231 reason = "invalid address unspec";
5232 goto report_error;
5233 }
5234
b069de3b
SS
5235 else if (flag_pic && (SYMBOLIC_CONST (disp)
5236#if TARGET_MACHO
5237 && !machopic_operand_p (disp)
5238#endif
5239 ))
3b3c6a3f 5240 {
f996902d 5241 is_legitimate_pic:
0d7d98ee
JH
5242 if (TARGET_64BIT && (index || base))
5243 {
75d38379
JJ
5244 /* foo@dtpoff(%rX) is ok. */
5245 if (GET_CODE (disp) != CONST
5246 || GET_CODE (XEXP (disp, 0)) != PLUS
5247 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5248 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5249 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5250 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5251 {
5252 reason = "non-constant pic memory reference";
5253 goto report_error;
5254 }
0d7d98ee 5255 }
75d38379 5256 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5257 {
e075ae69 5258 reason = "displacement is an invalid pic construct";
50e60bc3 5259 goto report_error;
91bb873f
RH
5260 }
5261
4e9efe54 5262 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5263 includes the pic_offset_table_rtx register.
5264
4e9efe54
JH
5265 While this is good idea, unfortunately these constructs may
5266 be created by "adds using lea" optimization for incorrect
5267 code like:
5268
5269 int a;
5270 int foo(int i)
5271 {
5272 return *(&a+i);
5273 }
5274
50e60bc3 5275 This code is nonsensical, but results in addressing
4e9efe54 5276 GOT table with pic_offset_table_rtx base. We can't
f710504c 5277 just refuse it easily, since it gets matched by
4e9efe54
JH
5278 "addsi3" pattern, that later gets split to lea in the
5279 case output register differs from input. While this
5280 can be handled by separate addsi pattern for this case
5281 that never results in lea, this seems to be easier and
5282 correct fix for crash to disable this test. */
3b3c6a3f 5283 }
a94f136b
JH
5284 else if (GET_CODE (disp) != LABEL_REF
5285 && GET_CODE (disp) != CONST_INT
5286 && (GET_CODE (disp) != CONST
5287 || !legitimate_constant_p (disp))
5288 && (GET_CODE (disp) != SYMBOL_REF
5289 || !legitimate_constant_p (disp)))
f996902d
RH
5290 {
5291 reason = "displacement is not constant";
5292 goto report_error;
5293 }
8fe75e43
RH
5294 else if (TARGET_64BIT
5295 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
5296 {
5297 reason = "displacement is out of range";
5298 goto report_error;
5299 }
3b3c6a3f
MM
5300 }
5301
e075ae69 5302 /* Everything looks valid. */
3b3c6a3f 5303 if (TARGET_DEBUG_ADDR)
e075ae69 5304 fprintf (stderr, "Success.\n");
3b3c6a3f 5305 return TRUE;
e075ae69 5306
5bf0ebab 5307 report_error:
e075ae69
RH
5308 if (TARGET_DEBUG_ADDR)
5309 {
5310 fprintf (stderr, "Error: %s\n", reason);
5311 debug_rtx (reason_rtx);
5312 }
5313 return FALSE;
3b3c6a3f 5314}
3b3c6a3f 5315\f
55efb413
JW
5316/* Return an unique alias set for the GOT. */
5317
0f290768 5318static HOST_WIDE_INT
b96a374d 5319ix86_GOT_alias_set (void)
55efb413 5320{
5bf0ebab
RH
5321 static HOST_WIDE_INT set = -1;
5322 if (set == -1)
5323 set = new_alias_set ();
5324 return set;
0f290768 5325}
55efb413 5326
3b3c6a3f
MM
5327/* Return a legitimate reference for ORIG (an address) using the
5328 register REG. If REG is 0, a new pseudo is generated.
5329
91bb873f 5330 There are two types of references that must be handled:
3b3c6a3f
MM
5331
5332 1. Global data references must load the address from the GOT, via
5333 the PIC reg. An insn is emitted to do this load, and the reg is
5334 returned.
5335
91bb873f
RH
5336 2. Static data references, constant pool addresses, and code labels
5337 compute the address as an offset from the GOT, whose base is in
2ae5ae57 5338 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
5339 differentiate them from global data objects. The returned
5340 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5341
5342 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5343 reg also appears in the address. */
3b3c6a3f 5344
b39edae3 5345static rtx
b96a374d 5346legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
5347{
5348 rtx addr = orig;
5349 rtx new = orig;
91bb873f 5350 rtx base;
3b3c6a3f 5351
b069de3b
SS
5352#if TARGET_MACHO
5353 if (reg == 0)
5354 reg = gen_reg_rtx (Pmode);
5355 /* Use the generic Mach-O PIC machinery. */
5356 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5357#endif
5358
c05dbe81
JH
5359 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5360 new = addr;
5361 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 5362 {
c05dbe81
JH
5363 /* This symbol may be referenced via a displacement from the PIC
5364 base address (@GOTOFF). */
3b3c6a3f 5365
c05dbe81
JH
5366 if (reload_in_progress)
5367 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
799b33a0
JH
5368 if (GET_CODE (addr) == CONST)
5369 addr = XEXP (addr, 0);
5370 if (GET_CODE (addr) == PLUS)
5371 {
5372 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5373 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5374 }
5375 else
5376 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
c05dbe81
JH
5377 new = gen_rtx_CONST (Pmode, new);
5378 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5379
c05dbe81
JH
5380 if (reg != 0)
5381 {
5382 emit_move_insn (reg, new);
5383 new = reg;
5384 }
3b3c6a3f 5385 }
91bb873f 5386 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5387 {
14f73b5a
JH
5388 if (TARGET_64BIT)
5389 {
8ee41eaf 5390 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a 5391 new = gen_rtx_CONST (Pmode, new);
542a8afa 5392 new = gen_const_mem (Pmode, new);
14f73b5a
JH
5393 set_mem_alias_set (new, ix86_GOT_alias_set ());
5394
5395 if (reg == 0)
5396 reg = gen_reg_rtx (Pmode);
5397 /* Use directly gen_movsi, otherwise the address is loaded
5398 into register for CSE. We don't want to CSE this addresses,
5399 instead we CSE addresses from the GOT table, so skip this. */
5400 emit_insn (gen_movsi (reg, new));
5401 new = reg;
5402 }
5403 else
5404 {
5405 /* This symbol must be referenced via a load from the
5406 Global Offset Table (@GOT). */
3b3c6a3f 5407
66edd3b4
RH
5408 if (reload_in_progress)
5409 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5410 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5411 new = gen_rtx_CONST (Pmode, new);
5412 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
542a8afa 5413 new = gen_const_mem (Pmode, new);
14f73b5a 5414 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5415
14f73b5a
JH
5416 if (reg == 0)
5417 reg = gen_reg_rtx (Pmode);
5418 emit_move_insn (reg, new);
5419 new = reg;
5420 }
0f290768 5421 }
91bb873f
RH
5422 else
5423 {
5424 if (GET_CODE (addr) == CONST)
3b3c6a3f 5425 {
91bb873f 5426 addr = XEXP (addr, 0);
e3c8ea67
RH
5427
5428 /* We must match stuff we generate before. Assume the only
5429 unspecs that can get here are ours. Not that we could do
43f3a59d 5430 anything with them anyway.... */
e3c8ea67
RH
5431 if (GET_CODE (addr) == UNSPEC
5432 || (GET_CODE (addr) == PLUS
5433 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5434 return orig;
5435 if (GET_CODE (addr) != PLUS)
564d80f4 5436 abort ();
3b3c6a3f 5437 }
91bb873f
RH
5438 if (GET_CODE (addr) == PLUS)
5439 {
5440 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5441
91bb873f
RH
5442 /* Check first to see if this is a constant offset from a @GOTOFF
5443 symbol reference. */
623fe810 5444 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5445 && GET_CODE (op1) == CONST_INT)
5446 {
6eb791fc
JH
5447 if (!TARGET_64BIT)
5448 {
66edd3b4
RH
5449 if (reload_in_progress)
5450 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5451 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5452 UNSPEC_GOTOFF);
6eb791fc
JH
5453 new = gen_rtx_PLUS (Pmode, new, op1);
5454 new = gen_rtx_CONST (Pmode, new);
5455 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5456
6eb791fc
JH
5457 if (reg != 0)
5458 {
5459 emit_move_insn (reg, new);
5460 new = reg;
5461 }
5462 }
5463 else
91bb873f 5464 {
75d38379
JJ
5465 if (INTVAL (op1) < -16*1024*1024
5466 || INTVAL (op1) >= 16*1024*1024)
b8771ace 5467 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
91bb873f
RH
5468 }
5469 }
5470 else
5471 {
5472 base = legitimize_pic_address (XEXP (addr, 0), reg);
5473 new = legitimize_pic_address (XEXP (addr, 1),
5474 base == reg ? NULL_RTX : reg);
5475
5476 if (GET_CODE (new) == CONST_INT)
5477 new = plus_constant (base, INTVAL (new));
5478 else
5479 {
5480 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5481 {
5482 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5483 new = XEXP (new, 1);
5484 }
5485 new = gen_rtx_PLUS (Pmode, base, new);
5486 }
5487 }
5488 }
3b3c6a3f
MM
5489 }
5490 return new;
5491}
5492\f
74dc3e94 5493/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
5494
5495static rtx
b96a374d 5496get_thread_pointer (int to_reg)
f996902d 5497{
74dc3e94 5498 rtx tp, reg, insn;
f996902d
RH
5499
5500 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
5501 if (!to_reg)
5502 return tp;
f996902d 5503
74dc3e94
RH
5504 reg = gen_reg_rtx (Pmode);
5505 insn = gen_rtx_SET (VOIDmode, reg, tp);
5506 insn = emit_insn (insn);
5507
5508 return reg;
5509}
5510
5511/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5512 false if we expect this to be used for a memory address and true if
5513 we expect to load the address into a register. */
5514
5515static rtx
b96a374d 5516legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94
RH
5517{
5518 rtx dest, base, off, pic;
5519 int type;
5520
5521 switch (model)
5522 {
5523 case TLS_MODEL_GLOBAL_DYNAMIC:
5524 dest = gen_reg_rtx (Pmode);
5525 if (TARGET_64BIT)
5526 {
5527 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5528
5529 start_sequence ();
5530 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5531 insns = get_insns ();
5532 end_sequence ();
5533
5534 emit_libcall_block (insns, dest, rax, x);
5535 }
5536 else
5537 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5538 break;
5539
5540 case TLS_MODEL_LOCAL_DYNAMIC:
5541 base = gen_reg_rtx (Pmode);
5542 if (TARGET_64BIT)
5543 {
5544 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5545
5546 start_sequence ();
5547 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5548 insns = get_insns ();
5549 end_sequence ();
5550
5551 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5552 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5553 emit_libcall_block (insns, base, rax, note);
5554 }
5555 else
5556 emit_insn (gen_tls_local_dynamic_base_32 (base));
5557
5558 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5559 off = gen_rtx_CONST (Pmode, off);
5560
5561 return gen_rtx_PLUS (Pmode, base, off);
5562
5563 case TLS_MODEL_INITIAL_EXEC:
5564 if (TARGET_64BIT)
5565 {
5566 pic = NULL;
5567 type = UNSPEC_GOTNTPOFF;
5568 }
5569 else if (flag_pic)
5570 {
5571 if (reload_in_progress)
5572 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5573 pic = pic_offset_table_rtx;
5574 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5575 }
5576 else if (!TARGET_GNU_TLS)
5577 {
5578 pic = gen_reg_rtx (Pmode);
5579 emit_insn (gen_set_got (pic));
5580 type = UNSPEC_GOTTPOFF;
5581 }
5582 else
5583 {
5584 pic = NULL;
5585 type = UNSPEC_INDNTPOFF;
5586 }
5587
5588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5589 off = gen_rtx_CONST (Pmode, off);
5590 if (pic)
5591 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 5592 off = gen_const_mem (Pmode, off);
74dc3e94
RH
5593 set_mem_alias_set (off, ix86_GOT_alias_set ());
5594
5595 if (TARGET_64BIT || TARGET_GNU_TLS)
5596 {
5597 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5598 off = force_reg (Pmode, off);
5599 return gen_rtx_PLUS (Pmode, base, off);
5600 }
5601 else
5602 {
5603 base = get_thread_pointer (true);
5604 dest = gen_reg_rtx (Pmode);
5605 emit_insn (gen_subsi3 (dest, base, off));
5606 }
5607 break;
5608
5609 case TLS_MODEL_LOCAL_EXEC:
5610 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5611 (TARGET_64BIT || TARGET_GNU_TLS)
5612 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5613 off = gen_rtx_CONST (Pmode, off);
5614
5615 if (TARGET_64BIT || TARGET_GNU_TLS)
5616 {
5617 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5618 return gen_rtx_PLUS (Pmode, base, off);
5619 }
5620 else
5621 {
5622 base = get_thread_pointer (true);
5623 dest = gen_reg_rtx (Pmode);
5624 emit_insn (gen_subsi3 (dest, base, off));
5625 }
5626 break;
5627
5628 default:
5629 abort ();
5630 }
5631
5632 return dest;
f996902d 5633}
fce5a9f2 5634
3b3c6a3f
MM
5635/* Try machine-dependent ways of modifying an illegitimate address
5636 to be legitimate. If we find one, return the new, valid address.
5637 This macro is used in only one place: `memory_address' in explow.c.
5638
5639 OLDX is the address as it was before break_out_memory_refs was called.
5640 In some cases it is useful to look at this to decide what needs to be done.
5641
5642 MODE and WIN are passed so that this macro can use
5643 GO_IF_LEGITIMATE_ADDRESS.
5644
5645 It is always safe for this macro to do nothing. It exists to recognize
5646 opportunities to optimize the output.
5647
5648 For the 80386, we handle X+REG by loading X into a register R and
5649 using R+REG. R will go in a general reg and indexing will be used.
5650 However, if REG is a broken-out memory address or multiplication,
5651 nothing needs to be done because REG can certainly go in a general reg.
5652
5653 When -fpic is used, special handling is needed for symbolic references.
5654 See comments by legitimize_pic_address in i386.c for details. */
5655
5656rtx
8d531ab9 5657legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
5658{
5659 int changed = 0;
5660 unsigned log;
5661
5662 if (TARGET_DEBUG_ADDR)
5663 {
e9a25f70
JL
5664 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5665 GET_MODE_NAME (mode));
3b3c6a3f
MM
5666 debug_rtx (x);
5667 }
5668
8fe75e43 5669 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 5670 if (log)
74dc3e94 5671 return legitimize_tls_address (x, log, false);
b39edae3
RH
5672 if (GET_CODE (x) == CONST
5673 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
5674 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5675 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3
RH
5676 {
5677 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5678 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5679 }
f996902d 5680
3b3c6a3f
MM
5681 if (flag_pic && SYMBOLIC_CONST (x))
5682 return legitimize_pic_address (x, 0);
5683
5684 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5685 if (GET_CODE (x) == ASHIFT
5686 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5687 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5688 {
5689 changed = 1;
a269a03c
JC
5690 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5691 GEN_INT (1 << log));
3b3c6a3f
MM
5692 }
5693
5694 if (GET_CODE (x) == PLUS)
5695 {
0f290768 5696 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5697
3b3c6a3f
MM
5698 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5699 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5700 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5701 {
5702 changed = 1;
c5c76735
JL
5703 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5704 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5705 GEN_INT (1 << log));
3b3c6a3f
MM
5706 }
5707
5708 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5709 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5710 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5711 {
5712 changed = 1;
c5c76735
JL
5713 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5714 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5715 GEN_INT (1 << log));
3b3c6a3f
MM
5716 }
5717
0f290768 5718 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5719 if (GET_CODE (XEXP (x, 1)) == MULT)
5720 {
5721 rtx tmp = XEXP (x, 0);
5722 XEXP (x, 0) = XEXP (x, 1);
5723 XEXP (x, 1) = tmp;
5724 changed = 1;
5725 }
5726
5727 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5728 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5729 created by virtual register instantiation, register elimination, and
5730 similar optimizations. */
5731 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5732 {
5733 changed = 1;
c5c76735
JL
5734 x = gen_rtx_PLUS (Pmode,
5735 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5736 XEXP (XEXP (x, 1), 0)),
5737 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5738 }
5739
e9a25f70
JL
5740 /* Canonicalize
5741 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5742 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5743 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5745 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5746 && CONSTANT_P (XEXP (x, 1)))
5747 {
00c79232
ML
5748 rtx constant;
5749 rtx other = NULL_RTX;
3b3c6a3f
MM
5750
5751 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5752 {
5753 constant = XEXP (x, 1);
5754 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5755 }
5756 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5757 {
5758 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5759 other = XEXP (x, 1);
5760 }
5761 else
5762 constant = 0;
5763
5764 if (constant)
5765 {
5766 changed = 1;
c5c76735
JL
5767 x = gen_rtx_PLUS (Pmode,
5768 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5769 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5770 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5771 }
5772 }
5773
5774 if (changed && legitimate_address_p (mode, x, FALSE))
5775 return x;
5776
5777 if (GET_CODE (XEXP (x, 0)) == MULT)
5778 {
5779 changed = 1;
5780 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5781 }
5782
5783 if (GET_CODE (XEXP (x, 1)) == MULT)
5784 {
5785 changed = 1;
5786 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5787 }
5788
5789 if (changed
5790 && GET_CODE (XEXP (x, 1)) == REG
5791 && GET_CODE (XEXP (x, 0)) == REG)
5792 return x;
5793
5794 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5795 {
5796 changed = 1;
5797 x = legitimize_pic_address (x, 0);
5798 }
5799
5800 if (changed && legitimate_address_p (mode, x, FALSE))
5801 return x;
5802
5803 if (GET_CODE (XEXP (x, 0)) == REG)
5804 {
8d531ab9
KH
5805 rtx temp = gen_reg_rtx (Pmode);
5806 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
5807 if (val != temp)
5808 emit_move_insn (temp, val);
5809
5810 XEXP (x, 1) = temp;
5811 return x;
5812 }
5813
5814 else if (GET_CODE (XEXP (x, 1)) == REG)
5815 {
8d531ab9
KH
5816 rtx temp = gen_reg_rtx (Pmode);
5817 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
5818 if (val != temp)
5819 emit_move_insn (temp, val);
5820
5821 XEXP (x, 0) = temp;
5822 return x;
5823 }
5824 }
5825
5826 return x;
5827}
2a2ab3f9
JVA
5828\f
5829/* Print an integer constant expression in assembler syntax. Addition
5830 and subtraction are the only arithmetic that may appear in these
5831 expressions. FILE is the stdio stream to write to, X is the rtx, and
5832 CODE is the operand print code from the output string. */
5833
5834static void
b96a374d 5835output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
5836{
5837 char buf[256];
5838
5839 switch (GET_CODE (x))
5840 {
5841 case PC:
5842 if (flag_pic)
5843 putc ('.', file);
5844 else
5845 abort ();
5846 break;
5847
5848 case SYMBOL_REF:
79bba51c
AP
5849 /* Mark the decl as referenced so that cgraph will output the function. */
5850 if (SYMBOL_REF_DECL (x))
5851 mark_decl_referenced (SYMBOL_REF_DECL (x));
5852
91bb873f 5853 assemble_name (file, XSTR (x, 0));
12969f45 5854 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 5855 fputs ("@PLT", file);
2a2ab3f9
JVA
5856 break;
5857
91bb873f
RH
5858 case LABEL_REF:
5859 x = XEXP (x, 0);
5efb1046 5860 /* FALLTHRU */
2a2ab3f9
JVA
5861 case CODE_LABEL:
5862 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5863 assemble_name (asm_out_file, buf);
5864 break;
5865
5866 case CONST_INT:
f64cecad 5867 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5868 break;
5869
5870 case CONST:
5871 /* This used to output parentheses around the expression,
5872 but that does not work on the 386 (either ATT or BSD assembler). */
5873 output_pic_addr_const (file, XEXP (x, 0), code);
5874 break;
5875
5876 case CONST_DOUBLE:
5877 if (GET_MODE (x) == VOIDmode)
5878 {
5879 /* We can use %d if the number is <32 bits and positive. */
5880 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5881 fprintf (file, "0x%lx%08lx",
5882 (unsigned long) CONST_DOUBLE_HIGH (x),
5883 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5884 else
f64cecad 5885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5886 }
5887 else
5888 /* We can't handle floating point constants;
5889 PRINT_OPERAND must handle them. */
5890 output_operand_lossage ("floating constant misused");
5891 break;
5892
5893 case PLUS:
e9a25f70 5894 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5895 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5896 {
2a2ab3f9 5897 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5898 putc ('+', file);
e9a25f70 5899 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5900 }
91bb873f 5901 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5902 {
2a2ab3f9 5903 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5904 putc ('+', file);
e9a25f70 5905 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5906 }
91bb873f
RH
5907 else
5908 abort ();
2a2ab3f9
JVA
5909 break;
5910
5911 case MINUS:
b069de3b
SS
5912 if (!TARGET_MACHO)
5913 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5914 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5915 putc ('-', file);
2a2ab3f9 5916 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
5917 if (!TARGET_MACHO)
5918 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5919 break;
5920
91bb873f
RH
5921 case UNSPEC:
5922 if (XVECLEN (x, 0) != 1)
5bf0ebab 5923 abort ();
91bb873f
RH
5924 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5925 switch (XINT (x, 1))
77ebd435 5926 {
8ee41eaf 5927 case UNSPEC_GOT:
77ebd435
AJ
5928 fputs ("@GOT", file);
5929 break;
8ee41eaf 5930 case UNSPEC_GOTOFF:
77ebd435
AJ
5931 fputs ("@GOTOFF", file);
5932 break;
8ee41eaf 5933 case UNSPEC_GOTPCREL:
edfe8595 5934 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5935 break;
f996902d 5936 case UNSPEC_GOTTPOFF:
dea73790 5937 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
5938 fputs ("@GOTTPOFF", file);
5939 break;
5940 case UNSPEC_TPOFF:
5941 fputs ("@TPOFF", file);
5942 break;
5943 case UNSPEC_NTPOFF:
75d38379
JJ
5944 if (TARGET_64BIT)
5945 fputs ("@TPOFF", file);
5946 else
5947 fputs ("@NTPOFF", file);
f996902d
RH
5948 break;
5949 case UNSPEC_DTPOFF:
5950 fputs ("@DTPOFF", file);
5951 break;
dea73790 5952 case UNSPEC_GOTNTPOFF:
75d38379
JJ
5953 if (TARGET_64BIT)
5954 fputs ("@GOTTPOFF(%rip)", file);
5955 else
5956 fputs ("@GOTNTPOFF", file);
dea73790
JJ
5957 break;
5958 case UNSPEC_INDNTPOFF:
5959 fputs ("@INDNTPOFF", file);
5960 break;
77ebd435
AJ
5961 default:
5962 output_operand_lossage ("invalid UNSPEC as operand");
5963 break;
5964 }
91bb873f
RH
5965 break;
5966
2a2ab3f9
JVA
5967 default:
5968 output_operand_lossage ("invalid expression as operand");
5969 }
5970}
1865dbb5 5971
b9203463
RH
5972/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5973 We need to emit DTP-relative relocations. */
5974
5975void
b96a374d 5976i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 5977{
75d38379
JJ
5978 fputs (ASM_LONG, file);
5979 output_addr_const (file, x);
5980 fputs ("@DTPOFF", file);
b9203463
RH
5981 switch (size)
5982 {
5983 case 4:
b9203463
RH
5984 break;
5985 case 8:
75d38379 5986 fputs (", 0", file);
b9203463 5987 break;
b9203463
RH
5988 default:
5989 abort ();
5990 }
b9203463
RH
5991}
5992
1865dbb5
JM
5993/* In the name of slightly smaller debug output, and to cater to
5994 general assembler losage, recognize PIC+GOTOFF and turn it back
5995 into a direct symbol reference. */
5996
69bd9368 5997static rtx
b96a374d 5998ix86_delegitimize_address (rtx orig_x)
1865dbb5 5999{
ec65b2e3 6000 rtx x = orig_x, y;
1865dbb5 6001
4c8c0dec
JJ
6002 if (GET_CODE (x) == MEM)
6003 x = XEXP (x, 0);
6004
6eb791fc
JH
6005 if (TARGET_64BIT)
6006 {
6007 if (GET_CODE (x) != CONST
6008 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6009 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6010 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6011 return orig_x;
6012 return XVECEXP (XEXP (x, 0), 0, 0);
6013 }
6014
1865dbb5 6015 if (GET_CODE (x) != PLUS
1865dbb5
JM
6016 || GET_CODE (XEXP (x, 1)) != CONST)
6017 return orig_x;
6018
ec65b2e3
JJ
6019 if (GET_CODE (XEXP (x, 0)) == REG
6020 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6021 /* %ebx + GOT/GOTOFF */
6022 y = NULL;
6023 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6024 {
6025 /* %ebx + %reg * scale + GOT/GOTOFF */
6026 y = XEXP (x, 0);
6027 if (GET_CODE (XEXP (y, 0)) == REG
6028 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6029 y = XEXP (y, 1);
6030 else if (GET_CODE (XEXP (y, 1)) == REG
6031 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6032 y = XEXP (y, 0);
6033 else
6034 return orig_x;
6035 if (GET_CODE (y) != REG
6036 && GET_CODE (y) != MULT
6037 && GET_CODE (y) != ASHIFT)
6038 return orig_x;
6039 }
6040 else
6041 return orig_x;
6042
1865dbb5
JM
6043 x = XEXP (XEXP (x, 1), 0);
6044 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6045 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6046 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6047 {
6048 if (y)
6049 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6050 return XVECEXP (x, 0, 0);
6051 }
1865dbb5
JM
6052
6053 if (GET_CODE (x) == PLUS
6054 && GET_CODE (XEXP (x, 0)) == UNSPEC
6055 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6056 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6057 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6058 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6059 {
6060 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6061 if (y)
6062 return gen_rtx_PLUS (Pmode, y, x);
6063 return x;
6064 }
1865dbb5
JM
6065
6066 return orig_x;
6067}
2a2ab3f9 6068\f
a269a03c 6069static void
b96a374d
AJ
6070put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6071 int fp, FILE *file)
a269a03c 6072{
a269a03c
JC
6073 const char *suffix;
6074
9a915772
JH
6075 if (mode == CCFPmode || mode == CCFPUmode)
6076 {
6077 enum rtx_code second_code, bypass_code;
6078 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
f822d252 6079 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
b531087a 6080 abort ();
9a915772
JH
6081 code = ix86_fp_compare_code_to_integer (code);
6082 mode = CCmode;
6083 }
a269a03c
JC
6084 if (reverse)
6085 code = reverse_condition (code);
e075ae69 6086
a269a03c
JC
6087 switch (code)
6088 {
6089 case EQ:
6090 suffix = "e";
6091 break;
a269a03c
JC
6092 case NE:
6093 suffix = "ne";
6094 break;
a269a03c 6095 case GT:
7e08e190 6096 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6097 abort ();
6098 suffix = "g";
a269a03c 6099 break;
a269a03c 6100 case GTU:
e075ae69
RH
6101 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6102 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6103 if (mode != CCmode)
0f290768 6104 abort ();
e075ae69 6105 suffix = fp ? "nbe" : "a";
a269a03c 6106 break;
a269a03c 6107 case LT:
9076b9c1 6108 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6109 suffix = "s";
7e08e190 6110 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6111 suffix = "l";
9076b9c1 6112 else
0f290768 6113 abort ();
a269a03c 6114 break;
a269a03c 6115 case LTU:
9076b9c1 6116 if (mode != CCmode)
0f290768 6117 abort ();
a269a03c
JC
6118 suffix = "b";
6119 break;
a269a03c 6120 case GE:
9076b9c1 6121 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6122 suffix = "ns";
7e08e190 6123 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6124 suffix = "ge";
9076b9c1 6125 else
0f290768 6126 abort ();
a269a03c 6127 break;
a269a03c 6128 case GEU:
e075ae69 6129 /* ??? As above. */
7e08e190 6130 if (mode != CCmode)
0f290768 6131 abort ();
7e08e190 6132 suffix = fp ? "nb" : "ae";
a269a03c 6133 break;
a269a03c 6134 case LE:
7e08e190 6135 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6136 abort ();
6137 suffix = "le";
a269a03c 6138 break;
a269a03c 6139 case LEU:
9076b9c1
JH
6140 if (mode != CCmode)
6141 abort ();
7e08e190 6142 suffix = "be";
a269a03c 6143 break;
3a3677ff 6144 case UNORDERED:
9e7adcb3 6145 suffix = fp ? "u" : "p";
3a3677ff
RH
6146 break;
6147 case ORDERED:
9e7adcb3 6148 suffix = fp ? "nu" : "np";
3a3677ff 6149 break;
a269a03c
JC
6150 default:
6151 abort ();
6152 }
6153 fputs (suffix, file);
6154}
6155
a55f4481
RK
6156/* Print the name of register X to FILE based on its machine mode and number.
6157 If CODE is 'w', pretend the mode is HImode.
6158 If CODE is 'b', pretend the mode is QImode.
6159 If CODE is 'k', pretend the mode is SImode.
6160 If CODE is 'q', pretend the mode is DImode.
6161 If CODE is 'h', pretend the reg is the `high' byte register.
6162 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6163
e075ae69 6164void
b96a374d 6165print_reg (rtx x, int code, FILE *file)
e5cb57e8 6166{
a55f4481
RK
6167 if (REGNO (x) == ARG_POINTER_REGNUM
6168 || REGNO (x) == FRAME_POINTER_REGNUM
6169 || REGNO (x) == FLAGS_REG
6170 || REGNO (x) == FPSR_REG)
480feac0
ZW
6171 abort ();
6172
5bf0ebab 6173 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6174 putc ('%', file);
6175
ef6257cd 6176 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6177 code = 2;
6178 else if (code == 'b')
6179 code = 1;
6180 else if (code == 'k')
6181 code = 4;
3f3f2124
JH
6182 else if (code == 'q')
6183 code = 8;
e075ae69
RH
6184 else if (code == 'y')
6185 code = 3;
6186 else if (code == 'h')
6187 code = 0;
6188 else
6189 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6190
3f3f2124
JH
6191 /* Irritatingly, AMD extended registers use different naming convention
6192 from the normal registers. */
6193 if (REX_INT_REG_P (x))
6194 {
885a70fd
JH
6195 if (!TARGET_64BIT)
6196 abort ();
3f3f2124
JH
6197 switch (code)
6198 {
ef6257cd 6199 case 0:
c725bd79 6200 error ("extended registers have no high halves");
3f3f2124
JH
6201 break;
6202 case 1:
6203 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6204 break;
6205 case 2:
6206 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6207 break;
6208 case 4:
6209 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6210 break;
6211 case 8:
6212 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6213 break;
6214 default:
c725bd79 6215 error ("unsupported operand size for extended register");
3f3f2124
JH
6216 break;
6217 }
6218 return;
6219 }
e075ae69
RH
6220 switch (code)
6221 {
6222 case 3:
6223 if (STACK_TOP_P (x))
6224 {
6225 fputs ("st(0)", file);
6226 break;
6227 }
5efb1046 6228 /* FALLTHRU */
e075ae69 6229 case 8:
3f3f2124 6230 case 4:
e075ae69 6231 case 12:
446988df 6232 if (! ANY_FP_REG_P (x))
885a70fd 6233 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 6234 /* FALLTHRU */
a7180f70 6235 case 16:
e075ae69 6236 case 2:
d4c32b6f 6237 normal:
e075ae69
RH
6238 fputs (hi_reg_name[REGNO (x)], file);
6239 break;
6240 case 1:
d4c32b6f
RH
6241 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6242 goto normal;
e075ae69
RH
6243 fputs (qi_reg_name[REGNO (x)], file);
6244 break;
6245 case 0:
d4c32b6f
RH
6246 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6247 goto normal;
e075ae69
RH
6248 fputs (qi_high_reg_name[REGNO (x)], file);
6249 break;
6250 default:
6251 abort ();
fe25fea3 6252 }
e5cb57e8
SC
6253}
6254
f996902d
RH
6255/* Locate some local-dynamic symbol still in use by this function
6256 so that we can print its name in some tls_local_dynamic_base
6257 pattern. */
6258
6259static const char *
b96a374d 6260get_some_local_dynamic_name (void)
f996902d
RH
6261{
6262 rtx insn;
6263
6264 if (cfun->machine->some_ld_name)
6265 return cfun->machine->some_ld_name;
6266
6267 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6268 if (INSN_P (insn)
6269 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6270 return cfun->machine->some_ld_name;
6271
6272 abort ();
6273}
6274
6275static int
b96a374d 6276get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
6277{
6278 rtx x = *px;
6279
6280 if (GET_CODE (x) == SYMBOL_REF
6281 && local_dynamic_symbolic_operand (x, Pmode))
6282 {
6283 cfun->machine->some_ld_name = XSTR (x, 0);
6284 return 1;
6285 }
6286
6287 return 0;
6288}
6289
2a2ab3f9 6290/* Meaning of CODE:
fe25fea3 6291 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6292 C -- print opcode suffix for set/cmov insn.
fe25fea3 6293 c -- like C, but print reversed condition
ef6257cd 6294 F,f -- likewise, but for floating-point.
f6f5dff2
RO
6295 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6296 otherwise nothing
2a2ab3f9
JVA
6297 R -- print the prefix for register names.
6298 z -- print the opcode suffix for the size of the current operand.
6299 * -- print a star (in certain assembler syntax)
fb204271 6300 A -- print an absolute memory reference.
2a2ab3f9 6301 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6302 s -- print a shift double count, followed by the assemblers argument
6303 delimiter.
fe25fea3
SC
6304 b -- print the QImode name of the register for the indicated operand.
6305 %b0 would print %al if operands[0] is reg 0.
6306 w -- likewise, print the HImode name of the register.
6307 k -- likewise, print the SImode name of the register.
3f3f2124 6308 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6309 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6310 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6311 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6312 P -- if PIC, print an @PLT suffix.
6313 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6314 & -- print some in-use local-dynamic symbol name.
ef719a44 6315 H -- print a memory address offset by 8; used for sse high-parts
a46d1d38 6316 */
2a2ab3f9
JVA
6317
6318void
b96a374d 6319print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6320{
6321 if (code)
6322 {
6323 switch (code)
6324 {
6325 case '*':
80f33d06 6326 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6327 putc ('*', file);
6328 return;
6329
f996902d
RH
6330 case '&':
6331 assemble_name (file, get_some_local_dynamic_name ());
6332 return;
6333
fb204271 6334 case 'A':
80f33d06 6335 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6336 putc ('*', file);
80f33d06 6337 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6338 {
6339 /* Intel syntax. For absolute addresses, registers should not
6340 be surrounded by braces. */
6341 if (GET_CODE (x) != REG)
6342 {
6343 putc ('[', file);
6344 PRINT_OPERAND (file, x, 0);
6345 putc (']', file);
6346 return;
6347 }
6348 }
80f33d06
GS
6349 else
6350 abort ();
fb204271
DN
6351
6352 PRINT_OPERAND (file, x, 0);
6353 return;
6354
6355
2a2ab3f9 6356 case 'L':
80f33d06 6357 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6358 putc ('l', file);
2a2ab3f9
JVA
6359 return;
6360
6361 case 'W':
80f33d06 6362 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6363 putc ('w', file);
2a2ab3f9
JVA
6364 return;
6365
6366 case 'B':
80f33d06 6367 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6368 putc ('b', file);
2a2ab3f9
JVA
6369 return;
6370
6371 case 'Q':
80f33d06 6372 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6373 putc ('l', file);
2a2ab3f9
JVA
6374 return;
6375
6376 case 'S':
80f33d06 6377 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6378 putc ('s', file);
2a2ab3f9
JVA
6379 return;
6380
5f1ec3e6 6381 case 'T':
80f33d06 6382 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6383 putc ('t', file);
5f1ec3e6
JVA
6384 return;
6385
2a2ab3f9
JVA
6386 case 'z':
6387 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6388 registers. */
2a2ab3f9
JVA
6389 if (STACK_REG_P (x))
6390 return;
6391
831c4e87
KC
6392 /* Likewise if using Intel opcodes. */
6393 if (ASSEMBLER_DIALECT == ASM_INTEL)
6394 return;
6395
6396 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6397 switch (GET_MODE_SIZE (GET_MODE (x)))
6398 {
2a2ab3f9 6399 case 2:
155d8a47
JW
6400#ifdef HAVE_GAS_FILDS_FISTS
6401 putc ('s', file);
6402#endif
2a2ab3f9
JVA
6403 return;
6404
6405 case 4:
6406 if (GET_MODE (x) == SFmode)
6407 {
e075ae69 6408 putc ('s', file);
2a2ab3f9
JVA
6409 return;
6410 }
6411 else
e075ae69 6412 putc ('l', file);
2a2ab3f9
JVA
6413 return;
6414
5f1ec3e6 6415 case 12:
2b589241 6416 case 16:
e075ae69
RH
6417 putc ('t', file);
6418 return;
5f1ec3e6 6419
2a2ab3f9
JVA
6420 case 8:
6421 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6422 {
6423#ifdef GAS_MNEMONICS
e075ae69 6424 putc ('q', file);
56c0e8fa 6425#else
e075ae69
RH
6426 putc ('l', file);
6427 putc ('l', file);
56c0e8fa
JVA
6428#endif
6429 }
e075ae69
RH
6430 else
6431 putc ('l', file);
2a2ab3f9 6432 return;
155d8a47
JW
6433
6434 default:
6435 abort ();
2a2ab3f9 6436 }
4af3895e
JVA
6437
6438 case 'b':
6439 case 'w':
6440 case 'k':
3f3f2124 6441 case 'q':
4af3895e
JVA
6442 case 'h':
6443 case 'y':
5cb6195d 6444 case 'X':
e075ae69 6445 case 'P':
4af3895e
JVA
6446 break;
6447
2d49677f
SC
6448 case 's':
6449 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6450 {
6451 PRINT_OPERAND (file, x, 0);
e075ae69 6452 putc (',', file);
2d49677f 6453 }
a269a03c
JC
6454 return;
6455
a46d1d38
JH
6456 case 'D':
6457 /* Little bit of braindamage here. The SSE compare instructions
6458 does use completely different names for the comparisons that the
6459 fp conditional moves. */
6460 switch (GET_CODE (x))
6461 {
6462 case EQ:
6463 case UNEQ:
6464 fputs ("eq", file);
6465 break;
6466 case LT:
6467 case UNLT:
6468 fputs ("lt", file);
6469 break;
6470 case LE:
6471 case UNLE:
6472 fputs ("le", file);
6473 break;
6474 case UNORDERED:
6475 fputs ("unord", file);
6476 break;
6477 case NE:
6478 case LTGT:
6479 fputs ("neq", file);
6480 break;
6481 case UNGE:
6482 case GE:
6483 fputs ("nlt", file);
6484 break;
6485 case UNGT:
6486 case GT:
6487 fputs ("nle", file);
6488 break;
6489 case ORDERED:
6490 fputs ("ord", file);
6491 break;
6492 default:
6493 abort ();
6494 break;
6495 }
6496 return;
048b1c95 6497 case 'O':
f6f5dff2 6498#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6499 if (ASSEMBLER_DIALECT == ASM_ATT)
6500 {
6501 switch (GET_MODE (x))
6502 {
6503 case HImode: putc ('w', file); break;
6504 case SImode:
6505 case SFmode: putc ('l', file); break;
6506 case DImode:
6507 case DFmode: putc ('q', file); break;
6508 default: abort ();
6509 }
6510 putc ('.', file);
6511 }
6512#endif
6513 return;
1853aadd 6514 case 'C':
e075ae69 6515 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6516 return;
fe25fea3 6517 case 'F':
f6f5dff2 6518#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6519 if (ASSEMBLER_DIALECT == ASM_ATT)
6520 putc ('.', file);
6521#endif
e075ae69 6522 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6523 return;
6524
e9a25f70 6525 /* Like above, but reverse condition */
e075ae69 6526 case 'c':
fce5a9f2 6527 /* Check to see if argument to %c is really a constant
c1d5afc4 6528 and not a condition code which needs to be reversed. */
ec8e098d 6529 if (!COMPARISON_P (x))
c1d5afc4
CR
6530 {
6531 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6532 return;
6533 }
e075ae69
RH
6534 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6535 return;
fe25fea3 6536 case 'f':
f6f5dff2 6537#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6538 if (ASSEMBLER_DIALECT == ASM_ATT)
6539 putc ('.', file);
6540#endif
e075ae69 6541 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6542 return;
ef719a44
RH
6543
6544 case 'H':
6545 /* It doesn't actually matter what mode we use here, as we're
6546 only going to use this for printing. */
6547 x = adjust_address_nv (x, DImode, 8);
6548 break;
6549
ef6257cd
JH
6550 case '+':
6551 {
6552 rtx x;
e5cb57e8 6553
ef6257cd
JH
6554 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6555 return;
a4f31c00 6556
ef6257cd
JH
6557 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6558 if (x)
6559 {
6560 int pred_val = INTVAL (XEXP (x, 0));
6561
6562 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6563 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6564 {
6565 int taken = pred_val > REG_BR_PROB_BASE / 2;
6566 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6567
6568 /* Emit hints only in the case default branch prediction
d1f87653 6569 heuristics would fail. */
ef6257cd
JH
6570 if (taken != cputaken)
6571 {
6572 /* We use 3e (DS) prefix for taken branches and
6573 2e (CS) prefix for not taken branches. */
6574 if (taken)
6575 fputs ("ds ; ", file);
6576 else
6577 fputs ("cs ; ", file);
6578 }
6579 }
6580 }
6581 return;
6582 }
4af3895e 6583 default:
9e637a26 6584 output_operand_lossage ("invalid operand code '%c'", code);
2a2ab3f9
JVA
6585 }
6586 }
e9a25f70 6587
2a2ab3f9 6588 if (GET_CODE (x) == REG)
a55f4481 6589 print_reg (x, code, file);
e9a25f70 6590
2a2ab3f9
JVA
6591 else if (GET_CODE (x) == MEM)
6592 {
e075ae69 6593 /* No `byte ptr' prefix for call instructions. */
80f33d06 6594 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6595 {
69ddee61 6596 const char * size;
e075ae69
RH
6597 switch (GET_MODE_SIZE (GET_MODE (x)))
6598 {
6599 case 1: size = "BYTE"; break;
6600 case 2: size = "WORD"; break;
6601 case 4: size = "DWORD"; break;
6602 case 8: size = "QWORD"; break;
6603 case 12: size = "XWORD"; break;
a7180f70 6604 case 16: size = "XMMWORD"; break;
e075ae69 6605 default:
564d80f4 6606 abort ();
e075ae69 6607 }
fb204271
DN
6608
6609 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6610 if (code == 'b')
6611 size = "BYTE";
6612 else if (code == 'w')
6613 size = "WORD";
6614 else if (code == 'k')
6615 size = "DWORD";
6616
e075ae69
RH
6617 fputs (size, file);
6618 fputs (" PTR ", file);
2a2ab3f9 6619 }
e075ae69
RH
6620
6621 x = XEXP (x, 0);
0d7d98ee 6622 /* Avoid (%rip) for call operands. */
d10f5ecf 6623 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6624 && GET_CODE (x) != CONST_INT)
6625 output_addr_const (file, x);
c8b94768
RH
6626 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6627 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6628 else
e075ae69 6629 output_address (x);
2a2ab3f9 6630 }
e9a25f70 6631
2a2ab3f9
JVA
6632 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6633 {
e9a25f70
JL
6634 REAL_VALUE_TYPE r;
6635 long l;
6636
5f1ec3e6
JVA
6637 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6638 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6639
80f33d06 6640 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6641 putc ('$', file);
781f4ec1 6642 fprintf (file, "0x%08lx", l);
5f1ec3e6 6643 }
e9a25f70 6644
74dc3e94
RH
6645 /* These float cases don't actually occur as immediate operands. */
6646 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 6647 {
e9a25f70
JL
6648 char dstr[30];
6649
da6eec72 6650 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6651 fprintf (file, "%s", dstr);
2a2ab3f9 6652 }
e9a25f70 6653
2b589241 6654 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 6655 && GET_MODE (x) == XFmode)
2a2ab3f9 6656 {
e9a25f70
JL
6657 char dstr[30];
6658
da6eec72 6659 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6660 fprintf (file, "%s", dstr);
2a2ab3f9 6661 }
f996902d 6662
79325812 6663 else
2a2ab3f9 6664 {
4af3895e 6665 if (code != 'P')
2a2ab3f9 6666 {
695dac07 6667 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6668 {
80f33d06 6669 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6670 putc ('$', file);
6671 }
2a2ab3f9
JVA
6672 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6673 || GET_CODE (x) == LABEL_REF)
e075ae69 6674 {
80f33d06 6675 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6676 putc ('$', file);
6677 else
6678 fputs ("OFFSET FLAT:", file);
6679 }
2a2ab3f9 6680 }
e075ae69
RH
6681 if (GET_CODE (x) == CONST_INT)
6682 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6683 else if (flag_pic)
2a2ab3f9
JVA
6684 output_pic_addr_const (file, x, code);
6685 else
6686 output_addr_const (file, x);
6687 }
6688}
6689\f
6690/* Print a memory operand whose address is ADDR. */
6691
6692void
8d531ab9 6693print_operand_address (FILE *file, rtx addr)
2a2ab3f9 6694{
e075ae69
RH
6695 struct ix86_address parts;
6696 rtx base, index, disp;
6697 int scale;
e9a25f70 6698
e075ae69
RH
6699 if (! ix86_decompose_address (addr, &parts))
6700 abort ();
e9a25f70 6701
e075ae69
RH
6702 base = parts.base;
6703 index = parts.index;
6704 disp = parts.disp;
6705 scale = parts.scale;
e9a25f70 6706
74dc3e94
RH
6707 switch (parts.seg)
6708 {
6709 case SEG_DEFAULT:
6710 break;
6711 case SEG_FS:
6712 case SEG_GS:
6713 if (USER_LABEL_PREFIX[0] == 0)
6714 putc ('%', file);
6715 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6716 break;
6717 default:
6718 abort ();
6719 }
6720
e075ae69
RH
6721 if (!base && !index)
6722 {
6723 /* Displacement only requires special attention. */
e9a25f70 6724
e075ae69 6725 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6726 {
74dc3e94 6727 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
fb204271
DN
6728 {
6729 if (USER_LABEL_PREFIX[0] == 0)
6730 putc ('%', file);
6731 fputs ("ds:", file);
6732 }
74dc3e94 6733 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 6734 }
e075ae69 6735 else if (flag_pic)
74dc3e94 6736 output_pic_addr_const (file, disp, 0);
e075ae69 6737 else
74dc3e94 6738 output_addr_const (file, disp);
0d7d98ee
JH
6739
6740 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 6741 if (TARGET_64BIT
74dc3e94
RH
6742 && ((GET_CODE (disp) == SYMBOL_REF
6743 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6744 || GET_CODE (disp) == LABEL_REF
6745 || (GET_CODE (disp) == CONST
6746 && GET_CODE (XEXP (disp, 0)) == PLUS
6747 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6748 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6749 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 6750 fputs ("(%rip)", file);
e075ae69
RH
6751 }
6752 else
6753 {
80f33d06 6754 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6755 {
e075ae69 6756 if (disp)
2a2ab3f9 6757 {
c399861d 6758 if (flag_pic)
e075ae69
RH
6759 output_pic_addr_const (file, disp, 0);
6760 else if (GET_CODE (disp) == LABEL_REF)
6761 output_asm_label (disp);
2a2ab3f9 6762 else
e075ae69 6763 output_addr_const (file, disp);
2a2ab3f9
JVA
6764 }
6765
e075ae69
RH
6766 putc ('(', file);
6767 if (base)
a55f4481 6768 print_reg (base, 0, file);
e075ae69 6769 if (index)
2a2ab3f9 6770 {
e075ae69 6771 putc (',', file);
a55f4481 6772 print_reg (index, 0, file);
e075ae69
RH
6773 if (scale != 1)
6774 fprintf (file, ",%d", scale);
2a2ab3f9 6775 }
e075ae69 6776 putc (')', file);
2a2ab3f9 6777 }
2a2ab3f9
JVA
6778 else
6779 {
e075ae69 6780 rtx offset = NULL_RTX;
e9a25f70 6781
e075ae69
RH
6782 if (disp)
6783 {
6784 /* Pull out the offset of a symbol; print any symbol itself. */
6785 if (GET_CODE (disp) == CONST
6786 && GET_CODE (XEXP (disp, 0)) == PLUS
6787 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6788 {
6789 offset = XEXP (XEXP (disp, 0), 1);
6790 disp = gen_rtx_CONST (VOIDmode,
6791 XEXP (XEXP (disp, 0), 0));
6792 }
ce193852 6793
e075ae69
RH
6794 if (flag_pic)
6795 output_pic_addr_const (file, disp, 0);
6796 else if (GET_CODE (disp) == LABEL_REF)
6797 output_asm_label (disp);
6798 else if (GET_CODE (disp) == CONST_INT)
6799 offset = disp;
6800 else
6801 output_addr_const (file, disp);
6802 }
e9a25f70 6803
e075ae69
RH
6804 putc ('[', file);
6805 if (base)
a8620236 6806 {
a55f4481 6807 print_reg (base, 0, file);
e075ae69
RH
6808 if (offset)
6809 {
6810 if (INTVAL (offset) >= 0)
6811 putc ('+', file);
6812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6813 }
a8620236 6814 }
e075ae69
RH
6815 else if (offset)
6816 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6817 else
e075ae69 6818 putc ('0', file);
e9a25f70 6819
e075ae69
RH
6820 if (index)
6821 {
6822 putc ('+', file);
a55f4481 6823 print_reg (index, 0, file);
e075ae69
RH
6824 if (scale != 1)
6825 fprintf (file, "*%d", scale);
6826 }
6827 putc (']', file);
6828 }
2a2ab3f9
JVA
6829 }
6830}
f996902d
RH
6831
6832bool
b96a374d 6833output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
6834{
6835 rtx op;
6836
6837 if (GET_CODE (x) != UNSPEC)
6838 return false;
6839
6840 op = XVECEXP (x, 0, 0);
6841 switch (XINT (x, 1))
6842 {
6843 case UNSPEC_GOTTPOFF:
6844 output_addr_const (file, op);
dea73790 6845 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6846 fputs ("@GOTTPOFF", file);
6847 break;
6848 case UNSPEC_TPOFF:
6849 output_addr_const (file, op);
6850 fputs ("@TPOFF", file);
6851 break;
6852 case UNSPEC_NTPOFF:
6853 output_addr_const (file, op);
75d38379
JJ
6854 if (TARGET_64BIT)
6855 fputs ("@TPOFF", file);
6856 else
6857 fputs ("@NTPOFF", file);
f996902d
RH
6858 break;
6859 case UNSPEC_DTPOFF:
6860 output_addr_const (file, op);
6861 fputs ("@DTPOFF", file);
6862 break;
dea73790
JJ
6863 case UNSPEC_GOTNTPOFF:
6864 output_addr_const (file, op);
75d38379
JJ
6865 if (TARGET_64BIT)
6866 fputs ("@GOTTPOFF(%rip)", file);
6867 else
6868 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6869 break;
6870 case UNSPEC_INDNTPOFF:
6871 output_addr_const (file, op);
6872 fputs ("@INDNTPOFF", file);
6873 break;
f996902d
RH
6874
6875 default:
6876 return false;
6877 }
6878
6879 return true;
6880}
2a2ab3f9
JVA
6881\f
6882/* Split one or more DImode RTL references into pairs of SImode
6883 references. The RTL can be REG, offsettable MEM, integer constant, or
6884 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6885 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6886 that parallel "operands". */
2a2ab3f9
JVA
6887
6888void
b96a374d 6889split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
6890{
6891 while (num--)
6892 {
57dbca5e 6893 rtx op = operands[num];
b932f770
JH
6894
6895 /* simplify_subreg refuse to split volatile memory addresses,
6896 but we still have to handle it. */
6897 if (GET_CODE (op) == MEM)
2a2ab3f9 6898 {
f4ef873c 6899 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6900 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6901 }
6902 else
b932f770 6903 {
38ca929b
JH
6904 lo_half[num] = simplify_gen_subreg (SImode, op,
6905 GET_MODE (op) == VOIDmode
6906 ? DImode : GET_MODE (op), 0);
6907 hi_half[num] = simplify_gen_subreg (SImode, op,
6908 GET_MODE (op) == VOIDmode
6909 ? DImode : GET_MODE (op), 4);
b932f770 6910 }
2a2ab3f9
JVA
6911 }
6912}
44cf5b6a
JH
6913/* Split one or more TImode RTL references into pairs of SImode
6914 references. The RTL can be REG, offsettable MEM, integer constant, or
6915 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6916 split and "num" is its length. lo_half and hi_half are output arrays
6917 that parallel "operands". */
6918
6919void
b96a374d 6920split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
6921{
6922 while (num--)
6923 {
6924 rtx op = operands[num];
b932f770
JH
6925
6926 /* simplify_subreg refuse to split volatile memory addresses, but we
6927 still have to handle it. */
6928 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6929 {
6930 lo_half[num] = adjust_address (op, DImode, 0);
6931 hi_half[num] = adjust_address (op, DImode, 8);
6932 }
6933 else
b932f770
JH
6934 {
6935 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6936 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6937 }
44cf5b6a
JH
6938 }
6939}
2a2ab3f9 6940\f
2a2ab3f9
JVA
6941/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6942 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6943 is the expression of the binary operation. The output may either be
6944 emitted here, or returned to the caller, like all output_* functions.
6945
6946 There is no guarantee that the operands are the same mode, as they
0f290768 6947 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6948
e3c2afab
AM
6949#ifndef SYSV386_COMPAT
6950/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6951 wants to fix the assemblers because that causes incompatibility
6952 with gcc. No-one wants to fix gcc because that causes
6953 incompatibility with assemblers... You can use the option of
6954 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6955#define SYSV386_COMPAT 1
6956#endif
6957
69ddee61 6958const char *
b96a374d 6959output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 6960{
e3c2afab 6961 static char buf[30];
69ddee61 6962 const char *p;
1deaa899 6963 const char *ssep;
89b17498 6964 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 6965
e3c2afab
AM
6966#ifdef ENABLE_CHECKING
6967 /* Even if we do not want to check the inputs, this documents input
6968 constraints. Which helps in understanding the following code. */
6969 if (STACK_REG_P (operands[0])
6970 && ((REG_P (operands[1])
6971 && REGNO (operands[0]) == REGNO (operands[1])
6972 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6973 || (REG_P (operands[2])
6974 && REGNO (operands[0]) == REGNO (operands[2])
6975 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6976 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6977 ; /* ok */
1deaa899 6978 else if (!is_sse)
e3c2afab
AM
6979 abort ();
6980#endif
6981
2a2ab3f9
JVA
6982 switch (GET_CODE (operands[3]))
6983 {
6984 case PLUS:
e075ae69
RH
6985 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6986 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6987 p = "fiadd";
6988 else
6989 p = "fadd";
1deaa899 6990 ssep = "add";
2a2ab3f9
JVA
6991 break;
6992
6993 case MINUS:
e075ae69
RH
6994 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6995 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6996 p = "fisub";
6997 else
6998 p = "fsub";
1deaa899 6999 ssep = "sub";
2a2ab3f9
JVA
7000 break;
7001
7002 case MULT:
e075ae69
RH
7003 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7004 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7005 p = "fimul";
7006 else
7007 p = "fmul";
1deaa899 7008 ssep = "mul";
2a2ab3f9
JVA
7009 break;
7010
7011 case DIV:
e075ae69
RH
7012 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7013 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7014 p = "fidiv";
7015 else
7016 p = "fdiv";
1deaa899 7017 ssep = "div";
2a2ab3f9
JVA
7018 break;
7019
7020 default:
7021 abort ();
7022 }
7023
1deaa899
JH
7024 if (is_sse)
7025 {
7026 strcpy (buf, ssep);
7027 if (GET_MODE (operands[0]) == SFmode)
7028 strcat (buf, "ss\t{%2, %0|%0, %2}");
7029 else
7030 strcat (buf, "sd\t{%2, %0|%0, %2}");
7031 return buf;
7032 }
e075ae69 7033 strcpy (buf, p);
2a2ab3f9
JVA
7034
7035 switch (GET_CODE (operands[3]))
7036 {
7037 case MULT:
7038 case PLUS:
7039 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7040 {
e3c2afab 7041 rtx temp = operands[2];
2a2ab3f9
JVA
7042 operands[2] = operands[1];
7043 operands[1] = temp;
7044 }
7045
e3c2afab
AM
7046 /* know operands[0] == operands[1]. */
7047
2a2ab3f9 7048 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7049 {
7050 p = "%z2\t%2";
7051 break;
7052 }
2a2ab3f9
JVA
7053
7054 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7055 {
7056 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7057 /* How is it that we are storing to a dead operand[2]?
7058 Well, presumably operands[1] is dead too. We can't
7059 store the result to st(0) as st(0) gets popped on this
7060 instruction. Instead store to operands[2] (which I
7061 think has to be st(1)). st(1) will be popped later.
7062 gcc <= 2.8.1 didn't have this check and generated
7063 assembly code that the Unixware assembler rejected. */
7064 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7065 else
e3c2afab 7066 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7067 break;
6b28fd63 7068 }
2a2ab3f9
JVA
7069
7070 if (STACK_TOP_P (operands[0]))
e3c2afab 7071 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7072 else
e3c2afab 7073 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7074 break;
2a2ab3f9
JVA
7075
7076 case MINUS:
7077 case DIV:
7078 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7079 {
7080 p = "r%z1\t%1";
7081 break;
7082 }
2a2ab3f9
JVA
7083
7084 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7085 {
7086 p = "%z2\t%2";
7087 break;
7088 }
2a2ab3f9 7089
2a2ab3f9 7090 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7091 {
e3c2afab
AM
7092#if SYSV386_COMPAT
7093 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7094 derived assemblers, confusingly reverse the direction of
7095 the operation for fsub{r} and fdiv{r} when the
7096 destination register is not st(0). The Intel assembler
7097 doesn't have this brain damage. Read !SYSV386_COMPAT to
7098 figure out what the hardware really does. */
7099 if (STACK_TOP_P (operands[0]))
7100 p = "{p\t%0, %2|rp\t%2, %0}";
7101 else
7102 p = "{rp\t%2, %0|p\t%0, %2}";
7103#else
6b28fd63 7104 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7105 /* As above for fmul/fadd, we can't store to st(0). */
7106 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7107 else
e3c2afab
AM
7108 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7109#endif
e075ae69 7110 break;
6b28fd63 7111 }
2a2ab3f9
JVA
7112
7113 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7114 {
e3c2afab 7115#if SYSV386_COMPAT
6b28fd63 7116 if (STACK_TOP_P (operands[0]))
e3c2afab 7117 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7118 else
e3c2afab
AM
7119 p = "{p\t%1, %0|rp\t%0, %1}";
7120#else
7121 if (STACK_TOP_P (operands[0]))
7122 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7123 else
7124 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7125#endif
e075ae69 7126 break;
6b28fd63 7127 }
2a2ab3f9
JVA
7128
7129 if (STACK_TOP_P (operands[0]))
7130 {
7131 if (STACK_TOP_P (operands[1]))
e3c2afab 7132 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7133 else
e3c2afab 7134 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7135 break;
2a2ab3f9
JVA
7136 }
7137 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7138 {
7139#if SYSV386_COMPAT
7140 p = "{\t%1, %0|r\t%0, %1}";
7141#else
7142 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7143#endif
7144 }
2a2ab3f9 7145 else
e3c2afab
AM
7146 {
7147#if SYSV386_COMPAT
7148 p = "{r\t%2, %0|\t%0, %2}";
7149#else
7150 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7151#endif
7152 }
e075ae69 7153 break;
2a2ab3f9
JVA
7154
7155 default:
7156 abort ();
7157 }
e075ae69
RH
7158
7159 strcat (buf, p);
7160 return buf;
2a2ab3f9 7161}
e075ae69 7162
edeacc14
UB
7163/* Output code to initialize control word copies used by trunc?f?i and
7164 rounding patterns. CURRENT_MODE is set to current control word,
7165 while NEW_MODE is set to new control word. */
7166
7a2e09f4 7167void
edeacc14 7168emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7a2e09f4
JH
7169{
7170 rtx reg = gen_reg_rtx (HImode);
7171
edeacc14
UB
7172 emit_insn (gen_x86_fnstcw_1 (current_mode));
7173 emit_move_insn (reg, current_mode);
7174
7a2e09f4
JH
7175 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7176 && !TARGET_64BIT)
edeacc14
UB
7177 {
7178 switch (mode)
7179 {
7180 case I387_CW_FLOOR:
7181 /* round down toward -oo */
7182 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7183 break;
7184
7185 case I387_CW_CEIL:
7186 /* round up toward +oo */
7187 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7188 break;
7189
7190 case I387_CW_TRUNC:
7191 /* round toward zero (truncate) */
7192 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7193 break;
7194
7195 case I387_CW_MASK_PM:
7196 /* mask precision exception for nearbyint() */
7197 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7198 break;
7199
7200 default:
7201 abort();
7202 }
7203 }
7a2e09f4 7204 else
edeacc14
UB
7205 {
7206 switch (mode)
7207 {
7208 case I387_CW_FLOOR:
7209 /* round down toward -oo */
7210 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7212 break;
7213
7214 case I387_CW_CEIL:
7215 /* round up toward +oo */
7216 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7217 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7218 break;
7219
7220 case I387_CW_TRUNC:
7221 /* round toward zero (truncate) */
7222 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7223 break;
7224
7225 case I387_CW_MASK_PM:
7226 /* mask precision exception for nearbyint() */
7227 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7228 break;
7229
7230 default:
7231 abort();
7232 }
7233 }
7234
7235 emit_move_insn (new_mode, reg);
7a2e09f4
JH
7236}
7237
2a2ab3f9 7238/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7239 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7240 operand may be [SDX]Fmode. */
2a2ab3f9 7241
69ddee61 7242const char *
b96a374d 7243output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
7244{
7245 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7246 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7247
e075ae69
RH
7248 /* Jump through a hoop or two for DImode, since the hardware has no
7249 non-popping instruction. We used to do this a different way, but
7250 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7251 if (dimode_p && !stack_top_dies)
7252 output_asm_insn ("fld\t%y1", operands);
e075ae69 7253
7a2e09f4 7254 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7255 abort ();
7256
e075ae69 7257 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7258 abort ();
e9a25f70 7259
7a2e09f4 7260 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7261 if (stack_top_dies || dimode_p)
7a2e09f4 7262 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7263 else
7a2e09f4 7264 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7265 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7266
e075ae69 7267 return "";
2a2ab3f9 7268}
cda749b1 7269
e075ae69 7270/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7c82106f 7271 should be used. UNORDERED_P is true when fucom should be used. */
e075ae69 7272
69ddee61 7273const char *
b96a374d 7274output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 7275{
e075ae69 7276 int stack_top_dies;
869d095e 7277 rtx cmp_op0, cmp_op1;
7c82106f 7278 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
e075ae69 7279
7c82106f 7280 if (eflags_p)
e075ae69 7281 {
7c82106f
UB
7282 cmp_op0 = operands[0];
7283 cmp_op1 = operands[1];
e075ae69 7284 }
869d095e
UB
7285 else
7286 {
7c82106f
UB
7287 cmp_op0 = operands[1];
7288 cmp_op1 = operands[2];
869d095e
UB
7289 }
7290
0644b628
JH
7291 if (is_sse)
7292 {
7293 if (GET_MODE (operands[0]) == SFmode)
7294 if (unordered_p)
7295 return "ucomiss\t{%1, %0|%0, %1}";
7296 else
a5cf80f0 7297 return "comiss\t{%1, %0|%0, %1}";
0644b628
JH
7298 else
7299 if (unordered_p)
7300 return "ucomisd\t{%1, %0|%0, %1}";
7301 else
a5cf80f0 7302 return "comisd\t{%1, %0|%0, %1}";
0644b628 7303 }
cda749b1 7304
e075ae69 7305 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7306 abort ();
7307
e075ae69 7308 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7309
869d095e
UB
7310 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7311 {
7312 if (stack_top_dies)
7313 {
7314 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7315 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7316 }
7317 else
7318 return "ftst\n\tfnstsw\t%0";
7319 }
7320
e075ae69
RH
7321 if (STACK_REG_P (cmp_op1)
7322 && stack_top_dies
7323 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7324 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7325 {
e075ae69
RH
7326 /* If both the top of the 387 stack dies, and the other operand
7327 is also a stack register that dies, then this must be a
7328 `fcompp' float compare */
7329
7c82106f 7330 if (eflags_p)
e075ae69
RH
7331 {
7332 /* There is no double popping fcomi variant. Fortunately,
7333 eflags is immune from the fstp's cc clobbering. */
7334 if (unordered_p)
7335 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7336 else
7337 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
fb364dc4 7338 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
e075ae69
RH
7339 }
7340 else
cda749b1 7341 {
7c82106f
UB
7342 if (unordered_p)
7343 return "fucompp\n\tfnstsw\t%0";
cda749b1 7344 else
7c82106f 7345 return "fcompp\n\tfnstsw\t%0";
cda749b1 7346 }
cda749b1
JW
7347 }
7348 else
7349 {
e075ae69 7350 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7351
7c82106f 7352 static const char * const alt[16] =
e075ae69 7353 {
7c82106f
UB
7354 "fcom%z2\t%y2\n\tfnstsw\t%0",
7355 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7356 "fucom%z2\t%y2\n\tfnstsw\t%0",
7357 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7358
7c82106f
UB
7359 "ficom%z2\t%y2\n\tfnstsw\t%0",
7360 "ficomp%z2\t%y2\n\tfnstsw\t%0",
e075ae69
RH
7361 NULL,
7362 NULL,
7363
7364 "fcomi\t{%y1, %0|%0, %y1}",
7365 "fcomip\t{%y1, %0|%0, %y1}",
7366 "fucomi\t{%y1, %0|%0, %y1}",
7367 "fucomip\t{%y1, %0|%0, %y1}",
7368
7369 NULL,
7370 NULL,
7371 NULL,
e075ae69
RH
7372 NULL
7373 };
7374
7375 int mask;
69ddee61 7376 const char *ret;
e075ae69
RH
7377
7378 mask = eflags_p << 3;
7c82106f 7379 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
e075ae69
RH
7380 mask |= unordered_p << 1;
7381 mask |= stack_top_dies;
7382
7c82106f 7383 if (mask >= 16)
e075ae69
RH
7384 abort ();
7385 ret = alt[mask];
7386 if (ret == NULL)
7387 abort ();
cda749b1 7388
e075ae69 7389 return ret;
cda749b1
JW
7390 }
7391}
2a2ab3f9 7392
f88c65f7 7393void
b96a374d 7394ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
7395{
7396 const char *directive = ASM_LONG;
7397
7398 if (TARGET_64BIT)
7399 {
7400#ifdef ASM_QUAD
7401 directive = ASM_QUAD;
7402#else
7403 abort ();
7404#endif
7405 }
7406
7407 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7408}
7409
7410void
b96a374d 7411ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7
RH
7412{
7413 if (TARGET_64BIT)
74411039 7414 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7415 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7416 else if (HAVE_AS_GOTOFF_IN_DATA)
7417 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7418#if TARGET_MACHO
7419 else if (TARGET_MACHO)
86ecdfb6
AP
7420 {
7421 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7422 machopic_output_function_base_name (file);
7423 fprintf(file, "\n");
7424 }
b069de3b 7425#endif
f88c65f7 7426 else
5fc0e5df
KW
7427 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7428 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7429}
32b5b1aa 7430\f
a8bac9ab
RH
7431/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7432 for the target. */
7433
7434void
b96a374d 7435ix86_expand_clear (rtx dest)
a8bac9ab
RH
7436{
7437 rtx tmp;
7438
7439 /* We play register width games, which are only valid after reload. */
7440 if (!reload_completed)
7441 abort ();
7442
7443 /* Avoid HImode and its attendant prefix byte. */
7444 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7445 dest = gen_rtx_REG (SImode, REGNO (dest));
7446
7447 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7448
7449 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7450 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7451 {
7452 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7453 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7454 }
7455
7456 emit_insn (tmp);
7457}
7458
f996902d
RH
7459/* X is an unchanging MEM. If it is a constant pool reference, return
7460 the constant pool rtx, else NULL. */
7461
8fe75e43 7462rtx
b96a374d 7463maybe_get_pool_constant (rtx x)
f996902d 7464{
69bd9368 7465 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
7466
7467 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7468 return get_pool_constant (x);
7469
7470 return NULL_RTX;
7471}
7472
79325812 7473void
b96a374d 7474ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 7475{
e075ae69 7476 int strict = (reload_in_progress || reload_completed);
74dc3e94
RH
7477 rtx op0, op1;
7478 enum tls_model model;
f996902d
RH
7479
7480 op0 = operands[0];
7481 op1 = operands[1];
7482
d2ad2c8a 7483 if (GET_CODE (op1) == SYMBOL_REF)
f996902d 7484 {
d2ad2c8a
JH
7485 model = SYMBOL_REF_TLS_MODEL (op1);
7486 if (model)
7487 {
7488 op1 = legitimize_tls_address (op1, model, true);
7489 op1 = force_operand (op1, op0);
7490 if (op1 == op0)
7491 return;
7492 }
7493 }
7494 else if (GET_CODE (op1) == CONST
7495 && GET_CODE (XEXP (op1, 0)) == PLUS
7496 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7497 {
7498 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7499 if (model)
7500 {
7501 rtx addend = XEXP (XEXP (op1, 0), 1);
7502 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7503 op1 = force_operand (op1, NULL);
7504 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7505 op0, 1, OPTAB_DIRECT);
7506 if (op1 == op0)
7507 return;
7508 }
f996902d 7509 }
74dc3e94
RH
7510
7511 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 7512 {
b069de3b
SS
7513#if TARGET_MACHO
7514 if (MACHOPIC_PURE)
7515 {
7516 rtx temp = ((reload_in_progress
7517 || ((op0 && GET_CODE (op0) == REG)
7518 && mode == Pmode))
7519 ? op0 : gen_reg_rtx (Pmode));
7520 op1 = machopic_indirect_data_reference (op1, temp);
7521 op1 = machopic_legitimize_pic_address (op1, mode,
7522 temp == op1 ? 0 : temp);
7523 }
74dc3e94
RH
7524 else if (MACHOPIC_INDIRECT)
7525 op1 = machopic_indirect_data_reference (op1, 0);
7526 if (op0 == op1)
7527 return;
7528#else
f996902d
RH
7529 if (GET_CODE (op0) == MEM)
7530 op1 = force_reg (Pmode, op1);
e075ae69 7531 else
b39edae3 7532 op1 = legitimize_address (op1, op1, Pmode);
74dc3e94 7533#endif /* TARGET_MACHO */
e075ae69
RH
7534 }
7535 else
7536 {
f996902d 7537 if (GET_CODE (op0) == MEM
44cf5b6a 7538 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7539 || !push_operand (op0, mode))
7540 && GET_CODE (op1) == MEM)
7541 op1 = force_reg (mode, op1);
e9a25f70 7542
f996902d
RH
7543 if (push_operand (op0, mode)
7544 && ! general_no_elim_operand (op1, mode))
7545 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7546
44cf5b6a
JH
7547 /* Force large constants in 64bit compilation into register
7548 to get them CSEed. */
7549 if (TARGET_64BIT && mode == DImode
f996902d 7550 && immediate_operand (op1, mode)
8fe75e43 7551 && !x86_64_zext_immediate_operand (op1, VOIDmode)
f996902d 7552 && !register_operand (op0, mode)
44cf5b6a 7553 && optimize && !reload_completed && !reload_in_progress)
f996902d 7554 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7555
e075ae69 7556 if (FLOAT_MODE_P (mode))
32b5b1aa 7557 {
d7a29404
JH
7558 /* If we are loading a floating point constant to a register,
7559 force the value to memory now, since we'll get better code
7560 out the back end. */
e075ae69
RH
7561
7562 if (strict)
7563 ;
ddc67067
MM
7564 else if (GET_CODE (op1) == CONST_DOUBLE)
7565 {
7566 op1 = validize_mem (force_const_mem (mode, op1));
7567 if (!register_operand (op0, mode))
7568 {
7569 rtx temp = gen_reg_rtx (mode);
7570 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7571 emit_move_insn (op0, temp);
7572 return;
7573 }
7574 }
32b5b1aa 7575 }
32b5b1aa 7576 }
e9a25f70 7577
74dc3e94 7578 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 7579}
e9a25f70 7580
e37af218 7581void
b96a374d 7582ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218 7583{
c38573a8
RH
7584 rtx op0 = operands[0], op1 = operands[1];
7585
e37af218
RH
7586 /* Force constants other than zero into memory. We do not know how
7587 the instructions used to build constants modify the upper 64 bits
7588 of the register, once we have that information we may be able
7589 to handle some of them more efficiently. */
7590 if ((reload_in_progress | reload_completed) == 0
c38573a8
RH
7591 && register_operand (op0, mode)
7592 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7593 op1 = validize_mem (force_const_mem (mode, op1));
e37af218
RH
7594
7595 /* Make operand1 a register if it isn't already. */
f8ca7923 7596 if (!no_new_pseudos
c38573a8
RH
7597 && !register_operand (op0, mode)
7598 && !register_operand (op1, mode))
e37af218 7599 {
c38573a8 7600 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
e37af218
RH
7601 return;
7602 }
7603
c38573a8 7604 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
fce5a9f2 7605}
e37af218 7606
c38573a8
RH
7607/* Implement the movmisalign patterns for SSE. Non-SSE modes go
7608 straight to ix86_expand_vector_move. */
7609
7610void
7611ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7612{
7613 rtx op0, op1, m;
7614
7615 op0 = operands[0];
7616 op1 = operands[1];
7617
7618 if (MEM_P (op1))
7619 {
7620 /* If we're optimizing for size, movups is the smallest. */
7621 if (optimize_size)
7622 {
7623 op0 = gen_lowpart (V4SFmode, op0);
7624 op1 = gen_lowpart (V4SFmode, op1);
7625 emit_insn (gen_sse_movups (op0, op1));
7626 return;
7627 }
7628
7629 /* ??? If we have typed data, then it would appear that using
7630 movdqu is the only way to get unaligned data loaded with
7631 integer type. */
7632 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7633 {
7634 op0 = gen_lowpart (V16QImode, op0);
7635 op1 = gen_lowpart (V16QImode, op1);
7636 emit_insn (gen_sse2_movdqu (op0, op1));
7637 return;
7638 }
7639
7640 if (TARGET_SSE2 && mode == V2DFmode)
7641 {
7642 /* When SSE registers are split into halves, we can avoid
7643 writing to the top half twice. */
7644 if (TARGET_SSE_SPLIT_REGS)
7645 {
7646 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7647 m = adjust_address (op1, DFmode, 0);
7648 emit_insn (gen_sse2_loadlpd (op0, op0, m));
7649 m = adjust_address (op1, DFmode, 8);
7650 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7651 }
7652 else
7653 {
7654 /* ??? Not sure about the best option for the Intel chips.
7655 The following would seem to satisfy; the register is
7656 entirely cleared, breaking the dependency chain. We
7657 then store to the upper half, with a dependency depth
7658 of one. A rumor has it that Intel recommends two movsd
7659 followed by an unpacklpd, but this is unconfirmed. And
7660 given that the dependency depth of the unpacklpd would
7661 still be one, I'm not sure why this would be better. */
7662 m = adjust_address (op1, DFmode, 0);
7663 emit_insn (gen_sse2_loadsd (op0, m));
7664 m = adjust_address (op1, DFmode, 8);
7665 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7666 }
7667 }
7668 else
7669 {
7670 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7671 emit_move_insn (op0, CONST0_RTX (mode));
7672 else
7673 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7674
2cdb3148
RH
7675 m = adjust_address (op1, V2SFmode, 0);
7676 emit_insn (gen_sse_loadlps (op0, op0, m));
7677 m = adjust_address (op1, V2SFmode, 8);
7678 emit_insn (gen_sse_loadhps (op0, op0, m));
c38573a8
RH
7679 }
7680 }
7681 else if (MEM_P (op0))
7682 {
7683 /* If we're optimizing for size, movups is the smallest. */
7684 if (optimize_size)
7685 {
7686 op0 = gen_lowpart (V4SFmode, op0);
7687 op1 = gen_lowpart (V4SFmode, op1);
7688 emit_insn (gen_sse_movups (op0, op1));
7689 return;
7690 }
7691
7692 /* ??? Similar to above, only less clear because of quote
7693 typeless stores unquote. */
7694 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7695 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7696 {
7697 op0 = gen_lowpart (V16QImode, op0);
7698 op1 = gen_lowpart (V16QImode, op1);
7699 emit_insn (gen_sse2_movdqu (op0, op1));
7700 return;
7701 }
7702
7703 if (TARGET_SSE2 && mode == V2DFmode)
7704 {
7705 m = adjust_address (op0, DFmode, 0);
7706 emit_insn (gen_sse2_storelpd (m, op1));
7707 m = adjust_address (op0, DFmode, 8);
7708 emit_insn (gen_sse2_storehpd (m, op1));
7709 return;
7710 }
7711 else
7712 {
2cdb3148
RH
7713 m = adjust_address (op0, V2SFmode, 0);
7714 emit_insn (gen_sse_storelps (m, op1));
7715 m = adjust_address (op0, V2SFmode, 8);
7716 emit_insn (gen_sse_storehps (m, op1));
c38573a8
RH
7717 return;
7718 }
7719 }
7720 else
7721 gcc_unreachable ();
7722}
7723
7724
ef719a44
RH
7725/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7726 destination to use for the operation. If different from the true
7727 destination in operands[0], a copy operation will be required. */
e9a25f70 7728
ef719a44
RH
7729rtx
7730ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7731 rtx operands[])
e075ae69
RH
7732{
7733 int matching_memory;
ef719a44 7734 rtx src1, src2, dst;
e075ae69
RH
7735
7736 dst = operands[0];
7737 src1 = operands[1];
7738 src2 = operands[2];
7739
7740 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
ec8e098d 7741 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7742 && (rtx_equal_p (dst, src2)
7743 || immediate_operand (src1, mode)))
7744 {
7745 rtx temp = src1;
7746 src1 = src2;
7747 src2 = temp;
32b5b1aa 7748 }
e9a25f70 7749
e075ae69
RH
7750 /* If the destination is memory, and we do not have matching source
7751 operands, do things in registers. */
7752 matching_memory = 0;
7753 if (GET_CODE (dst) == MEM)
32b5b1aa 7754 {
e075ae69
RH
7755 if (rtx_equal_p (dst, src1))
7756 matching_memory = 1;
ec8e098d 7757 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7758 && rtx_equal_p (dst, src2))
7759 matching_memory = 2;
7760 else
7761 dst = gen_reg_rtx (mode);
7762 }
0f290768 7763
e075ae69
RH
7764 /* Both source operands cannot be in memory. */
7765 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7766 {
7767 if (matching_memory != 2)
7768 src2 = force_reg (mode, src2);
7769 else
7770 src1 = force_reg (mode, src1);
32b5b1aa 7771 }
e9a25f70 7772
06a964de
JH
7773 /* If the operation is not commutable, source 1 cannot be a constant
7774 or non-matching memory. */
0f290768 7775 if ((CONSTANT_P (src1)
06a964de 7776 || (!matching_memory && GET_CODE (src1) == MEM))
ec8e098d 7777 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69 7778 src1 = force_reg (mode, src1);
0f290768 7779
e075ae69 7780 /* If optimizing, copy to regs to improve CSE */
fe577e58 7781 if (optimize && ! no_new_pseudos)
32b5b1aa 7782 {
e075ae69
RH
7783 if (GET_CODE (dst) == MEM)
7784 dst = gen_reg_rtx (mode);
7785 if (GET_CODE (src1) == MEM)
7786 src1 = force_reg (mode, src1);
7787 if (GET_CODE (src2) == MEM)
7788 src2 = force_reg (mode, src2);
32b5b1aa 7789 }
e9a25f70 7790
ef719a44
RH
7791 src1 = operands[1] = src1;
7792 src2 = operands[2] = src2;
7793 return dst;
7794}
7795
7796/* Similarly, but assume that the destination has already been
7797 set up properly. */
7798
7799void
7800ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7801 enum machine_mode mode, rtx operands[])
7802{
7803 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7804 gcc_assert (dst == operands[0]);
7805}
7806
7807/* Attempt to expand a binary operator. Make the expansion closer to the
7808 actual machine, then just general_operand, which will allow 3 separate
7809 memory references (one output, two input) in a single insn. */
7810
7811void
7812ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7813 rtx operands[])
7814{
7815 rtx src1, src2, dst, op, clob;
7816
7817 dst = ix86_fixup_binary_operands (code, mode, operands);
7818 src1 = operands[1];
7819 src2 = operands[2];
7820
7821 /* Emit the instruction. */
e075ae69
RH
7822
7823 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7824 if (reload_in_progress)
7825 {
7826 /* Reload doesn't know about the flags register, and doesn't know that
7827 it doesn't want to clobber it. We can only do this with PLUS. */
7828 if (code != PLUS)
7829 abort ();
7830 emit_insn (op);
7831 }
7832 else
32b5b1aa 7833 {
e075ae69
RH
7834 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7835 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7836 }
e9a25f70 7837
e075ae69
RH
7838 /* Fix up the destination if needed. */
7839 if (dst != operands[0])
7840 emit_move_insn (operands[0], dst);
7841}
7842
7843/* Return TRUE or FALSE depending on whether the binary operator meets the
7844 appropriate constraints. */
7845
7846int
b96a374d
AJ
7847ix86_binary_operator_ok (enum rtx_code code,
7848 enum machine_mode mode ATTRIBUTE_UNUSED,
7849 rtx operands[3])
e075ae69
RH
7850{
7851 /* Both source operands cannot be in memory. */
7852 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7853 return 0;
7854 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 7855 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
7856 return 0;
7857 /* If the destination is memory, we must have a matching source operand. */
7858 if (GET_CODE (operands[0]) == MEM
7859 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 7860 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7861 && rtx_equal_p (operands[0], operands[2]))))
7862 return 0;
06a964de 7863 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7864 have a matching destination. */
06a964de 7865 if (GET_CODE (operands[1]) == MEM
ec8e098d 7866 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
7867 && ! rtx_equal_p (operands[0], operands[1]))
7868 return 0;
e075ae69
RH
7869 return 1;
7870}
7871
7872/* Attempt to expand a unary operator. Make the expansion closer to the
7873 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7874 memory references (one output, one input) in a single insn. */
e075ae69 7875
9d81fc27 7876void
b96a374d
AJ
7877ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7878 rtx operands[])
e075ae69 7879{
06a964de
JH
7880 int matching_memory;
7881 rtx src, dst, op, clob;
7882
7883 dst = operands[0];
7884 src = operands[1];
e075ae69 7885
06a964de
JH
7886 /* If the destination is memory, and we do not have matching source
7887 operands, do things in registers. */
7888 matching_memory = 0;
7cacf53e 7889 if (MEM_P (dst))
32b5b1aa 7890 {
06a964de
JH
7891 if (rtx_equal_p (dst, src))
7892 matching_memory = 1;
e075ae69 7893 else
06a964de 7894 dst = gen_reg_rtx (mode);
32b5b1aa 7895 }
e9a25f70 7896
06a964de 7897 /* When source operand is memory, destination must match. */
7cacf53e 7898 if (MEM_P (src) && !matching_memory)
06a964de 7899 src = force_reg (mode, src);
0f290768 7900
7cacf53e 7901 /* If optimizing, copy to regs to improve CSE. */
fe577e58 7902 if (optimize && ! no_new_pseudos)
06a964de
JH
7903 {
7904 if (GET_CODE (dst) == MEM)
7905 dst = gen_reg_rtx (mode);
7906 if (GET_CODE (src) == MEM)
7907 src = force_reg (mode, src);
7908 }
7909
7910 /* Emit the instruction. */
7911
7912 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7913 if (reload_in_progress || code == NOT)
7914 {
7915 /* Reload doesn't know about the flags register, and doesn't know that
7916 it doesn't want to clobber it. */
7917 if (code != NOT)
7918 abort ();
7919 emit_insn (op);
7920 }
7921 else
7922 {
7923 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7924 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7925 }
7926
7927 /* Fix up the destination if needed. */
7928 if (dst != operands[0])
7929 emit_move_insn (operands[0], dst);
e075ae69
RH
7930}
7931
7932/* Return TRUE or FALSE depending on whether the unary operator meets the
7933 appropriate constraints. */
7934
7935int
b96a374d
AJ
7936ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7937 enum machine_mode mode ATTRIBUTE_UNUSED,
7938 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 7939{
06a964de
JH
7940 /* If one of operands is memory, source and destination must match. */
7941 if ((GET_CODE (operands[0]) == MEM
7942 || GET_CODE (operands[1]) == MEM)
7943 && ! rtx_equal_p (operands[0], operands[1]))
7944 return FALSE;
e075ae69
RH
7945 return TRUE;
7946}
7cacf53e
RH
7947
7948/* Generate code for floating point ABS or NEG. */
7949
7950void
7951ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7952 rtx operands[])
7953{
7954 rtx mask, set, use, clob, dst, src;
7955 bool matching_memory;
7956 bool use_sse = false;
ef719a44
RH
7957 bool vector_mode = VECTOR_MODE_P (mode);
7958 enum machine_mode elt_mode = mode;
7959 enum machine_mode vec_mode = VOIDmode;
7cacf53e 7960
ef719a44
RH
7961 if (vector_mode)
7962 {
7963 elt_mode = GET_MODE_INNER (mode);
7964 vec_mode = mode;
7965 use_sse = true;
7966 }
7cacf53e
RH
7967 if (TARGET_SSE_MATH)
7968 {
7969 if (mode == SFmode)
ef719a44
RH
7970 {
7971 use_sse = true;
7972 vec_mode = V4SFmode;
7973 }
7cacf53e 7974 else if (mode == DFmode && TARGET_SSE2)
ef719a44
RH
7975 {
7976 use_sse = true;
7977 vec_mode = V2DFmode;
7978 }
7cacf53e
RH
7979 }
7980
7981 /* NEG and ABS performed with SSE use bitwise mask operations.
7982 Create the appropriate mask now. */
7983 if (use_sse)
7984 {
7985 HOST_WIDE_INT hi, lo;
7986 int shift = 63;
ef719a44 7987 rtvec v;
7cacf53e
RH
7988
7989 /* Find the sign bit, sign extended to 2*HWI. */
ef719a44 7990 if (elt_mode == SFmode)
7cacf53e
RH
7991 lo = 0x80000000, hi = lo < 0;
7992 else if (HOST_BITS_PER_WIDE_INT >= 64)
7993 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
7994 else
7995 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
7996
7997 /* If we're looking for the absolute value, then we want
7998 the compliment. */
7999 if (code == ABS)
8000 lo = ~lo, hi = ~hi;
8001
8002 /* Force this value into the low part of a fp vector constant. */
8003 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8004 mask = gen_lowpart (mode, mask);
ef719a44
RH
8005
8006 switch (mode)
8007 {
8008 case SFmode:
8009 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8010 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8011 break;
8012
8013 case DFmode:
8014 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8015 break;
8016
8017 case V4SFmode:
8018 v = gen_rtvec (4, mask, mask, mask, mask);
8019 break;
8020
8021 case V4DFmode:
8022 v = gen_rtvec (2, mask, mask);
8023 break;
8024
8025 default:
8026 gcc_unreachable ();
8027 }
8028
8029 mask = gen_rtx_CONST_VECTOR (vec_mode, v);
8030 mask = force_reg (vec_mode, mask);
7cacf53e
RH
8031 }
8032 else
8033 {
8034 /* When not using SSE, we don't use the mask, but prefer to keep the
8035 same general form of the insn pattern to reduce duplication when
8036 it comes time to split. */
8037 mask = const0_rtx;
8038 }
8039
8040 dst = operands[0];
8041 src = operands[1];
8042
8043 /* If the destination is memory, and we don't have matching source
8044 operands, do things in registers. */
8045 matching_memory = false;
8046 if (MEM_P (dst))
8047 {
8048 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8049 matching_memory = true;
8050 else
8051 dst = gen_reg_rtx (mode);
8052 }
8053 if (MEM_P (src) && !matching_memory)
8054 src = force_reg (mode, src);
8055
ef719a44
RH
8056 if (vector_mode)
8057 {
8058 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8059 set = gen_rtx_SET (VOIDmode, dst, set);
8060 emit_insn (set);
8061 }
8062 else
8063 {
8064 set = gen_rtx_fmt_e (code, mode, src);
8065 set = gen_rtx_SET (VOIDmode, dst, set);
8066 use = gen_rtx_USE (VOIDmode, mask);
8067 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8068 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8069 }
7cacf53e
RH
8070
8071 if (dst != operands[0])
8072 emit_move_insn (operands[0], dst);
8073}
e075ae69 8074
16189740
RH
8075/* Return TRUE or FALSE depending on whether the first SET in INSN
8076 has source and destination with matching CC modes, and that the
8077 CC mode is at least as constrained as REQ_MODE. */
8078
8079int
b96a374d 8080ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16189740
RH
8081{
8082 rtx set;
8083 enum machine_mode set_mode;
8084
8085 set = PATTERN (insn);
8086 if (GET_CODE (set) == PARALLEL)
8087 set = XVECEXP (set, 0, 0);
8088 if (GET_CODE (set) != SET)
8089 abort ();
9076b9c1
JH
8090 if (GET_CODE (SET_SRC (set)) != COMPARE)
8091 abort ();
16189740
RH
8092
8093 set_mode = GET_MODE (SET_DEST (set));
8094 switch (set_mode)
8095 {
9076b9c1
JH
8096 case CCNOmode:
8097 if (req_mode != CCNOmode
8098 && (req_mode != CCmode
8099 || XEXP (SET_SRC (set), 1) != const0_rtx))
8100 return 0;
8101 break;
16189740 8102 case CCmode:
9076b9c1 8103 if (req_mode == CCGCmode)
16189740 8104 return 0;
5efb1046 8105 /* FALLTHRU */
9076b9c1
JH
8106 case CCGCmode:
8107 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8108 return 0;
5efb1046 8109 /* FALLTHRU */
9076b9c1 8110 case CCGOCmode:
16189740
RH
8111 if (req_mode == CCZmode)
8112 return 0;
5efb1046 8113 /* FALLTHRU */
16189740
RH
8114 case CCZmode:
8115 break;
8116
8117 default:
8118 abort ();
8119 }
8120
8121 return (GET_MODE (SET_SRC (set)) == set_mode);
8122}
8123
e075ae69
RH
8124/* Generate insn patterns to do an integer compare of OPERANDS. */
8125
8126static rtx
b96a374d 8127ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
8128{
8129 enum machine_mode cmpmode;
8130 rtx tmp, flags;
8131
8132 cmpmode = SELECT_CC_MODE (code, op0, op1);
8133 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8134
8135 /* This is very simple, but making the interface the same as in the
8136 FP case makes the rest of the code easier. */
8137 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8138 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8139
8140 /* Return the test that should be put into the flags user, i.e.
8141 the bcc, scc, or cmov instruction. */
8142 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8143}
8144
3a3677ff
RH
8145/* Figure out whether to use ordered or unordered fp comparisons.
8146 Return the appropriate mode to use. */
e075ae69 8147
b1cdafbb 8148enum machine_mode
b96a374d 8149ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8150{
9e7adcb3
JH
8151 /* ??? In order to make all comparisons reversible, we do all comparisons
8152 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8153 all forms trapping and nontrapping comparisons, we can make inequality
8154 comparisons trapping again, since it results in better code when using
8155 FCOM based compares. */
8156 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8157}
8158
9076b9c1 8159enum machine_mode
b96a374d 8160ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9076b9c1
JH
8161{
8162 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8163 return ix86_fp_compare_mode (code);
8164 switch (code)
8165 {
8166 /* Only zero flag is needed. */
8167 case EQ: /* ZF=0 */
8168 case NE: /* ZF!=0 */
8169 return CCZmode;
8170 /* Codes needing carry flag. */
265dab10
JH
8171 case GEU: /* CF=0 */
8172 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8173 case LTU: /* CF=1 */
8174 case LEU: /* CF=1 | ZF=1 */
265dab10 8175 return CCmode;
9076b9c1
JH
8176 /* Codes possibly doable only with sign flag when
8177 comparing against zero. */
8178 case GE: /* SF=OF or SF=0 */
7e08e190 8179 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8180 if (op1 == const0_rtx)
8181 return CCGOCmode;
8182 else
8183 /* For other cases Carry flag is not required. */
8184 return CCGCmode;
8185 /* Codes doable only with sign flag when comparing
8186 against zero, but we miss jump instruction for it
4aae8a9a 8187 so we need to use relational tests against overflow
9076b9c1
JH
8188 that thus needs to be zero. */
8189 case GT: /* ZF=0 & SF=OF */
8190 case LE: /* ZF=1 | SF<>OF */
8191 if (op1 == const0_rtx)
8192 return CCNOmode;
8193 else
8194 return CCGCmode;
7fcd7218
JH
8195 /* strcmp pattern do (use flags) and combine may ask us for proper
8196 mode. */
8197 case USE:
8198 return CCmode;
9076b9c1 8199 default:
0f290768 8200 abort ();
9076b9c1
JH
8201 }
8202}
8203
e129d93a
ILT
8204/* Return the fixed registers used for condition codes. */
8205
8206static bool
8207ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8208{
8209 *p1 = FLAGS_REG;
8210 *p2 = FPSR_REG;
8211 return true;
8212}
8213
8214/* If two condition code modes are compatible, return a condition code
8215 mode which is compatible with both. Otherwise, return
8216 VOIDmode. */
8217
8218static enum machine_mode
8219ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8220{
8221 if (m1 == m2)
8222 return m1;
8223
8224 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8225 return VOIDmode;
8226
8227 if ((m1 == CCGCmode && m2 == CCGOCmode)
8228 || (m1 == CCGOCmode && m2 == CCGCmode))
8229 return CCGCmode;
8230
8231 switch (m1)
8232 {
8233 default:
8234 abort ();
8235
8236 case CCmode:
8237 case CCGCmode:
8238 case CCGOCmode:
8239 case CCNOmode:
8240 case CCZmode:
8241 switch (m2)
8242 {
8243 default:
8244 return VOIDmode;
8245
8246 case CCmode:
8247 case CCGCmode:
8248 case CCGOCmode:
8249 case CCNOmode:
8250 case CCZmode:
8251 return CCmode;
8252 }
8253
8254 case CCFPmode:
8255 case CCFPUmode:
8256 /* These are only compatible with themselves, which we already
8257 checked above. */
8258 return VOIDmode;
8259 }
8260}
8261
3a3677ff
RH
8262/* Return true if we should use an FCOMI instruction for this fp comparison. */
8263
a940d8bd 8264int
b96a374d 8265ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8266{
9e7adcb3
JH
8267 enum rtx_code swapped_code = swap_condition (code);
8268 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8269 || (ix86_fp_comparison_cost (swapped_code)
8270 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8271}
8272
0f290768 8273/* Swap, force into registers, or otherwise massage the two operands
3a3677ff 8274 to a fp comparison. The operands are updated in place; the new
d1f87653 8275 comparison code is returned. */
3a3677ff
RH
8276
8277static enum rtx_code
b96a374d 8278ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
3a3677ff
RH
8279{
8280 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8281 rtx op0 = *pop0, op1 = *pop1;
8282 enum machine_mode op_mode = GET_MODE (op0);
89b17498 8283 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
3a3677ff 8284
e075ae69 8285 /* All of the unordered compare instructions only work on registers.
45c8c47f
UB
8286 The same is true of the fcomi compare instructions. The same is
8287 true of the XFmode compare instructions if not comparing with
8288 zero (ftst insn is used in this case). */
3a3677ff 8289
0644b628
JH
8290 if (!is_sse
8291 && (fpcmp_mode == CCFPUmode
45c8c47f
UB
8292 || (op_mode == XFmode
8293 && ! (standard_80387_constant_p (op0) == 1
8294 || standard_80387_constant_p (op1) == 1))
0644b628 8295 || ix86_use_fcomi_compare (code)))
e075ae69 8296 {
3a3677ff
RH
8297 op0 = force_reg (op_mode, op0);
8298 op1 = force_reg (op_mode, op1);
e075ae69
RH
8299 }
8300 else
8301 {
8302 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8303 things around if they appear profitable, otherwise force op0
8304 into a register. */
8305
8306 if (standard_80387_constant_p (op0) == 0
8307 || (GET_CODE (op0) == MEM
8308 && ! (standard_80387_constant_p (op1) == 0
8309 || GET_CODE (op1) == MEM)))
32b5b1aa 8310 {
e075ae69
RH
8311 rtx tmp;
8312 tmp = op0, op0 = op1, op1 = tmp;
8313 code = swap_condition (code);
8314 }
8315
8316 if (GET_CODE (op0) != REG)
3a3677ff 8317 op0 = force_reg (op_mode, op0);
e075ae69
RH
8318
8319 if (CONSTANT_P (op1))
8320 {
45c8c47f
UB
8321 int tmp = standard_80387_constant_p (op1);
8322 if (tmp == 0)
3a3677ff 8323 op1 = validize_mem (force_const_mem (op_mode, op1));
45c8c47f
UB
8324 else if (tmp == 1)
8325 {
8326 if (TARGET_CMOVE)
8327 op1 = force_reg (op_mode, op1);
8328 }
8329 else
8330 op1 = force_reg (op_mode, op1);
32b5b1aa
SC
8331 }
8332 }
e9a25f70 8333
9e7adcb3
JH
8334 /* Try to rearrange the comparison to make it cheaper. */
8335 if (ix86_fp_comparison_cost (code)
8336 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8337 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8338 {
8339 rtx tmp;
8340 tmp = op0, op0 = op1, op1 = tmp;
8341 code = swap_condition (code);
8342 if (GET_CODE (op0) != REG)
8343 op0 = force_reg (op_mode, op0);
8344 }
8345
3a3677ff
RH
8346 *pop0 = op0;
8347 *pop1 = op1;
8348 return code;
8349}
8350
c0c102a9
JH
8351/* Convert comparison codes we use to represent FP comparison to integer
8352 code that will result in proper branch. Return UNKNOWN if no such code
8353 is available. */
8fe75e43
RH
8354
8355enum rtx_code
b96a374d 8356ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8357{
8358 switch (code)
8359 {
8360 case GT:
8361 return GTU;
8362 case GE:
8363 return GEU;
8364 case ORDERED:
8365 case UNORDERED:
8366 return code;
8367 break;
8368 case UNEQ:
8369 return EQ;
8370 break;
8371 case UNLT:
8372 return LTU;
8373 break;
8374 case UNLE:
8375 return LEU;
8376 break;
8377 case LTGT:
8378 return NE;
8379 break;
8380 default:
8381 return UNKNOWN;
8382 }
8383}
8384
8385/* Split comparison code CODE into comparisons we can do using branch
8386 instructions. BYPASS_CODE is comparison code for branch that will
8387 branch around FIRST_CODE and SECOND_CODE. If some of branches
f822d252 8388 is not required, set value to UNKNOWN.
c0c102a9 8389 We never require more than two branches. */
8fe75e43
RH
8390
8391void
b96a374d
AJ
8392ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8393 enum rtx_code *first_code,
8394 enum rtx_code *second_code)
c0c102a9
JH
8395{
8396 *first_code = code;
f822d252
ZW
8397 *bypass_code = UNKNOWN;
8398 *second_code = UNKNOWN;
c0c102a9
JH
8399
8400 /* The fcomi comparison sets flags as follows:
8401
8402 cmp ZF PF CF
8403 > 0 0 0
8404 < 0 0 1
8405 = 1 0 0
8406 un 1 1 1 */
8407
8408 switch (code)
8409 {
8410 case GT: /* GTU - CF=0 & ZF=0 */
8411 case GE: /* GEU - CF=0 */
8412 case ORDERED: /* PF=0 */
8413 case UNORDERED: /* PF=1 */
8414 case UNEQ: /* EQ - ZF=1 */
8415 case UNLT: /* LTU - CF=1 */
8416 case UNLE: /* LEU - CF=1 | ZF=1 */
8417 case LTGT: /* EQ - ZF=0 */
8418 break;
8419 case LT: /* LTU - CF=1 - fails on unordered */
8420 *first_code = UNLT;
8421 *bypass_code = UNORDERED;
8422 break;
8423 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8424 *first_code = UNLE;
8425 *bypass_code = UNORDERED;
8426 break;
8427 case EQ: /* EQ - ZF=1 - fails on unordered */
8428 *first_code = UNEQ;
8429 *bypass_code = UNORDERED;
8430 break;
8431 case NE: /* NE - ZF=0 - fails on unordered */
8432 *first_code = LTGT;
8433 *second_code = UNORDERED;
8434 break;
8435 case UNGE: /* GEU - CF=0 - fails on unordered */
8436 *first_code = GE;
8437 *second_code = UNORDERED;
8438 break;
8439 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8440 *first_code = GT;
8441 *second_code = UNORDERED;
8442 break;
8443 default:
8444 abort ();
8445 }
8446 if (!TARGET_IEEE_FP)
8447 {
f822d252
ZW
8448 *second_code = UNKNOWN;
8449 *bypass_code = UNKNOWN;
c0c102a9
JH
8450 }
8451}
8452
9e7adcb3 8453/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 8454 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
8455 In future this should be tweaked to compute bytes for optimize_size and
8456 take into account performance of various instructions on various CPUs. */
8457static int
b96a374d 8458ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
8459{
8460 if (!TARGET_IEEE_FP)
8461 return 4;
8462 /* The cost of code output by ix86_expand_fp_compare. */
8463 switch (code)
8464 {
8465 case UNLE:
8466 case UNLT:
8467 case LTGT:
8468 case GT:
8469 case GE:
8470 case UNORDERED:
8471 case ORDERED:
8472 case UNEQ:
8473 return 4;
8474 break;
8475 case LT:
8476 case NE:
8477 case EQ:
8478 case UNGE:
8479 return 5;
8480 break;
8481 case LE:
8482 case UNGT:
8483 return 6;
8484 break;
8485 default:
8486 abort ();
8487 }
8488}
8489
8490/* Return cost of comparison done using fcomi operation.
8491 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8492static int
b96a374d 8493ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
8494{
8495 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8496 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
8497 prevents gcc from using it. */
8498 if (!TARGET_CMOVE)
8499 return 1024;
8500 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8501 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
8502}
8503
8504/* Return cost of comparison done using sahf operation.
8505 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8506static int
b96a374d 8507ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
8508{
8509 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8510 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
8511 avoids gcc from using it. */
8512 if (!TARGET_USE_SAHF && !optimize_size)
8513 return 1024;
8514 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8515 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
8516}
8517
8518/* Compute cost of the comparison done using any method.
8519 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8520static int
b96a374d 8521ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
8522{
8523 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8524 int min;
8525
8526 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8527 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8528
8529 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8530 if (min > sahf_cost)
8531 min = sahf_cost;
8532 if (min > fcomi_cost)
8533 min = fcomi_cost;
8534 return min;
8535}
c0c102a9 8536
3a3677ff
RH
8537/* Generate insn patterns to do a floating point compare of OPERANDS. */
8538
9e7adcb3 8539static rtx
b96a374d
AJ
8540ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8541 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
8542{
8543 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8544 rtx tmp, tmp2;
9e7adcb3 8545 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8546 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8547
8548 fpcmp_mode = ix86_fp_compare_mode (code);
8549 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8550
9e7adcb3
JH
8551 if (second_test)
8552 *second_test = NULL_RTX;
8553 if (bypass_test)
8554 *bypass_test = NULL_RTX;
8555
c0c102a9
JH
8556 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8557
9e7adcb3 8558 /* Do fcomi/sahf based test when profitable. */
f822d252
ZW
8559 if ((bypass_code == UNKNOWN || bypass_test)
8560 && (second_code == UNKNOWN || second_test)
9e7adcb3 8561 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8562 {
c0c102a9
JH
8563 if (TARGET_CMOVE)
8564 {
8565 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8566 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8567 tmp);
8568 emit_insn (tmp);
8569 }
8570 else
8571 {
8572 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8573 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8574 if (!scratch)
8575 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8576 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8577 emit_insn (gen_x86_sahf_1 (scratch));
8578 }
e075ae69
RH
8579
8580 /* The FP codes work out to act like unsigned. */
9a915772 8581 intcmp_mode = fpcmp_mode;
9e7adcb3 8582 code = first_code;
f822d252 8583 if (bypass_code != UNKNOWN)
9e7adcb3
JH
8584 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8585 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8586 const0_rtx);
f822d252 8587 if (second_code != UNKNOWN)
9e7adcb3
JH
8588 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8589 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8590 const0_rtx);
e075ae69
RH
8591 }
8592 else
8593 {
8594 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8595 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8596 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8597 if (!scratch)
8598 scratch = gen_reg_rtx (HImode);
3a3677ff 8599 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8600
9a915772
JH
8601 /* In the unordered case, we have to check C2 for NaN's, which
8602 doesn't happen to work out to anything nice combination-wise.
8603 So do some bit twiddling on the value we've got in AH to come
8604 up with an appropriate set of condition codes. */
e075ae69 8605
9a915772
JH
8606 intcmp_mode = CCNOmode;
8607 switch (code)
32b5b1aa 8608 {
9a915772
JH
8609 case GT:
8610 case UNGT:
8611 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8612 {
3a3677ff 8613 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8614 code = EQ;
9a915772
JH
8615 }
8616 else
8617 {
8618 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8619 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8620 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8621 intcmp_mode = CCmode;
8622 code = GEU;
8623 }
8624 break;
8625 case LT:
8626 case UNLT:
8627 if (code == LT && TARGET_IEEE_FP)
8628 {
3a3677ff
RH
8629 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8630 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8631 intcmp_mode = CCmode;
8632 code = EQ;
9a915772
JH
8633 }
8634 else
8635 {
8636 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8637 code = NE;
8638 }
8639 break;
8640 case GE:
8641 case UNGE:
8642 if (code == GE || !TARGET_IEEE_FP)
8643 {
3a3677ff 8644 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8645 code = EQ;
9a915772
JH
8646 }
8647 else
8648 {
8649 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8650 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8651 GEN_INT (0x01)));
8652 code = NE;
8653 }
8654 break;
8655 case LE:
8656 case UNLE:
8657 if (code == LE && TARGET_IEEE_FP)
8658 {
3a3677ff
RH
8659 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8660 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8661 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8662 intcmp_mode = CCmode;
8663 code = LTU;
9a915772
JH
8664 }
8665 else
8666 {
8667 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8668 code = NE;
8669 }
8670 break;
8671 case EQ:
8672 case UNEQ:
8673 if (code == EQ && TARGET_IEEE_FP)
8674 {
3a3677ff
RH
8675 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8676 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8677 intcmp_mode = CCmode;
8678 code = EQ;
9a915772
JH
8679 }
8680 else
8681 {
3a3677ff
RH
8682 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8683 code = NE;
8684 break;
9a915772
JH
8685 }
8686 break;
8687 case NE:
8688 case LTGT:
8689 if (code == NE && TARGET_IEEE_FP)
8690 {
3a3677ff 8691 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8692 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8693 GEN_INT (0x40)));
3a3677ff 8694 code = NE;
9a915772
JH
8695 }
8696 else
8697 {
3a3677ff
RH
8698 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8699 code = EQ;
32b5b1aa 8700 }
9a915772
JH
8701 break;
8702
8703 case UNORDERED:
8704 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8705 code = NE;
8706 break;
8707 case ORDERED:
8708 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8709 code = EQ;
8710 break;
8711
8712 default:
8713 abort ();
32b5b1aa 8714 }
32b5b1aa 8715 }
e075ae69
RH
8716
8717 /* Return the test that should be put into the flags user, i.e.
8718 the bcc, scc, or cmov instruction. */
8719 return gen_rtx_fmt_ee (code, VOIDmode,
8720 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8721 const0_rtx);
8722}
8723
9e3e266c 8724rtx
b96a374d 8725ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
8726{
8727 rtx op0, op1, ret;
8728 op0 = ix86_compare_op0;
8729 op1 = ix86_compare_op1;
8730
a1b8572c
JH
8731 if (second_test)
8732 *second_test = NULL_RTX;
8733 if (bypass_test)
8734 *bypass_test = NULL_RTX;
8735
e075ae69 8736 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8737 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8738 second_test, bypass_test);
32b5b1aa 8739 else
e075ae69
RH
8740 ret = ix86_expand_int_compare (code, op0, op1);
8741
8742 return ret;
8743}
8744
03598dea
JH
8745/* Return true if the CODE will result in nontrivial jump sequence. */
8746bool
b96a374d 8747ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
8748{
8749 enum rtx_code bypass_code, first_code, second_code;
8750 if (!TARGET_CMOVE)
8751 return true;
8752 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8753 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
8754}
8755
e075ae69 8756void
b96a374d 8757ix86_expand_branch (enum rtx_code code, rtx label)
e075ae69 8758{
3a3677ff 8759 rtx tmp;
e075ae69 8760
3a3677ff 8761 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8762 {
3a3677ff
RH
8763 case QImode:
8764 case HImode:
8765 case SImode:
0d7d98ee 8766 simple:
a1b8572c 8767 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8768 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8769 gen_rtx_LABEL_REF (VOIDmode, label),
8770 pc_rtx);
8771 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8772 return;
e075ae69 8773
3a3677ff
RH
8774 case SFmode:
8775 case DFmode:
0f290768 8776 case XFmode:
3a3677ff
RH
8777 {
8778 rtvec vec;
8779 int use_fcomi;
03598dea 8780 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8781
8782 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8783 &ix86_compare_op1);
fce5a9f2 8784
03598dea
JH
8785 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8786
8787 /* Check whether we will use the natural sequence with one jump. If
8788 so, we can expand jump early. Otherwise delay expansion by
8789 creating compound insn to not confuse optimizers. */
f822d252 8790 if (bypass_code == UNKNOWN && second_code == UNKNOWN
03598dea
JH
8791 && TARGET_CMOVE)
8792 {
8793 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8794 gen_rtx_LABEL_REF (VOIDmode, label),
7c82106f 8795 pc_rtx, NULL_RTX, NULL_RTX);
03598dea
JH
8796 }
8797 else
8798 {
8799 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8800 ix86_compare_op0, ix86_compare_op1);
8801 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8802 gen_rtx_LABEL_REF (VOIDmode, label),
8803 pc_rtx);
8804 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8805
8806 use_fcomi = ix86_use_fcomi_compare (code);
8807 vec = rtvec_alloc (3 + !use_fcomi);
8808 RTVEC_ELT (vec, 0) = tmp;
8809 RTVEC_ELT (vec, 1)
8810 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8811 RTVEC_ELT (vec, 2)
8812 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8813 if (! use_fcomi)
8814 RTVEC_ELT (vec, 3)
8815 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8816
8817 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8818 }
3a3677ff
RH
8819 return;
8820 }
32b5b1aa 8821
3a3677ff 8822 case DImode:
0d7d98ee
JH
8823 if (TARGET_64BIT)
8824 goto simple;
3a3677ff
RH
8825 /* Expand DImode branch into multiple compare+branch. */
8826 {
8827 rtx lo[2], hi[2], label2;
8828 enum rtx_code code1, code2, code3;
32b5b1aa 8829
3a3677ff
RH
8830 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8831 {
8832 tmp = ix86_compare_op0;
8833 ix86_compare_op0 = ix86_compare_op1;
8834 ix86_compare_op1 = tmp;
8835 code = swap_condition (code);
8836 }
8837 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8838 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8839
3a3677ff
RH
8840 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8841 avoid two branches. This costs one extra insn, so disable when
8842 optimizing for size. */
32b5b1aa 8843
3a3677ff
RH
8844 if ((code == EQ || code == NE)
8845 && (!optimize_size
8846 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8847 {
8848 rtx xor0, xor1;
32b5b1aa 8849
3a3677ff
RH
8850 xor1 = hi[0];
8851 if (hi[1] != const0_rtx)
8852 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8853 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8854
3a3677ff
RH
8855 xor0 = lo[0];
8856 if (lo[1] != const0_rtx)
8857 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8858 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8859
3a3677ff
RH
8860 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8861 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8862
3a3677ff
RH
8863 ix86_compare_op0 = tmp;
8864 ix86_compare_op1 = const0_rtx;
8865 ix86_expand_branch (code, label);
8866 return;
8867 }
e075ae69 8868
1f9124e4
JJ
8869 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8870 op1 is a constant and the low word is zero, then we can just
8871 examine the high word. */
32b5b1aa 8872
1f9124e4
JJ
8873 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8874 switch (code)
8875 {
8876 case LT: case LTU: case GE: case GEU:
8877 ix86_compare_op0 = hi[0];
8878 ix86_compare_op1 = hi[1];
8879 ix86_expand_branch (code, label);
8880 return;
8881 default:
8882 break;
8883 }
e075ae69 8884
3a3677ff 8885 /* Otherwise, we need two or three jumps. */
e075ae69 8886
3a3677ff 8887 label2 = gen_label_rtx ();
e075ae69 8888
3a3677ff
RH
8889 code1 = code;
8890 code2 = swap_condition (code);
8891 code3 = unsigned_condition (code);
e075ae69 8892
3a3677ff
RH
8893 switch (code)
8894 {
8895 case LT: case GT: case LTU: case GTU:
8896 break;
e075ae69 8897
3a3677ff
RH
8898 case LE: code1 = LT; code2 = GT; break;
8899 case GE: code1 = GT; code2 = LT; break;
8900 case LEU: code1 = LTU; code2 = GTU; break;
8901 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8902
f822d252
ZW
8903 case EQ: code1 = UNKNOWN; code2 = NE; break;
8904 case NE: code2 = UNKNOWN; break;
e075ae69 8905
3a3677ff
RH
8906 default:
8907 abort ();
8908 }
e075ae69 8909
3a3677ff
RH
8910 /*
8911 * a < b =>
8912 * if (hi(a) < hi(b)) goto true;
8913 * if (hi(a) > hi(b)) goto false;
8914 * if (lo(a) < lo(b)) goto true;
8915 * false:
8916 */
8917
8918 ix86_compare_op0 = hi[0];
8919 ix86_compare_op1 = hi[1];
8920
f822d252 8921 if (code1 != UNKNOWN)
3a3677ff 8922 ix86_expand_branch (code1, label);
f822d252 8923 if (code2 != UNKNOWN)
3a3677ff
RH
8924 ix86_expand_branch (code2, label2);
8925
8926 ix86_compare_op0 = lo[0];
8927 ix86_compare_op1 = lo[1];
8928 ix86_expand_branch (code3, label);
8929
f822d252 8930 if (code2 != UNKNOWN)
3a3677ff
RH
8931 emit_label (label2);
8932 return;
8933 }
e075ae69 8934
3a3677ff
RH
8935 default:
8936 abort ();
8937 }
32b5b1aa 8938}
e075ae69 8939
9e7adcb3
JH
8940/* Split branch based on floating point condition. */
8941void
b96a374d 8942ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
7c82106f 8943 rtx target1, rtx target2, rtx tmp, rtx pushed)
9e7adcb3
JH
8944{
8945 rtx second, bypass;
8946 rtx label = NULL_RTX;
03598dea 8947 rtx condition;
6b24c259
JH
8948 int bypass_probability = -1, second_probability = -1, probability = -1;
8949 rtx i;
9e7adcb3
JH
8950
8951 if (target2 != pc_rtx)
8952 {
8953 rtx tmp = target2;
8954 code = reverse_condition_maybe_unordered (code);
8955 target2 = target1;
8956 target1 = tmp;
8957 }
8958
8959 condition = ix86_expand_fp_compare (code, op1, op2,
8960 tmp, &second, &bypass);
6b24c259 8961
7c82106f
UB
8962 /* Remove pushed operand from stack. */
8963 if (pushed)
8964 ix86_free_from_memory (GET_MODE (pushed));
8965
6b24c259
JH
8966 if (split_branch_probability >= 0)
8967 {
8968 /* Distribute the probabilities across the jumps.
8969 Assume the BYPASS and SECOND to be always test
8970 for UNORDERED. */
8971 probability = split_branch_probability;
8972
d6a7951f 8973 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8974 to be updated. Later we may run some experiments and see
8975 if unordered values are more frequent in practice. */
8976 if (bypass)
8977 bypass_probability = 1;
8978 if (second)
8979 second_probability = 1;
8980 }
9e7adcb3
JH
8981 if (bypass != NULL_RTX)
8982 {
8983 label = gen_label_rtx ();
6b24c259
JH
8984 i = emit_jump_insn (gen_rtx_SET
8985 (VOIDmode, pc_rtx,
8986 gen_rtx_IF_THEN_ELSE (VOIDmode,
8987 bypass,
8988 gen_rtx_LABEL_REF (VOIDmode,
8989 label),
8990 pc_rtx)));
8991 if (bypass_probability >= 0)
8992 REG_NOTES (i)
8993 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8994 GEN_INT (bypass_probability),
8995 REG_NOTES (i));
8996 }
8997 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8998 (VOIDmode, pc_rtx,
8999 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
9000 condition, target1, target2)));
9001 if (probability >= 0)
9002 REG_NOTES (i)
9003 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9004 GEN_INT (probability),
9005 REG_NOTES (i));
9006 if (second != NULL_RTX)
9e7adcb3 9007 {
6b24c259
JH
9008 i = emit_jump_insn (gen_rtx_SET
9009 (VOIDmode, pc_rtx,
9010 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9011 target2)));
9012 if (second_probability >= 0)
9013 REG_NOTES (i)
9014 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9015 GEN_INT (second_probability),
9016 REG_NOTES (i));
9e7adcb3 9017 }
9e7adcb3
JH
9018 if (label != NULL_RTX)
9019 emit_label (label);
9020}
9021
32b5b1aa 9022int
b96a374d 9023ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9024{
3a627503 9025 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9026 rtx second_test, bypass_test;
e075ae69 9027
885a70fd
JH
9028 if (GET_MODE (ix86_compare_op0) == DImode
9029 && !TARGET_64BIT)
e075ae69
RH
9030 return 0; /* FAIL */
9031
b932f770
JH
9032 if (GET_MODE (dest) != QImode)
9033 abort ();
e075ae69 9034
a1b8572c 9035 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9036 PUT_MODE (ret, QImode);
9037
9038 tmp = dest;
a1b8572c 9039 tmpreg = dest;
32b5b1aa 9040
e075ae69 9041 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9042 if (bypass_test || second_test)
9043 {
9044 rtx test = second_test;
9045 int bypass = 0;
9046 rtx tmp2 = gen_reg_rtx (QImode);
9047 if (bypass_test)
9048 {
9049 if (second_test)
b531087a 9050 abort ();
a1b8572c
JH
9051 test = bypass_test;
9052 bypass = 1;
9053 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9054 }
9055 PUT_MODE (test, QImode);
9056 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9057
9058 if (bypass)
9059 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9060 else
9061 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9062 }
e075ae69 9063
3a627503
RS
9064 /* Attach a REG_EQUAL note describing the comparison result. */
9065 equiv = simplify_gen_relational (code, QImode,
9066 GET_MODE (ix86_compare_op0),
9067 ix86_compare_op0, ix86_compare_op1);
9068 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9069
e075ae69 9070 return 1; /* DONE */
32b5b1aa 9071}
e075ae69 9072
c35d187f
RH
9073/* Expand comparison setting or clearing carry flag. Return true when
9074 successful and set pop for the operation. */
9075static bool
b96a374d 9076ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9077{
9078 enum machine_mode mode =
9079 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9080
9081 /* Do not handle DImode compares that go trought special path. Also we can't
43f3a59d 9082 deal with FP compares yet. This is possible to add. */
e6e81735
JH
9083 if ((mode == DImode && !TARGET_64BIT))
9084 return false;
9085 if (FLOAT_MODE_P (mode))
9086 {
9087 rtx second_test = NULL, bypass_test = NULL;
9088 rtx compare_op, compare_seq;
9089
9090 /* Shortcut: following common codes never translate into carry flag compares. */
9091 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9092 || code == ORDERED || code == UNORDERED)
9093 return false;
9094
9095 /* These comparisons require zero flag; swap operands so they won't. */
9096 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9097 && !TARGET_IEEE_FP)
9098 {
9099 rtx tmp = op0;
9100 op0 = op1;
9101 op1 = tmp;
9102 code = swap_condition (code);
9103 }
9104
c51e6d85
KH
9105 /* Try to expand the comparison and verify that we end up with carry flag
9106 based comparison. This is fails to be true only when we decide to expand
9107 comparison using arithmetic that is not too common scenario. */
e6e81735
JH
9108 start_sequence ();
9109 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9110 &second_test, &bypass_test);
9111 compare_seq = get_insns ();
9112 end_sequence ();
9113
9114 if (second_test || bypass_test)
9115 return false;
9116 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9117 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9118 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9119 else
9120 code = GET_CODE (compare_op);
9121 if (code != LTU && code != GEU)
9122 return false;
9123 emit_insn (compare_seq);
9124 *pop = compare_op;
9125 return true;
9126 }
9127 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9128 return false;
9129 switch (code)
9130 {
9131 case LTU:
9132 case GEU:
9133 break;
9134
9135 /* Convert a==0 into (unsigned)a<1. */
9136 case EQ:
9137 case NE:
9138 if (op1 != const0_rtx)
9139 return false;
9140 op1 = const1_rtx;
9141 code = (code == EQ ? LTU : GEU);
9142 break;
9143
9144 /* Convert a>b into b<a or a>=b-1. */
9145 case GTU:
9146 case LEU:
9147 if (GET_CODE (op1) == CONST_INT)
9148 {
9149 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9150 /* Bail out on overflow. We still can swap operands but that
43f3a59d 9151 would force loading of the constant into register. */
4977bab6
ZW
9152 if (op1 == const0_rtx
9153 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9154 return false;
9155 code = (code == GTU ? GEU : LTU);
9156 }
9157 else
9158 {
9159 rtx tmp = op1;
9160 op1 = op0;
9161 op0 = tmp;
9162 code = (code == GTU ? LTU : GEU);
9163 }
9164 break;
9165
ccea753c 9166 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9167 case LT:
9168 case GE:
9169 if (mode == DImode || op1 != const0_rtx)
9170 return false;
ccea753c 9171 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9172 code = (code == LT ? GEU : LTU);
9173 break;
9174 case LE:
9175 case GT:
9176 if (mode == DImode || op1 != constm1_rtx)
9177 return false;
ccea753c 9178 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9179 code = (code == LE ? GEU : LTU);
9180 break;
9181
9182 default:
9183 return false;
9184 }
ebe75517
JH
9185 /* Swapping operands may cause constant to appear as first operand. */
9186 if (!nonimmediate_operand (op0, VOIDmode))
9187 {
9188 if (no_new_pseudos)
9189 return false;
9190 op0 = force_reg (mode, op0);
9191 }
4977bab6
ZW
9192 ix86_compare_op0 = op0;
9193 ix86_compare_op1 = op1;
9194 *pop = ix86_expand_compare (code, NULL, NULL);
9195 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9196 abort ();
9197 return true;
9198}
9199
32b5b1aa 9200int
b96a374d 9201ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9202{
e075ae69
RH
9203 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9204 rtx compare_seq, compare_op;
a1b8572c 9205 rtx second_test, bypass_test;
635559ab 9206 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9207 bool sign_bit_compare_p = false;;
3a3677ff 9208
e075ae69 9209 start_sequence ();
a1b8572c 9210 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9211 compare_seq = get_insns ();
e075ae69
RH
9212 end_sequence ();
9213
9214 compare_code = GET_CODE (compare_op);
9215
4977bab6
ZW
9216 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9217 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9218 sign_bit_compare_p = true;
9219
e075ae69
RH
9220 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9221 HImode insns, we'd be swallowed in word prefix ops. */
9222
4977bab6 9223 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9224 && (mode != DImode || TARGET_64BIT)
0f290768 9225 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9226 && GET_CODE (operands[3]) == CONST_INT)
9227 {
9228 rtx out = operands[0];
9229 HOST_WIDE_INT ct = INTVAL (operands[2]);
9230 HOST_WIDE_INT cf = INTVAL (operands[3]);
9231 HOST_WIDE_INT diff;
9232
4977bab6
ZW
9233 diff = ct - cf;
9234 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9235 sbb. */
4977bab6
ZW
9236 if (sign_bit_compare_p
9237 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9238 ix86_compare_op1, &compare_op))
e075ae69 9239 {
e075ae69
RH
9240 /* Detect overlap between destination and compare sources. */
9241 rtx tmp = out;
9242
4977bab6 9243 if (!sign_bit_compare_p)
36583fea 9244 {
e6e81735
JH
9245 bool fpcmp = false;
9246
4977bab6
ZW
9247 compare_code = GET_CODE (compare_op);
9248
e6e81735
JH
9249 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9250 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9251 {
9252 fpcmp = true;
9253 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9254 }
9255
4977bab6
ZW
9256 /* To simplify rest of code, restrict to the GEU case. */
9257 if (compare_code == LTU)
9258 {
9259 HOST_WIDE_INT tmp = ct;
9260 ct = cf;
9261 cf = tmp;
9262 compare_code = reverse_condition (compare_code);
9263 code = reverse_condition (code);
9264 }
e6e81735
JH
9265 else
9266 {
9267 if (fpcmp)
9268 PUT_CODE (compare_op,
9269 reverse_condition_maybe_unordered
9270 (GET_CODE (compare_op)));
9271 else
9272 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9273 }
4977bab6 9274 diff = ct - cf;
36583fea 9275
4977bab6
ZW
9276 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9277 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9278 tmp = gen_reg_rtx (mode);
e075ae69 9279
4977bab6 9280 if (mode == DImode)
e6e81735 9281 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9282 else
e6e81735 9283 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9284 }
14f73b5a 9285 else
4977bab6
ZW
9286 {
9287 if (code == GT || code == GE)
9288 code = reverse_condition (code);
9289 else
9290 {
9291 HOST_WIDE_INT tmp = ct;
9292 ct = cf;
9293 cf = tmp;
5fb48685 9294 diff = ct - cf;
4977bab6
ZW
9295 }
9296 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9297 ix86_compare_op1, VOIDmode, 0, -1);
9298 }
e075ae69 9299
36583fea
JH
9300 if (diff == 1)
9301 {
9302 /*
9303 * cmpl op0,op1
9304 * sbbl dest,dest
9305 * [addl dest, ct]
9306 *
9307 * Size 5 - 8.
9308 */
9309 if (ct)
b96a374d 9310 tmp = expand_simple_binop (mode, PLUS,
635559ab 9311 tmp, GEN_INT (ct),
4977bab6 9312 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9313 }
9314 else if (cf == -1)
9315 {
9316 /*
9317 * cmpl op0,op1
9318 * sbbl dest,dest
9319 * orl $ct, dest
9320 *
9321 * Size 8.
9322 */
635559ab
JH
9323 tmp = expand_simple_binop (mode, IOR,
9324 tmp, GEN_INT (ct),
4977bab6 9325 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9326 }
9327 else if (diff == -1 && ct)
9328 {
9329 /*
9330 * cmpl op0,op1
9331 * sbbl dest,dest
06ec023f 9332 * notl dest
36583fea
JH
9333 * [addl dest, cf]
9334 *
9335 * Size 8 - 11.
9336 */
4977bab6 9337 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9338 if (cf)
b96a374d 9339 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9340 copy_rtx (tmp), GEN_INT (cf),
9341 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9342 }
9343 else
9344 {
9345 /*
9346 * cmpl op0,op1
9347 * sbbl dest,dest
06ec023f 9348 * [notl dest]
36583fea
JH
9349 * andl cf - ct, dest
9350 * [addl dest, ct]
9351 *
9352 * Size 8 - 11.
9353 */
06ec023f
RB
9354
9355 if (cf == 0)
9356 {
9357 cf = ct;
9358 ct = 0;
4977bab6 9359 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9360 }
9361
635559ab 9362 tmp = expand_simple_binop (mode, AND,
4977bab6 9363 copy_rtx (tmp),
d8bf17f9 9364 gen_int_mode (cf - ct, mode),
4977bab6 9365 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9366 if (ct)
b96a374d 9367 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9368 copy_rtx (tmp), GEN_INT (ct),
9369 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9370 }
e075ae69 9371
4977bab6
ZW
9372 if (!rtx_equal_p (tmp, out))
9373 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9374
9375 return 1; /* DONE */
9376 }
9377
e075ae69
RH
9378 if (diff < 0)
9379 {
9380 HOST_WIDE_INT tmp;
9381 tmp = ct, ct = cf, cf = tmp;
9382 diff = -diff;
734dba19
JH
9383 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9384 {
9385 /* We may be reversing unordered compare to normal compare, that
9386 is not valid in general (we may convert non-trapping condition
9387 to trapping one), however on i386 we currently emit all
9388 comparisons unordered. */
9389 compare_code = reverse_condition_maybe_unordered (compare_code);
9390 code = reverse_condition_maybe_unordered (code);
9391 }
9392 else
9393 {
9394 compare_code = reverse_condition (compare_code);
9395 code = reverse_condition (code);
9396 }
e075ae69 9397 }
0f2a3457 9398
f822d252 9399 compare_code = UNKNOWN;
0f2a3457
JJ
9400 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9401 && GET_CODE (ix86_compare_op1) == CONST_INT)
9402 {
9403 if (ix86_compare_op1 == const0_rtx
9404 && (code == LT || code == GE))
9405 compare_code = code;
9406 else if (ix86_compare_op1 == constm1_rtx)
9407 {
9408 if (code == LE)
9409 compare_code = LT;
9410 else if (code == GT)
9411 compare_code = GE;
9412 }
9413 }
9414
9415 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 9416 if (compare_code != UNKNOWN
0f2a3457
JJ
9417 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9418 && (cf == -1 || ct == -1))
9419 {
9420 /* If lea code below could be used, only optimize
9421 if it results in a 2 insn sequence. */
9422
9423 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9424 || diff == 3 || diff == 5 || diff == 9)
9425 || (compare_code == LT && ct == -1)
9426 || (compare_code == GE && cf == -1))
9427 {
9428 /*
9429 * notl op1 (if necessary)
9430 * sarl $31, op1
9431 * orl cf, op1
9432 */
9433 if (ct != -1)
9434 {
9435 cf = ct;
b96a374d 9436 ct = -1;
0f2a3457
JJ
9437 code = reverse_condition (code);
9438 }
9439
9440 out = emit_store_flag (out, code, ix86_compare_op0,
9441 ix86_compare_op1, VOIDmode, 0, -1);
9442
9443 out = expand_simple_binop (mode, IOR,
9444 out, GEN_INT (cf),
9445 out, 1, OPTAB_DIRECT);
9446 if (out != operands[0])
9447 emit_move_insn (operands[0], out);
9448
9449 return 1; /* DONE */
9450 }
9451 }
9452
4977bab6 9453
635559ab
JH
9454 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9455 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9456 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
9457 && (mode != DImode
9458 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
9459 {
9460 /*
9461 * xorl dest,dest
9462 * cmpl op1,op2
9463 * setcc dest
9464 * lea cf(dest*(ct-cf)),dest
9465 *
9466 * Size 14.
9467 *
9468 * This also catches the degenerate setcc-only case.
9469 */
9470
9471 rtx tmp;
9472 int nops;
9473
9474 out = emit_store_flag (out, code, ix86_compare_op0,
9475 ix86_compare_op1, VOIDmode, 0, 1);
9476
9477 nops = 0;
97f51ac4
RB
9478 /* On x86_64 the lea instruction operates on Pmode, so we need
9479 to get arithmetics done in proper mode to match. */
e075ae69 9480 if (diff == 1)
068f5dea 9481 tmp = copy_rtx (out);
e075ae69
RH
9482 else
9483 {
885a70fd 9484 rtx out1;
068f5dea 9485 out1 = copy_rtx (out);
635559ab 9486 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9487 nops++;
9488 if (diff & 1)
9489 {
635559ab 9490 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9491 nops++;
9492 }
9493 }
9494 if (cf != 0)
9495 {
635559ab 9496 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9497 nops++;
9498 }
4977bab6 9499 if (!rtx_equal_p (tmp, out))
e075ae69 9500 {
14f73b5a 9501 if (nops == 1)
a5cf80f0 9502 out = force_operand (tmp, copy_rtx (out));
e075ae69 9503 else
4977bab6 9504 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9505 }
4977bab6 9506 if (!rtx_equal_p (out, operands[0]))
1985ef90 9507 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9508
9509 return 1; /* DONE */
9510 }
9511
9512 /*
9513 * General case: Jumpful:
9514 * xorl dest,dest cmpl op1, op2
9515 * cmpl op1, op2 movl ct, dest
9516 * setcc dest jcc 1f
9517 * decl dest movl cf, dest
9518 * andl (cf-ct),dest 1:
9519 * addl ct,dest
0f290768 9520 *
e075ae69
RH
9521 * Size 20. Size 14.
9522 *
9523 * This is reasonably steep, but branch mispredict costs are
9524 * high on modern cpus, so consider failing only if optimizing
9525 * for space.
e075ae69
RH
9526 */
9527
4977bab6
ZW
9528 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9529 && BRANCH_COST >= 2)
e075ae69 9530 {
97f51ac4 9531 if (cf == 0)
e075ae69 9532 {
97f51ac4
RB
9533 cf = ct;
9534 ct = 0;
734dba19 9535 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9536 /* We may be reversing unordered compare to normal compare,
9537 that is not valid in general (we may convert non-trapping
9538 condition to trapping one), however on i386 we currently
9539 emit all comparisons unordered. */
9540 code = reverse_condition_maybe_unordered (code);
9541 else
9542 {
9543 code = reverse_condition (code);
f822d252 9544 if (compare_code != UNKNOWN)
0f2a3457
JJ
9545 compare_code = reverse_condition (compare_code);
9546 }
9547 }
9548
f822d252 9549 if (compare_code != UNKNOWN)
0f2a3457
JJ
9550 {
9551 /* notl op1 (if needed)
9552 sarl $31, op1
9553 andl (cf-ct), op1
b96a374d 9554 addl ct, op1
0f2a3457
JJ
9555
9556 For x < 0 (resp. x <= -1) there will be no notl,
9557 so if possible swap the constants to get rid of the
9558 complement.
9559 True/false will be -1/0 while code below (store flag
9560 followed by decrement) is 0/-1, so the constants need
9561 to be exchanged once more. */
9562
9563 if (compare_code == GE || !cf)
734dba19 9564 {
b96a374d 9565 code = reverse_condition (code);
0f2a3457 9566 compare_code = LT;
734dba19
JH
9567 }
9568 else
9569 {
0f2a3457 9570 HOST_WIDE_INT tmp = cf;
b96a374d 9571 cf = ct;
0f2a3457 9572 ct = tmp;
734dba19 9573 }
0f2a3457
JJ
9574
9575 out = emit_store_flag (out, code, ix86_compare_op0,
9576 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9577 }
0f2a3457
JJ
9578 else
9579 {
9580 out = emit_store_flag (out, code, ix86_compare_op0,
9581 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9582
4977bab6
ZW
9583 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9584 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 9585 }
e075ae69 9586
4977bab6 9587 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 9588 gen_int_mode (cf - ct, mode),
4977bab6 9589 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 9590 if (ct)
4977bab6
ZW
9591 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9592 copy_rtx (out), 1, OPTAB_DIRECT);
9593 if (!rtx_equal_p (out, operands[0]))
9594 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9595
9596 return 1; /* DONE */
9597 }
9598 }
9599
4977bab6 9600 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
9601 {
9602 /* Try a few things more with specific constants and a variable. */
9603
78a0d70c 9604 optab op;
e075ae69
RH
9605 rtx var, orig_out, out, tmp;
9606
4977bab6 9607 if (BRANCH_COST <= 2)
e075ae69
RH
9608 return 0; /* FAIL */
9609
0f290768 9610 /* If one of the two operands is an interesting constant, load a
e075ae69 9611 constant with the above and mask it in with a logical operation. */
0f290768 9612
e075ae69
RH
9613 if (GET_CODE (operands[2]) == CONST_INT)
9614 {
9615 var = operands[3];
4977bab6 9616 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 9617 operands[3] = constm1_rtx, op = and_optab;
4977bab6 9618 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 9619 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9620 else
9621 return 0; /* FAIL */
e075ae69
RH
9622 }
9623 else if (GET_CODE (operands[3]) == CONST_INT)
9624 {
9625 var = operands[2];
4977bab6 9626 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 9627 operands[2] = constm1_rtx, op = and_optab;
4977bab6 9628 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 9629 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9630 else
9631 return 0; /* FAIL */
e075ae69 9632 }
78a0d70c 9633 else
e075ae69
RH
9634 return 0; /* FAIL */
9635
9636 orig_out = operands[0];
635559ab 9637 tmp = gen_reg_rtx (mode);
e075ae69
RH
9638 operands[0] = tmp;
9639
9640 /* Recurse to get the constant loaded. */
9641 if (ix86_expand_int_movcc (operands) == 0)
9642 return 0; /* FAIL */
9643
9644 /* Mask in the interesting variable. */
635559ab 9645 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 9646 OPTAB_WIDEN);
4977bab6
ZW
9647 if (!rtx_equal_p (out, orig_out))
9648 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
9649
9650 return 1; /* DONE */
9651 }
9652
9653 /*
9654 * For comparison with above,
9655 *
9656 * movl cf,dest
9657 * movl ct,tmp
9658 * cmpl op1,op2
9659 * cmovcc tmp,dest
9660 *
9661 * Size 15.
9662 */
9663
635559ab
JH
9664 if (! nonimmediate_operand (operands[2], mode))
9665 operands[2] = force_reg (mode, operands[2]);
9666 if (! nonimmediate_operand (operands[3], mode))
9667 operands[3] = force_reg (mode, operands[3]);
e075ae69 9668
a1b8572c
JH
9669 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9670 {
635559ab 9671 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9672 emit_move_insn (tmp, operands[3]);
9673 operands[3] = tmp;
9674 }
9675 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9676 {
635559ab 9677 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9678 emit_move_insn (tmp, operands[2]);
9679 operands[2] = tmp;
9680 }
4977bab6 9681
c9682caf 9682 if (! register_operand (operands[2], VOIDmode)
b96a374d 9683 && (mode == QImode
4977bab6 9684 || ! register_operand (operands[3], VOIDmode)))
635559ab 9685 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9686
4977bab6
ZW
9687 if (mode == QImode
9688 && ! register_operand (operands[3], VOIDmode))
9689 operands[3] = force_reg (mode, operands[3]);
9690
e075ae69
RH
9691 emit_insn (compare_seq);
9692 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9693 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9694 compare_op, operands[2],
9695 operands[3])));
a1b8572c 9696 if (bypass_test)
4977bab6 9697 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9698 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9699 bypass_test,
4977bab6
ZW
9700 copy_rtx (operands[3]),
9701 copy_rtx (operands[0]))));
a1b8572c 9702 if (second_test)
4977bab6 9703 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9704 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9705 second_test,
4977bab6
ZW
9706 copy_rtx (operands[2]),
9707 copy_rtx (operands[0]))));
e075ae69
RH
9708
9709 return 1; /* DONE */
e9a25f70 9710}
e075ae69 9711
/* Expand a floating point conditional move.  operands[0] is the
   destination, operands[1] the comparison rtx, and operands[2] and
   operands[3] the values selected when the comparison is true and false
   respectively; the compared values are read from ix86_compare_op0 and
   ix86_compare_op1.  Every path emits insns and returns 1.  */
32b5b1aa 9712int
b96a374d 9713ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 9714{
e075ae69 9715 enum rtx_code code;
e075ae69 9716 rtx tmp;
a1b8572c 9717 rtx compare_op, second_test, bypass_test;
32b5b1aa 9718
0073023d
JH
9719 /* For SF/DFmode conditional moves based on comparisons
9720 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9721 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9722 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9723 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9724 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9725 && (!TARGET_IEEE_FP
9726 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9727 /* We may be called from the post-reload splitter. */
9728 && (!REG_P (operands[0])
9729 || SSE_REG_P (operands[0])
52a661a6 9730 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9731 {
9732 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9733 code = GET_CODE (operands[1]);
9734
9735 /* See if we have (cross) match between comparison operands and
9736 conditional move operands. */
9737 if (rtx_equal_p (operands[2], op1))
9738 {
9739 rtx tmp = op0;
9740 op0 = op1;
9741 op1 = tmp;
9742 code = reverse_condition_maybe_unordered (code);
9743 }
9744 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9745 {
9746 /* Check for min operation. */
4977bab6 9747 if (code == LT || code == UNLE)
0073023d 9748 {
   /* For UNLE the two values are handed to the min pattern in
      swapped order.  NOTE(review): presumably this picks the right
      value when the comparison is unordered -- confirm against the
      min patterns in i386.md.  */
4977bab6
ZW
9749 if (code == UNLE)
9750 {
9751 rtx tmp = op0;
9752 op0 = op1;
9753 op1 = tmp;
9754 }
0073023d
JH
9755 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9756 if (memory_operand (op0, VOIDmode))
9757 op0 = force_reg (GET_MODE (operands[0]), op0);
9758 if (GET_MODE (operands[0]) == SFmode)
9759 emit_insn (gen_minsf3 (operands[0], op0, op1));
9760 else
9761 emit_insn (gen_mindf3 (operands[0], op0, op1));
9762 return 1;
9763 }
9764 /* Check for max operation. */
4977bab6 9765 if (code == GT || code == UNGE)
0073023d 9766 {
   /* Same operand swap as in the min case above, here for UNGE.  */
4977bab6
ZW
9767 if (code == UNGE)
9768 {
9769 rtx tmp = op0;
9770 op0 = op1;
9771 op1 = tmp;
9772 }
0073023d
JH
9773 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9774 if (memory_operand (op0, VOIDmode))
9775 op0 = force_reg (GET_MODE (operands[0]), op0);
9776 if (GET_MODE (operands[0]) == SFmode)
9777 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9778 else
9779 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9780 return 1;
9781 }
9782 }
9783 /* Manage condition to be sse_comparison_operator. In case we are
9784 in non-ieee mode, try to canonicalize the destination operand
9785 to be first in the comparison - this helps reload to avoid extra
9786 moves. */
9787 if (!sse_comparison_operator (operands[1], VOIDmode)
9788 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9789 {
9790 rtx tmp = ix86_compare_op0;
9791 ix86_compare_op0 = ix86_compare_op1;
9792 ix86_compare_op1 = tmp;
9793 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9794 VOIDmode, ix86_compare_op0,
9795 ix86_compare_op1);
9796 }
d1f87653 9797 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
9798 move. We also don't support the NE comparison on SSE, so try to
9799 avoid it. */
037f20f1
JH
9800 if ((rtx_equal_p (operands[0], operands[3])
9801 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9802 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9803 {
9804 rtx tmp = operands[2];
9805 operands[2] = operands[3];
92d0fb09 9806 operands[3] = tmp;
0073023d
JH
9807 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9808 (GET_CODE (operands[1])),
9809 VOIDmode, ix86_compare_op0,
9810 ix86_compare_op1);
9811 }
9812 if (GET_MODE (operands[0]) == SFmode)
9813 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9814 operands[2], operands[3],
9815 ix86_compare_op0, ix86_compare_op1));
9816 else
9817 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9818 operands[2], operands[3],
9819 ix86_compare_op0, ix86_compare_op1));
9820 return 1;
9821 }
9822
e075ae69 9823 /* The floating point conditional move instructions don't directly
0f290768 9824 support conditions resulting from a signed integer comparison. */
32b5b1aa 9825
e075ae69 9826 code = GET_CODE (operands[1]);
a1b8572c 9827 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9828
9829 /* The floating point conditional move instructions don't directly
9830 support signed integer comparisons. */
9831
a1b8572c 9832 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9833 {
   /* Fallback: compute the condition into a QImode pseudo with setcc
      and redo the comparison as "that pseudo != 0".  Having extra
      tests at this point is treated as a fatal error.  */
a1b8572c 9834 if (second_test != NULL || bypass_test != NULL)
b531087a 9835 abort ();
e075ae69 9836 tmp = gen_reg_rtx (QImode);
3a3677ff 9837 ix86_expand_setcc (code, tmp);
e075ae69
RH
9838 code = NE;
9839 ix86_compare_op0 = tmp;
9840 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9841 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9842 }
 /* The extra conditional moves emitted at the end read operands[3]
    resp. operands[2] after operands[0] has already been written, so a
    value that overlaps the destination is first copied to a fresh
    pseudo.  */
9843 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9844 {
9845 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9846 emit_move_insn (tmp, operands[3]);
9847 operands[3] = tmp;
9848 }
9849 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9850 {
9851 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9852 emit_move_insn (tmp, operands[2]);
9853 operands[2] = tmp;
e075ae69 9854 }
e9a25f70 9855
 /* Main conditional move, followed by one corrective conditional move
    per extra test: bypass_test re-selects operands[3], second_test
    re-selects operands[2]; otherwise operands[0] keeps its value.  */
e075ae69
RH
9856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9857 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9858 compare_op,
e075ae69
RH
9859 operands[2],
9860 operands[3])));
a1b8572c
JH
9861 if (bypass_test)
9862 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9863 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9864 bypass_test,
9865 operands[3],
9866 operands[0])));
9867 if (second_test)
9868 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9869 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9870 second_test,
9871 operands[2],
9872 operands[0])));
32b5b1aa 9873
e075ae69 9874 return 1;
32b5b1aa
SC
9875}
9876
7b52eede
JH
9877/* Expand conditional increment or decrement using adc/sbb instructions.
9878 The default case using setcc followed by the conditional move can be
9879 done by generic code. */
9880int
b96a374d 9881ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
9882{
 /* operands[0] is the destination, operands[1] the comparison,
    operands[2] the value being adjusted and operands[3] the adjustment,
    which must be const1_rtx or constm1_rtx.  Returns 0 without emitting
    the add/sub when the adjustment is anything else or when
    ix86_expand_carry_flag_compare fails; returns 1 after emitting the
    carry-using add or subtract.  */
9883 enum rtx_code code = GET_CODE (operands[1]);
9884 rtx compare_op;
9885 rtx val = const0_rtx;
e6e81735 9886 bool fpcmp = false;
e6e81735 9887 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
9888
9889 if (operands[3] != const1_rtx
9890 && operands[3] != constm1_rtx)
9891 return 0;
9892 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9893 ix86_compare_op1, &compare_op))
9894 return 0;
e6e81735
JH
9895 code = GET_CODE (compare_op);
9896
 /* For a floating point flags mode, translate the condition to its
    integer counterpart before testing it against LTU below.  */
9897 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9898 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9899 {
9900 fpcmp = true;
9901 code = ix86_fp_compare_code_to_integer (code);
9902 }
9903
 /* When the condition is not LTU, reverse the comparison rtx in place
    and use -1 rather than 0 as the constant operand of the insn.  */
9904 if (code != LTU)
9905 {
9906 val = constm1_rtx;
9907 if (fpcmp)
9908 PUT_CODE (compare_op,
9909 reverse_condition_maybe_unordered
9910 (GET_CODE (compare_op)));
9911 else
9912 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9913 }
9914 PUT_MODE (compare_op, mode);
9915
9916 /* Construct either adc or sbb insn. */
 /* CODE still holds the condition from before the reversal above, so
    this picks the subtract form for (LTU, -1) and for (non-LTU, +1),
    and the add form for the other two combinations.  */
9917 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
9918 {
9919 switch (GET_MODE (operands[0]))
9920 {
9921 case QImode:
e6e81735 9922 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9923 break;
9924 case HImode:
e6e81735 9925 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9926 break;
9927 case SImode:
e6e81735 9928 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9929 break;
9930 case DImode:
e6e81735 9931 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
9932 break;
9933 default:
9934 abort ();
9935 }
9936 }
9937 else
9938 {
9939 switch (GET_MODE (operands[0]))
9940 {
9941 case QImode:
e6e81735 9942 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9943 break;
9944 case HImode:
e6e81735 9945 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9946 break;
9947 case SImode:
e6e81735 9948 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9949 break;
9950 case DImode:
e6e81735 9951 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
9952 break;
9953 default:
9954 abort ();
9955 }
9956 }
9957 return 1; /* DONE */
9958}
9959
9960
2450a057
JH
9961/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9962 works for floating point parameters and non-offsettable memories.
9963 For pushes, it returns just stack offsets; the values will be saved
9964 in the right order. Maximally three parts are generated. */
9965
2b589241 9966static int
b96a374d 9967ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 9968{
 /* Splits OPERAND, treated as a value of mode MODE, into word-sized
    pieces stored in PARTS[0..], and returns the number of pieces.  */
26e5b205
JH
9969 int size;
9970
 /* Number of pieces: for !TARGET_64BIT XFmode is always 3 and other
    modes use one piece per 4 bytes; for TARGET_64BIT the count is
    (size + 4) / 8.  Only 2 or 3 pieces are supported (checked below).  */
9971 if (!TARGET_64BIT)
f8a1ebc6 9972 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
26e5b205
JH
9973 else
9974 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9975
a7180f70
BS
9976 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9977 abort ();
2450a057
JH
9978 if (size < 2 || size > 3)
9979 abort ();
9980
f996902d
RH
9981 /* Optimize constant pool reference to immediates. This is used by fp
9982 moves, that force all constants to memory to allow combining. */
389fdba0 9983 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
f996902d
RH
9984 {
9985 rtx tmp = maybe_get_pool_constant (operand);
9986 if (tmp)
9987 operand = tmp;
9988 }
d7a29404 9989
2450a057 9990 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9991 {
2450a057
JH
9992 /* The only non-offsetable memories we handle are pushes. */
9993 if (! push_operand (operand, VOIDmode))
9994 abort ();
9995
   /* All pieces of a push are the same rtx: a copy of the push operand
      retyped to Pmode.  */
26e5b205
JH
9996 operand = copy_rtx (operand);
9997 PUT_MODE (operand, Pmode);
2450a057
JH
9998 parts[0] = parts[1] = parts[2] = operand;
9999 }
26e5b205 10000 else if (!TARGET_64BIT)
2450a057
JH
10001 {
   /* 32-bit target: every piece is SImode.  */
10002 if (mode == DImode)
10003 split_di (&operand, 1, &parts[0], &parts[1]);
10004 else
e075ae69 10005 {
2450a057
JH
10006 if (REG_P (operand))
10007 {
   /* Hard register numbers are only meaningful after reload.  */
10008 if (!reload_completed)
10009 abort ();
10010 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10011 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10012 if (size == 3)
10013 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10014 }
10015 else if (offsettable_memref_p (operand))
10016 {
f4ef873c 10017 operand = adjust_address (operand, SImode, 0);
2450a057 10018 parts[0] = operand;
b72f00af 10019 parts[1] = adjust_address (operand, SImode, 4);
2450a057 10020 if (size == 3)
b72f00af 10021 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
10022 }
10023 else if (GET_CODE (operand) == CONST_DOUBLE)
10024 {
10025 REAL_VALUE_TYPE r;
2b589241 10026 long l[4];
2450a057
JH
10027
   /* Convert the constant to target words in L; XFmode supplies the
      third piece, and the first two are set after the switch.  */
10028 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10029 switch (mode)
10030 {
10031 case XFmode:
10032 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 10033 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
10034 break;
10035 case DFmode:
10036 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10037 break;
10038 default:
10039 abort ();
10040 }
d8bf17f9
LB
10041 parts[1] = gen_int_mode (l[1], SImode);
10042 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
10043 }
10044 else
10045 abort ();
e075ae69 10046 }
2450a057 10047 }
26e5b205
JH
10048 else
10049 {
   /* 64-bit target: the low piece is DImode; for XFmode/TFmode the
      upper piece is SImode resp. DImode (upper_mode below).  */
44cf5b6a
JH
10050 if (mode == TImode)
10051 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
10052 if (mode == XFmode || mode == TFmode)
10053 {
f8a1ebc6 10054 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
26e5b205
JH
10055 if (REG_P (operand))
10056 {
10057 if (!reload_completed)
10058 abort ();
10059 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
f8a1ebc6 10060 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
26e5b205
JH
10061 }
10062 else if (offsettable_memref_p (operand))
10063 {
b72f00af 10064 operand = adjust_address (operand, DImode, 0);
26e5b205 10065 parts[0] = operand;
f8a1ebc6 10066 parts[1] = adjust_address (operand, upper_mode, 8);
26e5b205
JH
10067 }
10068 else if (GET_CODE (operand) == CONST_DOUBLE)
10069 {
10070 REAL_VALUE_TYPE r;
38606553 10071 long l[4];
26e5b205
JH
10072
10073 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9953b5e1 10074 real_to_target (l, &r, mode);
38606553 10075
   /* Glue l[0] (low 32 bits) and l[1] (high 32 bits) into one DImode
      constant for the low piece.  */
26e5b205
JH
10076 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10077 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 10078 parts[0]
d8bf17f9 10079 = gen_int_mode
44cf5b6a 10080 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 10081 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 10082 DImode);
26e5b205
JH
10083 else
10084 parts[0] = immed_double_const (l[0], l[1], DImode);
38606553 10085
   /* Upper piece: l[2] alone for an SImode upper part, otherwise l[2]
      and l[3] glued the same way as above.  */
f8a1ebc6
JH
10086 if (upper_mode == SImode)
10087 parts[1] = gen_int_mode (l[2], SImode);
10088 else if (HOST_BITS_PER_WIDE_INT >= 64)
10089 parts[1]
10090 = gen_int_mode
10091 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10092 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10093 DImode);
10094 else
10095 parts[1] = immed_double_const (l[2], l[3], DImode);
26e5b205
JH
10096 }
10097 else
10098 abort ();
10099 }
10100 }
2450a057 10101
2b589241 10102 return size;
2450a057
JH
10103}
10104
10105/* Emit insns to perform a move or push of DI, DF, and XF values.
10106 All required insns are emitted here and nothing is returned. For
10107 non-push moves, operands 2-4 receive the destination parts and operands
10108 5-7 the source parts, ordered so no source is clobbered before its copy. */
10109
26e5b205 10110void
b96a374d 10111ix86_split_long_move (rtx operands[])
2450a057
JH
10112{
 /* operands[0] is the destination and operands[1] the source of the
    move.  part[0][] holds the pieces of the destination and part[1][]
    the pieces of the source; nparts is the number of pieces (2 or 3).  */
10113 rtx part[2][3];
26e5b205 10114 int nparts;
2450a057
JH
10115 int push = 0;
10116 int collisions = 0;
26e5b205
JH
10117 enum machine_mode mode = GET_MODE (operands[0]);
10118
10119 /* The DFmode expanders may ask us to move double.
10120 For 64bit target this is single move. By hiding the fact
10121 here we simplify i386.md splitters. */
10122 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10123 {
8cdfa312
RH
10124 /* Optimize constant pool reference to immediates. This is used by
10125 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
10126
10127 if (GET_CODE (operands[1]) == MEM
10128 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10129 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10130 operands[1] = get_pool_constant (XEXP (operands[1], 0));
   /* A push destination is retyped in place on a copy; any other
      destination, and the source, are viewed as DImode.  */
10131 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
10132 {
10133 operands[0] = copy_rtx (operands[0]);
10134 PUT_MODE (operands[0], Pmode);
10135 }
26e5b205
JH
10136 else
10137 operands[0] = gen_lowpart (DImode, operands[0]);
10138 operands[1] = gen_lowpart (DImode, operands[1]);
10139 emit_move_insn (operands[0], operands[1]);
10140 return;
10141 }
2450a057 10142
2450a057
JH
10143 /* The only non-offsettable memory we handle is push. */
10144 if (push_operand (operands[0], VOIDmode))
10145 push = 1;
10146 else if (GET_CODE (operands[0]) == MEM
10147 && ! offsettable_memref_p (operands[0]))
10148 abort ();
10149
 /* Split source and destination alike, both using the destination's
    mode; the piece count is taken from the source split.  */
26e5b205
JH
10150 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10151 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
10152
10153 /* When emitting push, take care for source operands on the stack. */
10154 if (push && GET_CODE (operands[1]) == MEM
10155 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10156 {
   /* Each lower source piece is re-addressed with the address of the
      next higher piece.  NOTE(review): presumably this compensates for
      the stack pointer moving with every push -- confirm.  */
26e5b205 10157 if (nparts == 3)
886cbb88
JH
10158 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10159 XEXP (part[1][2], 0));
10160 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10161 XEXP (part[1][1], 0));
2450a057
JH
10162 }
10163
0f290768 10164 /* We need to do copy in the right order in case an address register
2450a057
JH
10165 of the source overlaps the destination. */
10166 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10167 {
   /* Count how many destination pieces appear in the source address.  */
10168 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10169 collisions++;
10170 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10171 collisions++;
26e5b205 10172 if (nparts == 3
2450a057
JH
10173 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10174 collisions++;
10175
10176 /* Collision in the middle part can be handled by reordering. */
26e5b205 10177 if (collisions == 1 && nparts == 3
2450a057 10178 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 10179 {
2450a057
JH
10180 rtx tmp;
10181 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10182 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10183 }
e075ae69 10184
2450a057
JH
10185 /* If there are more collisions, we can't handle it by reordering.
10186 Do an lea to the last part and use only one colliding move. */
10187 else if (collisions > 1)
10188 {
8231b3f9
RH
10189 rtx base;
10190
2450a057 10191 collisions = 1;
8231b3f9
RH
10192
10193 base = part[0][nparts - 1];
10194
10195 /* Handle the case when the last part isn't valid for lea.
10196 Happens in 64-bit mode storing the 12-byte XFmode. */
10197 if (GET_MODE (base) != Pmode)
10198 base = gen_rtx_REG (Pmode, REGNO (base));
10199
   /* Load the source address into BASE and rewrite every source piece
      to be addressed relative to BASE.  */
10200 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10201 part[1][0] = replace_equiv_address (part[1][0], base);
10202 part[1][1] = replace_equiv_address (part[1][1],
10203 plus_constant (base, UNITS_PER_WORD));
26e5b205 10204 if (nparts == 3)
8231b3f9
RH
10205 part[1][2] = replace_equiv_address (part[1][2],
10206 plus_constant (base, 8));
2450a057
JH
10207 }
10208 }
10209
10210 if (push)
10211 {
   /* Pushes are emitted from the highest piece down to piece 0.  */
26e5b205 10212 if (!TARGET_64BIT)
2b589241 10213 {
26e5b205
JH
10214 if (nparts == 3)
10215 {
   /* With TARGET_128BIT_LONG_DOUBLE an XFmode push first lowers the
      stack pointer by 4 more bytes.  NOTE(review): presumably padding
      to the 16-byte XFmode slot -- confirm.  */
f8a1ebc6
JH
10216 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10217 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
26e5b205
JH
10218 emit_move_insn (part[0][2], part[1][2]);
10219 }
2b589241 10220 }
26e5b205
JH
10221 else
10222 {
10223 /* In 64bit mode we don't have 32bit push available. In case this is
10224 register, it is OK - we will just use larger counterpart. We also
10225 retype memory - these comes from attempt to avoid REX prefix on
10226 moving of second half of TFmode value. */
10227 if (GET_MODE (part[1][1]) == SImode)
10228 {
10229 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 10230 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
10231 else if (REG_P (part[1][1]))
10232 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10233 else
b531087a 10234 abort ();
886cbb88
JH
10235 if (GET_MODE (part[1][0]) == SImode)
10236 part[1][0] = part[1][1];
26e5b205
JH
10237 }
10238 }
10239 emit_move_insn (part[0][1], part[1][1]);
10240 emit_move_insn (part[0][0], part[1][0]);
10241 return;
2450a057
JH
10242 }
10243
10244 /* Choose correct order to not overwrite the source before it is copied. */
 /* If destination piece 0 is also a later source piece, or is used in
    the source address, emit the moves from the highest piece down;
    otherwise from piece 0 up.  Destinations go to operands[2..4] and
    sources to operands[5..7].  */
10245 if ((REG_P (part[0][0])
10246 && REG_P (part[1][1])
10247 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 10248 || (nparts == 3
2450a057
JH
10249 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10250 || (collisions > 0
10251 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10252 {
26e5b205 10253 if (nparts == 3)
2450a057 10254 {
26e5b205
JH
10255 operands[2] = part[0][2];
10256 operands[3] = part[0][1];
10257 operands[4] = part[0][0];
10258 operands[5] = part[1][2];
10259 operands[6] = part[1][1];
10260 operands[7] = part[1][0];
2450a057
JH
10261 }
10262 else
10263 {
26e5b205
JH
10264 operands[2] = part[0][1];
10265 operands[3] = part[0][0];
10266 operands[5] = part[1][1];
10267 operands[6] = part[1][0];
2450a057
JH
10268 }
10269 }
10270 else
10271 {
26e5b205 10272 if (nparts == 3)
2450a057 10273 {
26e5b205
JH
10274 operands[2] = part[0][0];
10275 operands[3] = part[0][1];
10276 operands[4] = part[0][2];
10277 operands[5] = part[1][0];
10278 operands[6] = part[1][1];
10279 operands[7] = part[1][2];
2450a057
JH
10280 }
10281 else
10282 {
26e5b205
JH
10283 operands[2] = part[0][0];
10284 operands[3] = part[0][1];
10285 operands[5] = part[1][0];
10286 operands[6] = part[1][1];
e075ae69
RH
10287 }
10288 }
903a5059 10289
0e40b5f2 10290 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
 /* That is: when a later source piece is the same nonzero constant as
    an earlier one whose destination is a register, use that register
    as the later source instead of repeating the constant.  */
903a5059
RS
10291 if (optimize_size)
10292 {
10293 if (GET_CODE (operands[5]) == CONST_INT
10294 && operands[5] != const0_rtx
10295 && REG_P (operands[2]))
10296 {
10297 if (GET_CODE (operands[6]) == CONST_INT
10298 && INTVAL (operands[6]) == INTVAL (operands[5]))
10299 operands[6] = operands[2];
10300
10301 if (nparts == 3
10302 && GET_CODE (operands[7]) == CONST_INT
10303 && INTVAL (operands[7]) == INTVAL (operands[5]))
10304 operands[7] = operands[2];
10305 }
10306
10307 if (nparts == 3
10308 && GET_CODE (operands[6]) == CONST_INT
10309 && operands[6] != const0_rtx
10310 && REG_P (operands[3])
10311 && GET_CODE (operands[7]) == CONST_INT
10312 && INTVAL (operands[7]) == INTVAL (operands[6]))
10313 operands[7] = operands[3];
10314 }
10315
 /* Emit the piecewise moves in the order chosen above.  */
26e5b205
JH
10316 emit_move_insn (operands[2], operands[5]);
10317 emit_move_insn (operands[3], operands[6]);
10318 if (nparts == 3)
10319 emit_move_insn (operands[4], operands[7]);
32b5b1aa 10320
26e5b205 10321 return;
32b5b1aa 10322}
32b5b1aa 10323
1b83d209
RS
10324/* Helper function of ix86_split_ashldi used to generate an SImode
10325 left shift by a constant, either using a single shift or
10326 a sequence of add instructions. */
10327
10328static void
10329ix86_expand_ashlsi3_const (rtx operand, int count)
10330{
 /* OPERAND is shifted left in place by COUNT bits.  A shift by one is
    always done as operand + operand.  */
10331 if (count == 1)
10332 emit_insn (gen_addsi3 (operand, operand, operand));
 /* When not optimizing for size, use COUNT self-additions if the cost
    table says that is no more expensive than one constant shift.  */
10333 else if (!optimize_size
10334 && count * ix86_cost->add <= ix86_cost->shift_const)
10335 {
10336 int i;
10337 for (i=0; i<count; i++)
10338 emit_insn (gen_addsi3 (operand, operand, operand));
10339 }
10340 else
10341 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10342}
10343
e075ae69 10344void
b96a374d 10345ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10346{
e075ae69
RH
10347 rtx low[2], high[2];
10348 int count;
b985a30f 10349
e075ae69
RH
10350 if (GET_CODE (operands[2]) == CONST_INT)
10351 {
10352 split_di (operands, 2, low, high);
10353 count = INTVAL (operands[2]) & 63;
32b5b1aa 10354
e075ae69
RH
10355 if (count >= 32)
10356 {
10357 emit_move_insn (high[0], low[1]);
10358 emit_move_insn (low[0], const0_rtx);
b985a30f 10359
e075ae69 10360 if (count > 32)
1b83d209 10361 ix86_expand_ashlsi3_const (high[0], count - 32);
e075ae69
RH
10362 }
10363 else
10364 {
10365 if (!rtx_equal_p (operands[0], operands[1]))
10366 emit_move_insn (operands[0], operands[1]);
10367 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
1b83d209 10368 ix86_expand_ashlsi3_const (low[0], count);
e075ae69 10369 }
93330ea1 10370 return;
e075ae69 10371 }
93330ea1
RH
10372
10373 split_di (operands, 1, low, high);
10374
10375 if (operands[1] == const1_rtx)
e075ae69 10376 {
93330ea1
RH
10377 /* Assuming we've chosen a QImode capable registers, then 1LL << N
10378 can be done with two 32-bit shifts, no branches, no cmoves. */
10379 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10380 {
10381 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
b985a30f 10382
93330ea1
RH
10383 ix86_expand_clear (low[0]);
10384 ix86_expand_clear (high[0]);
10385 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10386
10387 d = gen_lowpart (QImode, low[0]);
10388 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10389 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10390 emit_insn (gen_rtx_SET (VOIDmode, d, s));
b985a30f 10391
93330ea1
RH
10392 d = gen_lowpart (QImode, high[0]);
10393 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10394 s = gen_rtx_NE (QImode, flags, const0_rtx);
10395 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10396 }
32b5b1aa 10397
93330ea1
RH
10398 /* Otherwise, we can get the same results by manually performing
10399 a bit extract operation on bit 5, and then performing the two
10400 shifts. The two methods of getting 0/1 into low/high are exactly
10401 the same size. Avoiding the shift in the bit extract case helps
10402 pentium4 a bit; no one else seems to care much either way. */
10403 else
e075ae69 10404 {
93330ea1
RH
10405 rtx x;
10406
10407 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10408 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
e075ae69 10409 else
93330ea1
RH
10410 x = gen_lowpart (SImode, operands[2]);
10411 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
e075ae69 10412
93330ea1
RH
10413 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10414 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10415 emit_move_insn (low[0], high[0]);
10416 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
e075ae69 10417 }
93330ea1
RH
10418
10419 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10420 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10421 return;
10422 }
10423
10424 if (operands[1] == constm1_rtx)
10425 {
10426 /* For -1LL << N, we can avoid the shld instruction, because we
10427 know that we're shifting 0...31 ones into a -1. */
10428 emit_move_insn (low[0], constm1_rtx);
10429 if (optimize_size)
10430 emit_move_insn (high[0], low[0]);
e075ae69 10431 else
93330ea1 10432 emit_move_insn (high[0], constm1_rtx);
e075ae69 10433 }
93330ea1
RH
10434 else
10435 {
10436 if (!rtx_equal_p (operands[0], operands[1]))
10437 emit_move_insn (operands[0], operands[1]);
10438
10439 split_di (operands, 1, low, high);
10440 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10441 }
10442
10443 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10444
10445 if (TARGET_CMOVE && scratch)
10446 {
10447 ix86_expand_clear (scratch);
10448 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10449 }
10450 else
10451 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
e9a25f70 10452}
32b5b1aa 10453
e075ae69 10454void
b96a374d 10455ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10456{
e075ae69
RH
10457 rtx low[2], high[2];
10458 int count;
32b5b1aa 10459
e075ae69
RH
10460 if (GET_CODE (operands[2]) == CONST_INT)
10461 {
10462 split_di (operands, 2, low, high);
10463 count = INTVAL (operands[2]) & 63;
32b5b1aa 10464
8937b6a2
RS
10465 if (count == 63)
10466 {
10467 emit_move_insn (high[0], high[1]);
10468 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10469 emit_move_insn (low[0], high[0]);
10470
10471 }
10472 else if (count >= 32)
e075ae69
RH
10473 {
10474 emit_move_insn (low[0], high[1]);
93330ea1
RH
10475 emit_move_insn (high[0], low[0]);
10476 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
e075ae69
RH
10477 if (count > 32)
10478 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10479 }
10480 else
10481 {
10482 if (!rtx_equal_p (operands[0], operands[1]))
10483 emit_move_insn (operands[0], operands[1]);
10484 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10485 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10486 }
10487 }
10488 else
32b5b1aa 10489 {
e075ae69
RH
10490 if (!rtx_equal_p (operands[0], operands[1]))
10491 emit_move_insn (operands[0], operands[1]);
10492
10493 split_di (operands, 1, low, high);
10494
10495 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10496 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10497
93330ea1 10498 if (TARGET_CMOVE && scratch)
e075ae69 10499 {
e075ae69
RH
10500 emit_move_insn (scratch, high[0]);
10501 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10502 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10503 scratch));
10504 }
10505 else
10506 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10507 }
e075ae69 10508}
32b5b1aa 10509
e075ae69 10510void
b96a374d 10511ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
10512{
10513 rtx low[2], high[2];
10514 int count;
32b5b1aa 10515
e075ae69 10516 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10517 {
e075ae69
RH
10518 split_di (operands, 2, low, high);
10519 count = INTVAL (operands[2]) & 63;
10520
10521 if (count >= 32)
c7271385 10522 {
e075ae69 10523 emit_move_insn (low[0], high[1]);
93330ea1 10524 ix86_expand_clear (high[0]);
32b5b1aa 10525
e075ae69
RH
10526 if (count > 32)
10527 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10528 }
10529 else
10530 {
10531 if (!rtx_equal_p (operands[0], operands[1]))
10532 emit_move_insn (operands[0], operands[1]);
10533 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10534 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10535 }
32b5b1aa 10536 }
e075ae69
RH
10537 else
10538 {
10539 if (!rtx_equal_p (operands[0], operands[1]))
10540 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10541
e075ae69
RH
10542 split_di (operands, 1, low, high);
10543
10544 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10545 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10546
10547 /* Heh. By reversing the arguments, we can reuse this pattern. */
93330ea1 10548 if (TARGET_CMOVE && scratch)
e075ae69 10549 {
93330ea1 10550 ix86_expand_clear (scratch);
e075ae69
RH
10551 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10552 scratch));
10553 }
10554 else
10555 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10556 }
32b5b1aa 10557}
3f803cd9 10558
0407c02b 10559/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
10560 it is aligned to VALUE bytes. If true, jump to the label. */
10561static rtx
b96a374d 10562ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
10563{
10564 rtx label = gen_label_rtx ();
10565 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10566 if (GET_MODE (variable) == DImode)
10567 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10568 else
10569 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10570 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10571 1, label);
0945b39d
JH
10572 return label;
10573}
10574
10575/* Adjust COUNTER by the VALUE. */
10576static void
b96a374d 10577ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
10578{
10579 if (GET_MODE (countreg) == DImode)
10580 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10581 else
10582 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10583}
10584
10585/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10586rtx
b96a374d 10587ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
10588{
10589 rtx r;
10590 if (GET_MODE (exp) == VOIDmode)
10591 return force_reg (Pmode, exp);
10592 if (GET_MODE (exp) == Pmode)
10593 return copy_to_mode_reg (Pmode, exp);
10594 r = gen_reg_rtx (Pmode);
10595 emit_insn (gen_zero_extendsidi2 (r, exp));
10596 return r;
10597}
10598
10599/* Expand string move (memcpy) operation. Use i386 string operations when
70128ad9 10600 profitable. expand_clrmem contains similar code. */
0945b39d 10601int
70128ad9 10602ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
0945b39d 10603{
4e44c1ef 10604 rtx srcreg, destreg, countreg, srcexp, destexp;
0945b39d
JH
10605 enum machine_mode counter_mode;
10606 HOST_WIDE_INT align = 0;
10607 unsigned HOST_WIDE_INT count = 0;
0945b39d 10608
0945b39d
JH
10609 if (GET_CODE (align_exp) == CONST_INT)
10610 align = INTVAL (align_exp);
10611
d0a5295a
RH
10612 /* Can't use any of this if the user has appropriated esi or edi. */
10613 if (global_regs[4] || global_regs[5])
10614 return 0;
10615
5519a4f9 10616 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10617 if (!TARGET_ALIGN_STRINGOPS)
10618 align = 64;
10619
10620 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
10621 {
10622 count = INTVAL (count_exp);
10623 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10624 return 0;
10625 }
0945b39d
JH
10626
10627 /* Figure out proper mode for counter. For 32bits it is always SImode,
10628 for 64bits use SImode when possible, otherwise DImode.
10629 Set count to number of bytes copied when known at compile time. */
8fe75e43
RH
10630 if (!TARGET_64BIT
10631 || GET_MODE (count_exp) == SImode
10632 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
0945b39d
JH
10633 counter_mode = SImode;
10634 else
10635 counter_mode = DImode;
10636
10637 if (counter_mode != SImode && counter_mode != DImode)
10638 abort ();
10639
10640 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
4e44c1ef
JJ
10641 if (destreg != XEXP (dst, 0))
10642 dst = replace_equiv_address_nv (dst, destreg);
0945b39d 10643 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
4e44c1ef
JJ
10644 if (srcreg != XEXP (src, 0))
10645 src = replace_equiv_address_nv (src, srcreg);
0945b39d
JH
10646
10647 /* When optimizing for size emit simple rep ; movsb instruction for
10648 counts not divisible by 4. */
10649
10650 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10651 {
4e44c1ef 10652 emit_insn (gen_cld ());
0945b39d 10653 countreg = ix86_zero_extend_to_Pmode (count_exp);
4e44c1ef
JJ
10654 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10655 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10656 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10657 destexp, srcexp));
0945b39d
JH
10658 }
10659
10660 /* For constant aligned (or small unaligned) copies use rep movsl
10661 followed by code copying the rest. For PentiumPro ensure 8 byte
10662 alignment to allow rep movsl acceleration. */
10663
10664 else if (count != 0
10665 && (align >= 8
10666 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10667 || optimize_size || count < (unsigned int) 64))
0945b39d 10668 {
4e44c1ef 10669 unsigned HOST_WIDE_INT offset = 0;
0945b39d 10670 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
10671 rtx srcmem, dstmem;
10672
10673 emit_insn (gen_cld ());
0945b39d
JH
10674 if (count & ~(size - 1))
10675 {
10676 countreg = copy_to_mode_reg (counter_mode,
10677 GEN_INT ((count >> (size == 4 ? 2 : 3))
10678 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10679 countreg = ix86_zero_extend_to_Pmode (countreg);
f676971a 10680
4e44c1ef
JJ
10681 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10682 GEN_INT (size == 4 ? 2 : 3));
10683 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10684 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10685
10686 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10687 countreg, destexp, srcexp));
10688 offset = count & ~(size - 1);
0945b39d
JH
10689 }
10690 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
10691 {
10692 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10693 offset);
10694 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10695 offset);
10696 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10697 offset += 4;
10698 }
0945b39d 10699 if (count & 0x02)
4e44c1ef
JJ
10700 {
10701 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10702 offset);
10703 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10704 offset);
10705 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10706 offset += 2;
10707 }
0945b39d 10708 if (count & 0x01)
4e44c1ef
JJ
10709 {
10710 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10711 offset);
10712 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10713 offset);
10714 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10715 }
0945b39d
JH
10716 }
10717 /* The generic code based on the glibc implementation:
10718 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10719 allowing accelerated copying there)
10720 - copy the data using rep movsl
10721 - copy the rest. */
10722 else
10723 {
10724 rtx countreg2;
10725 rtx label = NULL;
4e44c1ef 10726 rtx srcmem, dstmem;
37ad04a5
JH
10727 int desired_alignment = (TARGET_PENTIUMPRO
10728 && (count == 0 || count >= (unsigned int) 260)
10729 ? 8 : UNITS_PER_WORD);
4e44c1ef
JJ
10730 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10731 dst = change_address (dst, BLKmode, destreg);
10732 src = change_address (src, BLKmode, srcreg);
0945b39d
JH
10733
10734 /* In case we don't know anything about the alignment, default to
10735 library version, since it is usually equally fast and result in
b96a374d 10736 shorter code.
4977bab6
ZW
10737
10738 Also emit call when we know that the count is large and call overhead
10739 will not be important. */
10740 if (!TARGET_INLINE_ALL_STRINGOPS
10741 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
4e44c1ef 10742 return 0;
0945b39d
JH
10743
10744 if (TARGET_SINGLE_STRINGOP)
10745 emit_insn (gen_cld ());
10746
10747 countreg2 = gen_reg_rtx (Pmode);
10748 countreg = copy_to_mode_reg (counter_mode, count_exp);
10749
10750 /* We don't use loops to align destination and to copy parts smaller
10751 than 4 bytes, because gcc is able to optimize such code better (in
10752 the case the destination or the count really is aligned, gcc is often
10753 able to predict the branches) and also it is friendlier to the
a4f31c00 10754 hardware branch prediction.
0945b39d 10755
d1f87653 10756 Using loops is beneficial for generic case, because we can
0945b39d
JH
10757 handle small counts using the loops. Many CPUs (such as Athlon)
10758 have large REP prefix setup costs.
10759
4aae8a9a 10760 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
10761 add some customizability to this code. */
10762
37ad04a5 10763 if (count == 0 && align < desired_alignment)
0945b39d
JH
10764 {
10765 label = gen_label_rtx ();
aaae0bb9 10766 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10767 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10768 }
10769 if (align <= 1)
10770 {
10771 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
10772 srcmem = change_address (src, QImode, srcreg);
10773 dstmem = change_address (dst, QImode, destreg);
10774 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10775 ix86_adjust_counter (countreg, 1);
10776 emit_label (label);
10777 LABEL_NUSES (label) = 1;
10778 }
10779 if (align <= 2)
10780 {
10781 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
10782 srcmem = change_address (src, HImode, srcreg);
10783 dstmem = change_address (dst, HImode, destreg);
10784 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10785 ix86_adjust_counter (countreg, 2);
10786 emit_label (label);
10787 LABEL_NUSES (label) = 1;
10788 }
37ad04a5 10789 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10790 {
10791 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
10792 srcmem = change_address (src, SImode, srcreg);
10793 dstmem = change_address (dst, SImode, destreg);
10794 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10795 ix86_adjust_counter (countreg, 4);
10796 emit_label (label);
10797 LABEL_NUSES (label) = 1;
10798 }
10799
37ad04a5
JH
10800 if (label && desired_alignment > 4 && !TARGET_64BIT)
10801 {
10802 emit_label (label);
10803 LABEL_NUSES (label) = 1;
10804 label = NULL_RTX;
10805 }
0945b39d
JH
10806 if (!TARGET_SINGLE_STRINGOP)
10807 emit_insn (gen_cld ());
10808 if (TARGET_64BIT)
10809 {
10810 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10811 GEN_INT (3)));
4e44c1ef 10812 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
10813 }
10814 else
10815 {
4e44c1ef
JJ
10816 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10817 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 10818 }
4e44c1ef
JJ
10819 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10820 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10821 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10822 countreg2, destexp, srcexp));
0945b39d
JH
10823
10824 if (label)
10825 {
10826 emit_label (label);
10827 LABEL_NUSES (label) = 1;
10828 }
10829 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
10830 {
10831 srcmem = change_address (src, SImode, srcreg);
10832 dstmem = change_address (dst, SImode, destreg);
10833 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10834 }
0945b39d
JH
10835 if ((align <= 4 || count == 0) && TARGET_64BIT)
10836 {
10837 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
10838 srcmem = change_address (src, SImode, srcreg);
10839 dstmem = change_address (dst, SImode, destreg);
10840 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10841 emit_label (label);
10842 LABEL_NUSES (label) = 1;
10843 }
10844 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
10845 {
10846 srcmem = change_address (src, HImode, srcreg);
10847 dstmem = change_address (dst, HImode, destreg);
10848 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10849 }
0945b39d
JH
10850 if (align <= 2 || count == 0)
10851 {
10852 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
10853 srcmem = change_address (src, HImode, srcreg);
10854 dstmem = change_address (dst, HImode, destreg);
10855 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10856 emit_label (label);
10857 LABEL_NUSES (label) = 1;
10858 }
10859 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
10860 {
10861 srcmem = change_address (src, QImode, srcreg);
10862 dstmem = change_address (dst, QImode, destreg);
10863 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10864 }
0945b39d
JH
10865 if (align <= 1 || count == 0)
10866 {
10867 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
10868 srcmem = change_address (src, QImode, srcreg);
10869 dstmem = change_address (dst, QImode, destreg);
10870 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10873 }
10874 }
10875
0945b39d
JH
10876 return 1;
10877}
10878
10879/* Expand string clear operation (bzero). Use i386 string operations when
70128ad9 10880 profitable. expand_movmem contains similar code. */
0945b39d 10881int
70128ad9 10882ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
0945b39d 10883{
4e44c1ef 10884 rtx destreg, zeroreg, countreg, destexp;
0945b39d
JH
10885 enum machine_mode counter_mode;
10886 HOST_WIDE_INT align = 0;
10887 unsigned HOST_WIDE_INT count = 0;
10888
10889 if (GET_CODE (align_exp) == CONST_INT)
10890 align = INTVAL (align_exp);
10891
d0a5295a
RH
10892 /* Can't use any of this if the user has appropriated esi. */
10893 if (global_regs[4])
10894 return 0;
10895
5519a4f9 10896 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10897 if (!TARGET_ALIGN_STRINGOPS)
10898 align = 32;
10899
10900 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
10901 {
10902 count = INTVAL (count_exp);
10903 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10904 return 0;
10905 }
0945b39d
JH
10906 /* Figure out proper mode for counter. For 32bits it is always SImode,
10907 for 64bits use SImode when possible, otherwise DImode.
10908 Set count to number of bytes copied when known at compile time. */
8fe75e43
RH
10909 if (!TARGET_64BIT
10910 || GET_MODE (count_exp) == SImode
10911 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
0945b39d
JH
10912 counter_mode = SImode;
10913 else
10914 counter_mode = DImode;
10915
4e44c1ef
JJ
10916 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10917 if (destreg != XEXP (dst, 0))
10918 dst = replace_equiv_address_nv (dst, destreg);
0945b39d 10919
0945b39d
JH
10920
10921 /* When optimizing for size emit simple rep ; movsb instruction for
6b32b628
JJ
10922 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10923 sequence is 7 bytes long, so if optimizing for size and count is
10924 small enough that some stosl, stosw and stosb instructions without
10925 rep are shorter, fall back into the next if. */
0945b39d 10926
6b32b628
JJ
10927 if ((!optimize || optimize_size)
10928 && (count == 0
10929 || ((count & 0x03)
10930 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
0945b39d 10931 {
6b32b628
JJ
10932 emit_insn (gen_cld ());
10933
0945b39d
JH
10934 countreg = ix86_zero_extend_to_Pmode (count_exp);
10935 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
4e44c1ef
JJ
10936 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10937 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
0945b39d
JH
10938 }
10939 else if (count != 0
10940 && (align >= 8
10941 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10942 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10943 {
10944 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
10945 unsigned HOST_WIDE_INT offset = 0;
10946
6b32b628
JJ
10947 emit_insn (gen_cld ());
10948
0945b39d
JH
10949 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10950 if (count & ~(size - 1))
10951 {
6b32b628
JJ
10952 unsigned HOST_WIDE_INT repcount;
10953 unsigned int max_nonrep;
10954
10955 repcount = count >> (size == 4 ? 2 : 3);
10956 if (!TARGET_64BIT)
10957 repcount &= 0x3fffffff;
10958
10959 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10960 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10961 bytes. In both cases the latter seems to be faster for small
10962 values of N. */
10963 max_nonrep = size == 4 ? 7 : 4;
10964 if (!optimize_size)
10965 switch (ix86_tune)
10966 {
10967 case PROCESSOR_PENTIUM4:
10968 case PROCESSOR_NOCONA:
10969 max_nonrep = 3;
10970 break;
10971 default:
10972 break;
10973 }
10974
10975 if (repcount <= max_nonrep)
10976 while (repcount-- > 0)
10977 {
10978 rtx mem = adjust_automodify_address_nv (dst,
10979 GET_MODE (zeroreg),
10980 destreg, offset);
0737fbff 10981 emit_insn (gen_strset (destreg, mem, zeroreg));
6b32b628
JJ
10982 offset += size;
10983 }
10984 else
10985 {
10986 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10987 countreg = ix86_zero_extend_to_Pmode (countreg);
10988 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10989 GEN_INT (size == 4 ? 2 : 3));
10990 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10991 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10992 destexp));
10993 offset = count & ~(size - 1);
10994 }
0945b39d
JH
10995 }
10996 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
10997 {
10998 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10999 offset);
11000 emit_insn (gen_strset (destreg, mem,
0945b39d 11001 gen_rtx_SUBREG (SImode, zeroreg, 0)));
4e44c1ef
JJ
11002 offset += 4;
11003 }
0945b39d 11004 if (count & 0x02)
4e44c1ef
JJ
11005 {
11006 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11007 offset);
11008 emit_insn (gen_strset (destreg, mem,
0945b39d 11009 gen_rtx_SUBREG (HImode, zeroreg, 0)));
4e44c1ef
JJ
11010 offset += 2;
11011 }
0945b39d 11012 if (count & 0x01)
4e44c1ef
JJ
11013 {
11014 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11015 offset);
11016 emit_insn (gen_strset (destreg, mem,
0945b39d 11017 gen_rtx_SUBREG (QImode, zeroreg, 0)));
4e44c1ef 11018 }
0945b39d
JH
11019 }
11020 else
11021 {
11022 rtx countreg2;
11023 rtx label = NULL;
37ad04a5
JH
11024 /* Compute desired alignment of the string operation. */
11025 int desired_alignment = (TARGET_PENTIUMPRO
11026 && (count == 0 || count >= (unsigned int) 260)
11027 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11028
11029 /* In case we don't know anything about the alignment, default to
11030 library version, since it is usually equally fast and result in
4977bab6
ZW
11031 shorter code.
11032
11033 Also emit call when we know that the count is large and call overhead
11034 will not be important. */
11035 if (!TARGET_INLINE_ALL_STRINGOPS
11036 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11037 return 0;
11038
11039 if (TARGET_SINGLE_STRINGOP)
11040 emit_insn (gen_cld ());
11041
11042 countreg2 = gen_reg_rtx (Pmode);
11043 countreg = copy_to_mode_reg (counter_mode, count_exp);
11044 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
4e44c1ef
JJ
11045 /* Get rid of MEM_OFFSET, it won't be accurate. */
11046 dst = change_address (dst, BLKmode, destreg);
0945b39d 11047
37ad04a5 11048 if (count == 0 && align < desired_alignment)
0945b39d
JH
11049 {
11050 label = gen_label_rtx ();
37ad04a5 11051 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11052 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11053 }
11054 if (align <= 1)
11055 {
11056 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11057 emit_insn (gen_strset (destreg, dst,
11058 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11059 ix86_adjust_counter (countreg, 1);
11060 emit_label (label);
11061 LABEL_NUSES (label) = 1;
11062 }
11063 if (align <= 2)
11064 {
11065 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11066 emit_insn (gen_strset (destreg, dst,
11067 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11068 ix86_adjust_counter (countreg, 2);
11069 emit_label (label);
11070 LABEL_NUSES (label) = 1;
11071 }
37ad04a5 11072 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11073 {
11074 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11075 emit_insn (gen_strset (destreg, dst,
11076 (TARGET_64BIT
11077 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11078 : zeroreg)));
0945b39d
JH
11079 ix86_adjust_counter (countreg, 4);
11080 emit_label (label);
11081 LABEL_NUSES (label) = 1;
11082 }
11083
37ad04a5
JH
11084 if (label && desired_alignment > 4 && !TARGET_64BIT)
11085 {
11086 emit_label (label);
11087 LABEL_NUSES (label) = 1;
11088 label = NULL_RTX;
11089 }
11090
0945b39d
JH
11091 if (!TARGET_SINGLE_STRINGOP)
11092 emit_insn (gen_cld ());
11093 if (TARGET_64BIT)
11094 {
11095 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11096 GEN_INT (3)));
4e44c1ef 11097 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11098 }
11099 else
11100 {
4e44c1ef
JJ
11101 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11102 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11103 }
4e44c1ef
JJ
11104 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11105 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11106
0945b39d
JH
11107 if (label)
11108 {
11109 emit_label (label);
11110 LABEL_NUSES (label) = 1;
11111 }
37ad04a5 11112
0945b39d 11113 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11114 emit_insn (gen_strset (destreg, dst,
11115 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11116 if (TARGET_64BIT && (align <= 4 || count == 0))
11117 {
79258dce 11118 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11119 emit_insn (gen_strset (destreg, dst,
11120 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11121 emit_label (label);
11122 LABEL_NUSES (label) = 1;
11123 }
11124 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11125 emit_insn (gen_strset (destreg, dst,
11126 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11127 if (align <= 2 || count == 0)
11128 {
74411039 11129 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11130 emit_insn (gen_strset (destreg, dst,
11131 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11134 }
11135 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11136 emit_insn (gen_strset (destreg, dst,
11137 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11138 if (align <= 1 || count == 0)
11139 {
74411039 11140 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11141 emit_insn (gen_strset (destreg, dst,
11142 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11143 emit_label (label);
11144 LABEL_NUSES (label) = 1;
11145 }
11146 }
11147 return 1;
11148}
4e44c1ef 11149
0945b39d
JH
11150/* Expand strlen. */
11151int
b96a374d 11152ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
0945b39d
JH
11153{
11154 rtx addr, scratch1, scratch2, scratch3, scratch4;
11155
11156 /* The generic case of strlen expander is long. Avoid it's
11157 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11158
11159 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11160 && !TARGET_INLINE_ALL_STRINGOPS
11161 && !optimize_size
11162 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11163 return 0;
11164
11165 addr = force_reg (Pmode, XEXP (src, 0));
11166 scratch1 = gen_reg_rtx (Pmode);
11167
11168 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11169 && !optimize_size)
11170 {
11171 /* Well it seems that some optimizer does not combine a call like
11172 foo(strlen(bar), strlen(bar));
11173 when the move and the subtraction is done here. It does calculate
11174 the length just once when these instructions are done inside of
11175 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11176 often used and I use one fewer register for the lifetime of
11177 output_strlen_unroll() this is better. */
11178
11179 emit_move_insn (out, addr);
11180
4e44c1ef 11181 ix86_expand_strlensi_unroll_1 (out, src, align);
0945b39d
JH
11182
11183 /* strlensi_unroll_1 returns the address of the zero at the end of
11184 the string, like memchr(), so compute the length by subtracting
11185 the start address. */
11186 if (TARGET_64BIT)
11187 emit_insn (gen_subdi3 (out, out, addr));
11188 else
11189 emit_insn (gen_subsi3 (out, out, addr));
11190 }
11191 else
11192 {
4e44c1ef 11193 rtx unspec;
0945b39d
JH
11194 scratch2 = gen_reg_rtx (Pmode);
11195 scratch3 = gen_reg_rtx (Pmode);
11196 scratch4 = force_reg (Pmode, constm1_rtx);
11197
11198 emit_move_insn (scratch3, addr);
11199 eoschar = force_reg (QImode, eoschar);
11200
11201 emit_insn (gen_cld ());
4e44c1ef
JJ
11202 src = replace_equiv_address_nv (src, scratch3);
11203
11204 /* If .md starts supporting :P, this can be done in .md. */
11205 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11206 scratch4), UNSPEC_SCAS);
11207 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
0945b39d
JH
11208 if (TARGET_64BIT)
11209 {
0945b39d
JH
11210 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11211 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11212 }
11213 else
11214 {
0945b39d
JH
11215 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11216 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11217 }
11218 }
11219 return 1;
11220}
11221
e075ae69
RH
11222/* Expand the appropriate insns for doing strlen if not just doing
11223 repnz; scasb
11224
11225 out = result, initialized with the start address
11226 align_rtx = alignment of the address.
11227 scratch = scratch register, initialized with the startaddress when
77ebd435 11228 not aligned, otherwise undefined
3f803cd9 11229
39e3f58c 11230 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
11231 some address computing at the end. These things are done in i386.md. */
11232
0945b39d 11233static void
4e44c1ef 11234ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 11235{
e075ae69
RH
11236 int align;
11237 rtx tmp;
11238 rtx align_2_label = NULL_RTX;
11239 rtx align_3_label = NULL_RTX;
11240 rtx align_4_label = gen_label_rtx ();
11241 rtx end_0_label = gen_label_rtx ();
e075ae69 11242 rtx mem;
e2e52e1b 11243 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11244 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11245 rtx cmp;
e075ae69
RH
11246
11247 align = 0;
11248 if (GET_CODE (align_rtx) == CONST_INT)
11249 align = INTVAL (align_rtx);
3f803cd9 11250
e9a25f70 11251 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11252
e9a25f70 11253 /* Is there a known alignment and is it less than 4? */
e075ae69 11254 if (align < 4)
3f803cd9 11255 {
0945b39d
JH
11256 rtx scratch1 = gen_reg_rtx (Pmode);
11257 emit_move_insn (scratch1, out);
e9a25f70 11258 /* Is there a known alignment and is it not 2? */
e075ae69 11259 if (align != 2)
3f803cd9 11260 {
e075ae69
RH
11261 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11262 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11263
11264 /* Leave just the 3 lower bits. */
0945b39d 11265 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11266 NULL_RTX, 0, OPTAB_WIDEN);
11267
9076b9c1 11268 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11269 Pmode, 1, align_4_label);
60c81c89 11270 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 11271 Pmode, 1, align_2_label);
60c81c89 11272 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 11273 Pmode, 1, align_3_label);
3f803cd9
SC
11274 }
11275 else
11276 {
e9a25f70
JL
11277 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11278 check if is aligned to 4 - byte. */
e9a25f70 11279
60c81c89 11280 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
11281 NULL_RTX, 0, OPTAB_WIDEN);
11282
9076b9c1 11283 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11284 Pmode, 1, align_4_label);
3f803cd9
SC
11285 }
11286
4e44c1ef 11287 mem = change_address (src, QImode, out);
e9a25f70 11288
e075ae69 11289 /* Now compare the bytes. */
e9a25f70 11290
0f290768 11291 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 11292 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11293 QImode, 1, end_0_label);
3f803cd9 11294
0f290768 11295 /* Increment the address. */
0945b39d
JH
11296 if (TARGET_64BIT)
11297 emit_insn (gen_adddi3 (out, out, const1_rtx));
11298 else
11299 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11300
e075ae69
RH
11301 /* Not needed with an alignment of 2 */
11302 if (align != 2)
11303 {
11304 emit_label (align_2_label);
3f803cd9 11305
d43e0b7d
RK
11306 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11307 end_0_label);
e075ae69 11308
0945b39d
JH
11309 if (TARGET_64BIT)
11310 emit_insn (gen_adddi3 (out, out, const1_rtx));
11311 else
11312 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11313
11314 emit_label (align_3_label);
11315 }
11316
d43e0b7d
RK
11317 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11318 end_0_label);
e075ae69 11319
0945b39d
JH
11320 if (TARGET_64BIT)
11321 emit_insn (gen_adddi3 (out, out, const1_rtx));
11322 else
11323 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11324 }
11325
e075ae69
RH
11326 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11327 align this loop. It gives only huge programs, but does not help to
11328 speed up. */
11329 emit_label (align_4_label);
3f803cd9 11330
4e44c1ef 11331 mem = change_address (src, SImode, out);
e075ae69 11332 emit_move_insn (scratch, mem);
0945b39d
JH
11333 if (TARGET_64BIT)
11334 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11335 else
11336 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11337
e2e52e1b
JH
11338 /* This formula yields a nonzero result iff one of the bytes is zero.
11339 This saves three branches inside loop and many cycles. */
11340
11341 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11342 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11343 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11344 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11345 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11346 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11347 align_4_label);
e2e52e1b
JH
11348
11349 if (TARGET_CMOVE)
11350 {
11351 rtx reg = gen_reg_rtx (SImode);
0945b39d 11352 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11353 emit_move_insn (reg, tmpreg);
11354 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11355
0f290768 11356 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11357 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11358 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11359 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11360 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11361 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11362 reg,
11363 tmpreg)));
e2e52e1b 11364 /* Emit lea manually to avoid clobbering of flags. */
0945b39d 11365 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 11366 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
11367
11368 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11369 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11370 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11371 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11372 reg2,
11373 out)));
e2e52e1b
JH
11374
11375 }
11376 else
11377 {
11378 rtx end_2_label = gen_label_rtx ();
11379 /* Is zero in the first two bytes? */
11380
16189740 11381 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11382 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11383 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11384 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11385 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11386 pc_rtx);
11387 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11388 JUMP_LABEL (tmp) = end_2_label;
11389
0f290768 11390 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11391 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 11392 if (TARGET_64BIT)
60c81c89 11393 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 11394 else
60c81c89 11395 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
11396
11397 emit_label (end_2_label);
11398
11399 }
11400
0f290768 11401 /* Avoid branch in fixing the byte. */
e2e52e1b 11402 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11403 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11404 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11405 if (TARGET_64BIT)
e6e81735 11406 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11407 else
e6e81735 11408 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
11409
11410 emit_label (end_0_label);
11411}
0e07aff3
RH
11412
11413void
0f901c4c
SH
11414ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11415 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 11416 rtx pop, int sibcall)
0e07aff3
RH
11417{
11418 rtx use = NULL, call;
11419
11420 if (pop == const0_rtx)
11421 pop = NULL;
11422 if (TARGET_64BIT && pop)
11423 abort ();
11424
b069de3b
SS
11425#if TARGET_MACHO
11426 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11427 fnaddr = machopic_indirect_call_target (fnaddr);
11428#else
0e07aff3
RH
11429 /* Static functions and indirect calls don't need the pic register. */
11430 if (! TARGET_64BIT && flag_pic
11431 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11432 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11433 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11434
11435 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11436 {
11437 rtx al = gen_rtx_REG (QImode, 0);
11438 emit_move_insn (al, callarg2);
11439 use_reg (&use, al);
11440 }
b069de3b 11441#endif /* TARGET_MACHO */
0e07aff3
RH
11442
11443 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11444 {
11445 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11446 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11447 }
4977bab6
ZW
11448 if (sibcall && TARGET_64BIT
11449 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11450 {
11451 rtx addr;
11452 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 11453 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
11454 emit_move_insn (fnaddr, addr);
11455 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11456 }
0e07aff3
RH
11457
11458 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11459 if (retval)
11460 call = gen_rtx_SET (VOIDmode, retval, call);
11461 if (pop)
11462 {
11463 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11464 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11465 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11466 }
11467
11468 call = emit_call_insn (call);
11469 if (use)
11470 CALL_INSN_FUNCTION_USAGE (call) = use;
11471}
fce5a9f2 11472
e075ae69 11473\f
e075ae69
RH
11474/* Clear stack slot assignments remembered from previous functions.
11475 This is called from INIT_EXPANDERS once before RTL is emitted for each
11476 function. */
11477
e2500fed 11478static struct machine_function *
b96a374d 11479ix86_init_machine_status (void)
37b15744 11480{
d7394366
JH
11481 struct machine_function *f;
11482
11483 f = ggc_alloc_cleared (sizeof (struct machine_function));
11484 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11485
11486 return f;
1526a060
BS
11487}
11488
e075ae69
RH
11489/* Return a MEM corresponding to a stack slot with mode MODE.
11490 Allocate a new slot if necessary.
11491
11492 The RTL for a function can have several slots available: N is
11493 which slot to use. */
11494
11495rtx
b96a374d 11496assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11497{
ddb0ae00
ZW
11498 struct stack_local_entry *s;
11499
e075ae69
RH
11500 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11501 abort ();
11502
ddb0ae00
ZW
11503 for (s = ix86_stack_locals; s; s = s->next)
11504 if (s->mode == mode && s->n == n)
11505 return s->rtl;
11506
11507 s = (struct stack_local_entry *)
11508 ggc_alloc (sizeof (struct stack_local_entry));
11509 s->n = n;
11510 s->mode = mode;
11511 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11512
ddb0ae00
ZW
11513 s->next = ix86_stack_locals;
11514 ix86_stack_locals = s;
11515 return s->rtl;
e075ae69 11516}
f996902d
RH
11517
11518/* Construct the SYMBOL_REF for the tls_get_addr function. */
11519
e2500fed 11520static GTY(()) rtx ix86_tls_symbol;
f996902d 11521rtx
b96a374d 11522ix86_tls_get_addr (void)
f996902d 11523{
f996902d 11524
e2500fed 11525 if (!ix86_tls_symbol)
f996902d 11526 {
75d38379
JJ
11527 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11528 (TARGET_GNU_TLS && !TARGET_64BIT)
11529 ? "___tls_get_addr"
11530 : "__tls_get_addr");
f996902d
RH
11531 }
11532
e2500fed 11533 return ix86_tls_symbol;
f996902d 11534}
e075ae69
RH
11535\f
11536/* Calculate the length of the memory address in the instruction
11537 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11538
8fe75e43 11539int
b96a374d 11540memory_address_length (rtx addr)
e075ae69
RH
11541{
11542 struct ix86_address parts;
11543 rtx base, index, disp;
11544 int len;
11545
11546 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11547 || GET_CODE (addr) == POST_INC
11548 || GET_CODE (addr) == PRE_MODIFY
11549 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11550 return 0;
3f803cd9 11551
e075ae69
RH
11552 if (! ix86_decompose_address (addr, &parts))
11553 abort ();
3f803cd9 11554
e075ae69
RH
11555 base = parts.base;
11556 index = parts.index;
11557 disp = parts.disp;
11558 len = 0;
3f803cd9 11559
7b65ed54
EB
11560 /* Rule of thumb:
11561 - esp as the base always wants an index,
11562 - ebp as the base always wants a displacement. */
11563
e075ae69
RH
11564 /* Register Indirect. */
11565 if (base && !index && !disp)
11566 {
7b65ed54
EB
11567 /* esp (for its index) and ebp (for its displacement) need
11568 the two-byte modrm form. */
e075ae69
RH
11569 if (addr == stack_pointer_rtx
11570 || addr == arg_pointer_rtx
564d80f4
JH
11571 || addr == frame_pointer_rtx
11572 || addr == hard_frame_pointer_rtx)
e075ae69 11573 len = 1;
3f803cd9 11574 }
e9a25f70 11575
e075ae69
RH
11576 /* Direct Addressing. */
11577 else if (disp && !base && !index)
11578 len = 4;
11579
3f803cd9
SC
11580 else
11581 {
e075ae69
RH
11582 /* Find the length of the displacement constant. */
11583 if (disp)
11584 {
11585 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
11586 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11587 && base)
e075ae69
RH
11588 len = 1;
11589 else
11590 len = 4;
11591 }
7b65ed54
EB
11592 /* ebp always wants a displacement. */
11593 else if (base == hard_frame_pointer_rtx)
11594 len = 1;
3f803cd9 11595
43f3a59d 11596 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
11597 if (index
11598 /* ...like esp, which always wants an index. */
11599 || base == stack_pointer_rtx
11600 || base == arg_pointer_rtx
11601 || base == frame_pointer_rtx)
e075ae69 11602 len += 1;
3f803cd9
SC
11603 }
11604
e075ae69
RH
11605 return len;
11606}
79325812 11607
5bf0ebab
RH
11608/* Compute default value for "length_immediate" attribute. When SHORTFORM
11609 is set, expect that insn have 8bit immediate alternative. */
e075ae69 11610int
b96a374d 11611ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 11612{
6ef67412
JH
11613 int len = 0;
11614 int i;
6c698a6d 11615 extract_insn_cached (insn);
6ef67412
JH
11616 for (i = recog_data.n_operands - 1; i >= 0; --i)
11617 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11618 {
6ef67412 11619 if (len)
3071fab5 11620 abort ();
6ef67412
JH
11621 if (shortform
11622 && GET_CODE (recog_data.operand[i]) == CONST_INT
11623 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11624 len = 1;
11625 else
11626 {
11627 switch (get_attr_mode (insn))
11628 {
11629 case MODE_QI:
11630 len+=1;
11631 break;
11632 case MODE_HI:
11633 len+=2;
11634 break;
11635 case MODE_SI:
11636 len+=4;
11637 break;
14f73b5a
JH
11638 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11639 case MODE_DI:
11640 len+=4;
11641 break;
6ef67412 11642 default:
c725bd79 11643 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11644 }
11645 }
3071fab5 11646 }
6ef67412
JH
11647 return len;
11648}
11649/* Compute default value for "length_address" attribute. */
11650int
b96a374d 11651ix86_attr_length_address_default (rtx insn)
6ef67412
JH
11652{
11653 int i;
9b73c90a
EB
11654
11655 if (get_attr_type (insn) == TYPE_LEA)
11656 {
11657 rtx set = PATTERN (insn);
11658 if (GET_CODE (set) == SET)
11659 ;
11660 else if (GET_CODE (set) == PARALLEL
11661 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11662 set = XVECEXP (set, 0, 0);
11663 else
11664 {
11665#ifdef ENABLE_CHECKING
11666 abort ();
11667#endif
11668 return 0;
11669 }
11670
11671 return memory_address_length (SET_SRC (set));
11672 }
11673
6c698a6d 11674 extract_insn_cached (insn);
1ccbefce
RH
11675 for (i = recog_data.n_operands - 1; i >= 0; --i)
11676 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11677 {
6ef67412 11678 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11679 break;
11680 }
6ef67412 11681 return 0;
3f803cd9 11682}
e075ae69
RH
11683\f
11684/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11685
c237e94a 11686static int
b96a374d 11687ix86_issue_rate (void)
b657fc39 11688{
9e555526 11689 switch (ix86_tune)
b657fc39 11690 {
e075ae69
RH
11691 case PROCESSOR_PENTIUM:
11692 case PROCESSOR_K6:
11693 return 2;
79325812 11694
e075ae69 11695 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11696 case PROCESSOR_PENTIUM4:
11697 case PROCESSOR_ATHLON:
4977bab6 11698 case PROCESSOR_K8:
89c43c0a 11699 case PROCESSOR_NOCONA:
e075ae69 11700 return 3;
b657fc39 11701
b657fc39 11702 default:
e075ae69 11703 return 1;
b657fc39 11704 }
b657fc39
L
11705}
11706
e075ae69
RH
11707/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11708 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 11709
e075ae69 11710static int
b96a374d 11711ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
11712{
11713 rtx set, set2;
b657fc39 11714
e075ae69
RH
11715 /* Simplify the test for uninteresting insns. */
11716 if (insn_type != TYPE_SETCC
11717 && insn_type != TYPE_ICMOV
11718 && insn_type != TYPE_FCMOV
11719 && insn_type != TYPE_IBR)
11720 return 0;
b657fc39 11721
e075ae69
RH
11722 if ((set = single_set (dep_insn)) != 0)
11723 {
11724 set = SET_DEST (set);
11725 set2 = NULL_RTX;
11726 }
11727 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11728 && XVECLEN (PATTERN (dep_insn), 0) == 2
11729 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11730 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11731 {
11732 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11733 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11734 }
78a0d70c
ZW
11735 else
11736 return 0;
b657fc39 11737
78a0d70c
ZW
11738 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11739 return 0;
b657fc39 11740
f5143c46 11741 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11742 not any other potentially set register. */
11743 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11744 return 0;
11745
11746 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11747 return 0;
11748
11749 return 1;
e075ae69 11750}
b657fc39 11751
e075ae69
RH
11752/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11753 address with operands set by DEP_INSN. */
11754
11755static int
b96a374d 11756ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
11757{
11758 rtx addr;
11759
6ad48e84
JH
11760 if (insn_type == TYPE_LEA
11761 && TARGET_PENTIUM)
5fbdde42
RH
11762 {
11763 addr = PATTERN (insn);
11764 if (GET_CODE (addr) == SET)
11765 ;
11766 else if (GET_CODE (addr) == PARALLEL
11767 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11768 addr = XVECEXP (addr, 0, 0);
11769 else
11770 abort ();
11771 addr = SET_SRC (addr);
11772 }
e075ae69
RH
11773 else
11774 {
11775 int i;
6c698a6d 11776 extract_insn_cached (insn);
1ccbefce
RH
11777 for (i = recog_data.n_operands - 1; i >= 0; --i)
11778 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11779 {
1ccbefce 11780 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11781 goto found;
11782 }
11783 return 0;
11784 found:;
b657fc39
L
11785 }
11786
e075ae69 11787 return modified_in_p (addr, dep_insn);
b657fc39 11788}
a269a03c 11789
c237e94a 11790static int
b96a374d 11791ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 11792{
e075ae69 11793 enum attr_type insn_type, dep_insn_type;
8695f61e 11794 enum attr_memory memory;
e075ae69 11795 rtx set, set2;
9b00189f 11796 int dep_insn_code_number;
a269a03c 11797
d1f87653 11798 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11799 if (REG_NOTE_KIND (link) != 0)
309ada50 11800 return 0;
a269a03c 11801
9b00189f
JH
11802 dep_insn_code_number = recog_memoized (dep_insn);
11803
e075ae69 11804 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11805 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11806 return cost;
a269a03c 11807
1c71e60e
JH
11808 insn_type = get_attr_type (insn);
11809 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11810
9e555526 11811 switch (ix86_tune)
a269a03c
JC
11812 {
11813 case PROCESSOR_PENTIUM:
e075ae69
RH
11814 /* Address Generation Interlock adds a cycle of latency. */
11815 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11816 cost += 1;
11817
11818 /* ??? Compares pair with jump/setcc. */
11819 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11820 cost = 0;
11821
d1f87653 11822 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 11823 if (insn_type == TYPE_FMOV
e075ae69
RH
11824 && get_attr_memory (insn) == MEMORY_STORE
11825 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11826 cost += 1;
11827 break;
a269a03c 11828
e075ae69 11829 case PROCESSOR_PENTIUMPRO:
6ad48e84 11830 memory = get_attr_memory (insn);
e075ae69
RH
11831
11832 /* INT->FP conversion is expensive. */
11833 if (get_attr_fp_int_src (dep_insn))
11834 cost += 5;
11835
11836 /* There is one cycle extra latency between an FP op and a store. */
11837 if (insn_type == TYPE_FMOV
11838 && (set = single_set (dep_insn)) != NULL_RTX
11839 && (set2 = single_set (insn)) != NULL_RTX
11840 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11841 && GET_CODE (SET_DEST (set2)) == MEM)
11842 cost += 1;
6ad48e84
JH
11843
11844 /* Show ability of reorder buffer to hide latency of load by executing
11845 in parallel with previous instruction in case
11846 previous instruction is not needed to compute the address. */
11847 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11848 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11849 {
6ad48e84
JH
11850 /* Claim moves to take one cycle, as core can issue one load
11851 at time and the next load can start cycle later. */
11852 if (dep_insn_type == TYPE_IMOV
11853 || dep_insn_type == TYPE_FMOV)
11854 cost = 1;
11855 else if (cost > 1)
11856 cost--;
11857 }
e075ae69 11858 break;
a269a03c 11859
e075ae69 11860 case PROCESSOR_K6:
6ad48e84 11861 memory = get_attr_memory (insn);
8695f61e 11862
e075ae69
RH
11863 /* The esp dependency is resolved before the instruction is really
11864 finished. */
11865 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11866 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11867 return 1;
a269a03c 11868
e075ae69
RH
11869 /* INT->FP conversion is expensive. */
11870 if (get_attr_fp_int_src (dep_insn))
11871 cost += 5;
6ad48e84
JH
11872
11873 /* Show ability of reorder buffer to hide latency of load by executing
11874 in parallel with previous instruction in case
11875 previous instruction is not needed to compute the address. */
11876 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11877 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11878 {
6ad48e84
JH
11879 /* Claim moves to take one cycle, as core can issue one load
11880 at time and the next load can start cycle later. */
11881 if (dep_insn_type == TYPE_IMOV
11882 || dep_insn_type == TYPE_FMOV)
11883 cost = 1;
11884 else if (cost > 2)
11885 cost -= 2;
11886 else
11887 cost = 1;
11888 }
a14003ee 11889 break;
e075ae69 11890
309ada50 11891 case PROCESSOR_ATHLON:
4977bab6 11892 case PROCESSOR_K8:
6ad48e84 11893 memory = get_attr_memory (insn);
6ad48e84 11894
6ad48e84
JH
11895 /* Show ability of reorder buffer to hide latency of load by executing
11896 in parallel with previous instruction in case
11897 previous instruction is not needed to compute the address. */
11898 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11899 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11900 {
26f74aa3
JH
11901 enum attr_unit unit = get_attr_unit (insn);
11902 int loadcost = 3;
11903
11904 /* Because of the difference between the length of integer and
11905 floating unit pipeline preparation stages, the memory operands
b96a374d 11906 for floating point are cheaper.
26f74aa3 11907
c51e6d85 11908 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
11909 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11910 loadcost = 3;
11911 else
11912 loadcost = TARGET_ATHLON ? 2 : 0;
11913
11914 if (cost >= loadcost)
11915 cost -= loadcost;
6ad48e84
JH
11916 else
11917 cost = 0;
11918 }
309ada50 11919
a269a03c 11920 default:
a269a03c
JC
11921 break;
11922 }
11923
11924 return cost;
11925}
0a726ef1 11926
9b690711
RH
11927/* How many alternative schedules to try. This should be as wide as the
11928 scheduling freedom in the DFA, but no wider. Making this value too
11929 large results extra work for the scheduler. */
11930
11931static int
b96a374d 11932ia32_multipass_dfa_lookahead (void)
9b690711 11933{
9e555526 11934 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 11935 return 2;
56bab446 11936
8695f61e
SB
11937 if (ix86_tune == PROCESSOR_PENTIUMPRO
11938 || ix86_tune == PROCESSOR_K6)
56bab446
SB
11939 return 1;
11940
9b690711 11941 else
56bab446 11942 return 0;
9b690711
RH
11943}
11944
0e4970d7 11945\f
a7180f70
BS
11946/* Compute the alignment given to a constant that is being placed in memory.
11947 EXP is the constant and ALIGN is the alignment that the object would
11948 ordinarily have.
11949 The value of this function is used instead of that alignment to align
11950 the object. */
11951
11952int
b96a374d 11953ix86_constant_alignment (tree exp, int align)
a7180f70
BS
11954{
11955 if (TREE_CODE (exp) == REAL_CST)
11956 {
11957 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11958 return 64;
11959 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11960 return 128;
11961 }
4137ba7a
JJ
11962 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11963 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11964 return BITS_PER_WORD;
a7180f70
BS
11965
11966 return align;
11967}
11968
11969/* Compute the alignment for a static variable.
11970 TYPE is the data type, and ALIGN is the alignment that
11971 the object would ordinarily have. The value of this function is used
11972 instead of that alignment to align the object. */
11973
11974int
b96a374d 11975ix86_data_alignment (tree type, int align)
a7180f70
BS
11976{
11977 if (AGGREGATE_TYPE_P (type)
11978 && TYPE_SIZE (type)
11979 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11980 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11981 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11982 return 256;
11983
0d7d98ee
JH
11984 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11985 to 16byte boundary. */
11986 if (TARGET_64BIT)
11987 {
11988 if (AGGREGATE_TYPE_P (type)
11989 && TYPE_SIZE (type)
11990 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11991 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11992 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11993 return 128;
11994 }
11995
a7180f70
BS
11996 if (TREE_CODE (type) == ARRAY_TYPE)
11997 {
11998 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11999 return 64;
12000 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12001 return 128;
12002 }
12003 else if (TREE_CODE (type) == COMPLEX_TYPE)
12004 {
0f290768 12005
a7180f70
BS
12006 if (TYPE_MODE (type) == DCmode && align < 64)
12007 return 64;
12008 if (TYPE_MODE (type) == XCmode && align < 128)
12009 return 128;
12010 }
12011 else if ((TREE_CODE (type) == RECORD_TYPE
12012 || TREE_CODE (type) == UNION_TYPE
12013 || TREE_CODE (type) == QUAL_UNION_TYPE)
12014 && TYPE_FIELDS (type))
12015 {
12016 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12017 return 64;
12018 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12019 return 128;
12020 }
12021 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12022 || TREE_CODE (type) == INTEGER_TYPE)
12023 {
12024 if (TYPE_MODE (type) == DFmode && align < 64)
12025 return 64;
12026 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12027 return 128;
12028 }
12029
12030 return align;
12031}
12032
12033/* Compute the alignment for a local variable.
12034 TYPE is the data type, and ALIGN is the alignment that
12035 the object would ordinarily have. The value of this macro is used
12036 instead of that alignment to align the object. */
12037
12038int
b96a374d 12039ix86_local_alignment (tree type, int align)
a7180f70 12040{
0d7d98ee
JH
12041 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12042 to 16byte boundary. */
12043 if (TARGET_64BIT)
12044 {
12045 if (AGGREGATE_TYPE_P (type)
12046 && TYPE_SIZE (type)
12047 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12048 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12049 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12050 return 128;
12051 }
a7180f70
BS
12052 if (TREE_CODE (type) == ARRAY_TYPE)
12053 {
12054 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12055 return 64;
12056 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12057 return 128;
12058 }
12059 else if (TREE_CODE (type) == COMPLEX_TYPE)
12060 {
12061 if (TYPE_MODE (type) == DCmode && align < 64)
12062 return 64;
12063 if (TYPE_MODE (type) == XCmode && align < 128)
12064 return 128;
12065 }
12066 else if ((TREE_CODE (type) == RECORD_TYPE
12067 || TREE_CODE (type) == UNION_TYPE
12068 || TREE_CODE (type) == QUAL_UNION_TYPE)
12069 && TYPE_FIELDS (type))
12070 {
12071 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12072 return 64;
12073 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12074 return 128;
12075 }
12076 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12077 || TREE_CODE (type) == INTEGER_TYPE)
12078 {
0f290768 12079
a7180f70
BS
12080 if (TYPE_MODE (type) == DFmode && align < 64)
12081 return 64;
12082 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12083 return 128;
12084 }
12085 return align;
12086}
0ed08620
JH
12087\f
12088/* Emit RTL insns to initialize the variable parts of a trampoline.
12089 FNADDR is an RTX for the address of the function's pure code.
12090 CXT is an RTX for the static chain value for the function. */
12091void
b96a374d 12092x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
0ed08620
JH
12093{
12094 if (!TARGET_64BIT)
12095 {
12096 /* Compute offset from the end of the jmp to the target function. */
12097 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12098 plus_constant (tramp, 10),
12099 NULL_RTX, 1, OPTAB_DIRECT);
12100 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 12101 gen_int_mode (0xb9, QImode));
0ed08620
JH
12102 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12103 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 12104 gen_int_mode (0xe9, QImode));
0ed08620
JH
12105 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12106 }
12107 else
12108 {
12109 int offset = 0;
12110 /* Try to load address using shorter movl instead of movabs.
12111 We may want to support movq for kernel mode, but kernel does not use
12112 trampolines at the moment. */
8fe75e43 12113 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
0ed08620
JH
12114 {
12115 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12116 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12117 gen_int_mode (0xbb41, HImode));
0ed08620
JH
12118 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12119 gen_lowpart (SImode, fnaddr));
12120 offset += 6;
12121 }
12122 else
12123 {
12124 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12125 gen_int_mode (0xbb49, HImode));
0ed08620
JH
12126 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12127 fnaddr);
12128 offset += 10;
12129 }
12130 /* Load static chain using movabs to r10. */
12131 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12132 gen_int_mode (0xba49, HImode));
0ed08620
JH
12133 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12134 cxt);
12135 offset += 10;
12136 /* Jump to the r11 */
12137 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12138 gen_int_mode (0xff49, HImode));
0ed08620 12139 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 12140 gen_int_mode (0xe3, QImode));
0ed08620
JH
12141 offset += 3;
12142 if (offset > TRAMPOLINE_SIZE)
b531087a 12143 abort ();
0ed08620 12144 }
5791cc29 12145
e7a742ec 12146#ifdef ENABLE_EXECUTE_STACK
f84d109f 12147 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
5791cc29
JT
12148 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12149#endif
0ed08620 12150}
eeb06b1b 12151\f
6e34d3a3
JM
/* Register the machine-specific builtin NAME with function type TYPE
   and builtin code CODE, but only if at least one bit of MASK is set
   in target_flags.  A MASK that contains MASK_64BIT additionally
   requires TARGET_64BIT.  Note that MASK is evaluated more than once.  */
#define def_builtin(MASK, NAME, TYPE, CODE)                             \
do {                                                                    \
  if (((MASK) & target_flags) != 0                                      \
      && (((MASK) & MASK_64BIT) == 0 || TARGET_64BIT))                  \
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,   \
                                 NULL, NULL_TREE);                      \
} while (0)
bd793c65 12159
e358acde
RH
/* Bit values for the `flag' member of struct builtin_description.  */

/* Set when the comparison is not supported natively and must instead
   be expressed as the listed comparison with its operands swapped.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1
12165
bd793c65
BS
12166struct builtin_description
12167{
8b60264b
KG
12168 const unsigned int mask;
12169 const enum insn_code icode;
12170 const char *const name;
12171 const enum ix86_builtins code;
12172 const enum rtx_code comparison;
12173 const unsigned int flag;
bd793c65
BS
12174};
12175
8b60264b 12176static const struct builtin_description bdesc_comi[] =
bd793c65 12177{
37f22004
L
12178 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12179 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12180 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12181 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12182 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12183 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12184 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12185 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12186 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12187 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12188 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12189 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
1194ca05
JH
12190 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12191 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12192 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12193 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12194 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12195 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12196 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12197 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12198 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12199 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12200 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12201 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
bd793c65
BS
12202};
12203
8b60264b 12204static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12205{
12206 /* SSE */
37f22004
L
12207 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12208 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12209 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12210 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
ef719a44
RH
12211 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12212 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12213 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12214 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12215
12216 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12217 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12218 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12219 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
e358acde 12220 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 12221 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
e358acde 12222 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
12223 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12224 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12225 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12226 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12227 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
e358acde 12228 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 12229 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
e358acde 12230 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
12231 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12232 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12233 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12234 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12235 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12236 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12237 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12238 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12239 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
12240 BUILTIN_DESC_SWAP_OPERANDS },
12241 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
12242 BUILTIN_DESC_SWAP_OPERANDS },
12243 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
37f22004
L
12244
12245 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12246 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
ef719a44
RH
12247 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12248 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
37f22004 12249
ef719a44 12250 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
37f22004 12251 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
ef719a44
RH
12252 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12253 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
37f22004
L
12254
12255 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12256 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12257 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12258 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12259 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12260
12261 /* MMX */
80e8bb90
RH
12262 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12263 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12264 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12265 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
80e8bb90
RH
12266 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12267 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12268 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12269 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b 12270
80e8bb90
RH
12271 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12272 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12273 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12274 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12275 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12276 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12277 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12278 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
eeb06b1b 12279
80e8bb90
RH
12280 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12281 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12282 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b 12283
80e8bb90
RH
12284 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12285 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12286 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12287 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
eeb06b1b 12288
37f22004
L
12289 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12290 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b 12291
80e8bb90
RH
12292 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12293 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12294 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12295 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12296 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12297 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
eeb06b1b 12298
80e8bb90
RH
12299 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12300 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12301 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12302 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12303
12304 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12305 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12306 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12307 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12308 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12309 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12310
12311 /* Special. */
eeb06b1b
BS
12312 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12313 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12314 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12315
ef719a44
RH
12316 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12317 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12318 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b 12319
80e8bb90
RH
12320 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12321 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12322 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12323 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
eeb06b1b
BS
12324 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12325 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12326
80e8bb90
RH
12327 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12328 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12329 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12330 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
eeb06b1b
BS
12331 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12332 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12333
80e8bb90
RH
12334 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12335 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12336 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12337 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
eeb06b1b 12338
37f22004 12339 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12340 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12341
12342 /* SSE2 */
12343 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12344 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12345 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12346 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
ef719a44
RH
12347 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12348 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12349 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12350 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12351
12352 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12353 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12354 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12355 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
e358acde 12356 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 12357 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
e358acde 12358 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
12359 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12360 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
12361 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
12362 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
12363 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
e358acde 12364 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 12365 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
e358acde 12366 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
12367 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
12368 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12369 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12370 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12371 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12372 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
12373 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
12374 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
12375 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
fbe5eb6d
BS
12376
12377 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12378 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
ef719a44
RH
12379 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12380 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
fbe5eb6d 12381
ef719a44 12382 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
1877be45 12383 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
ef719a44
RH
12384 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12385 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12386
12387 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12388 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12389 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12390
12391 /* SSE2 MMX */
12392 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12393 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12394 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12395 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12396 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12397 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12398 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12399 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d 12400
ef719a44
RH
12401 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12402 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12403 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12404 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12405 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12406 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12407 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12408 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
fbe5eb6d
BS
12409
12410 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
ef719a44 12411 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
fbe5eb6d 12412
ef719a44 12413 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
916b60b7 12414 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
ef719a44
RH
12415 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12416 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12417
12418 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12419 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12420
ef719a44
RH
12421 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12422 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12423 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12424 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12425 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12426 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
fbe5eb6d
BS
12427
12428 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12429 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12430 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12431 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12432
12433 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12434 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12436 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12437 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12438 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12439 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12440 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12441
916b60b7
BS
12442 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12443 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12444 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12445
ef719a44 12446 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
916b60b7
BS
12447 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12448
9e9fb0ce
JB
12449 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12451
916b60b7 12452 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
916b60b7 12453 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
916b60b7
BS
12454 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12455
916b60b7 12456 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
916b60b7 12457 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
916b60b7
BS
12458 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12459
916b60b7 12460 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
916b60b7
BS
12461 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12462
12463 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12464
ef719a44
RH
12465 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12466 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12467 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12468 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
22c7c85e 12469
9e200aaf 12470 /* SSE3 MMX */
ef719a44
RH
12471 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12472 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12473 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12474 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12475 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12476 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
12477};
12478
8b60264b 12479static const struct builtin_description bdesc_1arg[] =
bd793c65 12480{
37f22004
L
12481 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12482 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12483
37f22004 12484 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
ef719a44
RH
12485 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12486 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12487
ef719a44
RH
12488 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12489 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12490 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12491 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12492 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12493 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
12494
12495 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
fbe5eb6d
BS
12497
12498 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12499
ef719a44
RH
12500 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12501 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12502
ef719a44
RH
12503 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12504 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12505 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12507 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12508
ef719a44 12509 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
fbe5eb6d 12510
ef719a44
RH
12511 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12513 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12514 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
f02e1358 12515
ef719a44
RH
12516 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
22c7c85e 12519
9e200aaf 12520 /* SSE3 */
ef719a44
RH
12521 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12522 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12523 { MASK_SSE3, CODE_FOR_sse3_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
12524};
12525
f6155fda 12526void
b96a374d 12527ix86_init_builtins (void)
f6155fda
SS
12528{
12529 if (TARGET_MMX)
12530 ix86_init_mmx_sse_builtins ();
12531}
12532
12533/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12534 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12535 builtins. */
e37af218 12536static void
b96a374d 12537ix86_init_mmx_sse_builtins (void)
bd793c65 12538{
8b60264b 12539 const struct builtin_description * d;
77ebd435 12540 size_t i;
bd793c65 12541
4a5eab38
PB
12542 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12543 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12544 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
a16da3ae
RH
12545 tree V2DI_type_node
12546 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
4a5eab38
PB
12547 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12548 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12549 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12550 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12551 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12552 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12553
bd793c65 12554 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12555 tree pcchar_type_node = build_pointer_type (
12556 build_type_variant (char_type_node, 1, 0));
bd793c65 12557 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12558 tree pcfloat_type_node = build_pointer_type (
12559 build_type_variant (float_type_node, 1, 0));
bd793c65 12560 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12561 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12562 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12563
12564 /* Comparisons. */
12565 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12566 = build_function_type_list (integer_type_node,
12567 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12568 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12569 = build_function_type_list (V4SI_type_node,
12570 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12571 /* MMX/SSE/integer conversions. */
bd793c65 12572 tree int_ftype_v4sf
b4de2f7d
AH
12573 = build_function_type_list (integer_type_node,
12574 V4SF_type_node, NULL_TREE);
453ee231
JH
12575 tree int64_ftype_v4sf
12576 = build_function_type_list (long_long_integer_type_node,
12577 V4SF_type_node, NULL_TREE);
bd793c65 12578 tree int_ftype_v8qi
b4de2f7d 12579 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12580 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12581 = build_function_type_list (V4SF_type_node,
12582 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
12583 tree v4sf_ftype_v4sf_int64
12584 = build_function_type_list (V4SF_type_node,
12585 V4SF_type_node, long_long_integer_type_node,
12586 NULL_TREE);
bd793c65 12587 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12588 = build_function_type_list (V4SF_type_node,
12589 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12590 tree int_ftype_v4hi_int
b4de2f7d
AH
12591 = build_function_type_list (integer_type_node,
12592 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12593 tree v4hi_ftype_v4hi_int_int
e7a60f56 12594 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12595 integer_type_node, integer_type_node,
12596 NULL_TREE);
bd793c65
BS
12597 /* Miscellaneous. */
12598 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12599 = build_function_type_list (V8QI_type_node,
12600 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12601 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12602 = build_function_type_list (V4HI_type_node,
12603 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12604 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12605 = build_function_type_list (V4SF_type_node,
12606 V4SF_type_node, V4SF_type_node,
12607 integer_type_node, NULL_TREE);
bd793c65 12608 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12609 = build_function_type_list (V2SI_type_node,
12610 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12611 tree v4hi_ftype_v4hi_int
b4de2f7d 12612 = build_function_type_list (V4HI_type_node,
e7a60f56 12613 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12614 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12615 = build_function_type_list (V4HI_type_node,
12616 V4HI_type_node, long_long_unsigned_type_node,
12617 NULL_TREE);
bd793c65 12618 tree v2si_ftype_v2si_di
b4de2f7d
AH
12619 = build_function_type_list (V2SI_type_node,
12620 V2SI_type_node, long_long_unsigned_type_node,
12621 NULL_TREE);
bd793c65 12622 tree void_ftype_void
b4de2f7d 12623 = build_function_type (void_type_node, void_list_node);
bd793c65 12624 tree void_ftype_unsigned
b4de2f7d 12625 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
12626 tree void_ftype_unsigned_unsigned
12627 = build_function_type_list (void_type_node, unsigned_type_node,
12628 unsigned_type_node, NULL_TREE);
12629 tree void_ftype_pcvoid_unsigned_unsigned
12630 = build_function_type_list (void_type_node, const_ptr_type_node,
12631 unsigned_type_node, unsigned_type_node,
12632 NULL_TREE);
bd793c65 12633 tree unsigned_ftype_void
b4de2f7d 12634 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12635 tree di_ftype_void
b4de2f7d 12636 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12637 tree v4sf_ftype_void
b4de2f7d 12638 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12639 tree v2si_ftype_v4sf
b4de2f7d 12640 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12641 /* Loads/stores. */
bd793c65 12642 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12643 = build_function_type_list (void_type_node,
12644 V8QI_type_node, V8QI_type_node,
12645 pchar_type_node, NULL_TREE);
068f5dea
JH
12646 tree v4sf_ftype_pcfloat
12647 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
12648 /* @@@ the type is bogus */
12649 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12650 = build_function_type_list (V4SF_type_node,
f8ca7923 12651 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12652 tree void_ftype_pv2si_v4sf
b4de2f7d 12653 = build_function_type_list (void_type_node,
f8ca7923 12654 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12655 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12656 = build_function_type_list (void_type_node,
12657 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12658 tree void_ftype_pdi_di
b4de2f7d
AH
12659 = build_function_type_list (void_type_node,
12660 pdi_type_node, long_long_unsigned_type_node,
12661 NULL_TREE);
916b60b7 12662 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12663 = build_function_type_list (void_type_node,
12664 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12665 /* Normal vector unops. */
12666 tree v4sf_ftype_v4sf
b4de2f7d 12667 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12668
bd793c65
BS
12669 /* Normal vector binops. */
12670 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12671 = build_function_type_list (V4SF_type_node,
12672 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12673 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12674 = build_function_type_list (V8QI_type_node,
12675 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12676 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12677 = build_function_type_list (V4HI_type_node,
12678 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12679 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12680 = build_function_type_list (V2SI_type_node,
12681 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12682 tree di_ftype_di_di
b4de2f7d
AH
12683 = build_function_type_list (long_long_unsigned_type_node,
12684 long_long_unsigned_type_node,
12685 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12686
47f339cf 12687 tree v2si_ftype_v2sf
ae3aa00d 12688 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12689 tree v2sf_ftype_v2si
b4de2f7d 12690 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12691 tree v2si_ftype_v2si
b4de2f7d 12692 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12693 tree v2sf_ftype_v2sf
b4de2f7d 12694 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12695 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12696 = build_function_type_list (V2SF_type_node,
12697 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12698 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12699 = build_function_type_list (V2SI_type_node,
12700 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 12701 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
12702 tree pcint_type_node = build_pointer_type (
12703 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 12704 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
12705 tree pcdouble_type_node = build_pointer_type (
12706 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 12707 tree int_ftype_v2df_v2df
b4de2f7d
AH
12708 = build_function_type_list (integer_type_node,
12709 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12710
12711 tree ti_ftype_void
b4de2f7d 12712 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
12713 tree v2di_ftype_void
12714 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12715 tree ti_ftype_ti_ti
b4de2f7d
AH
12716 = build_function_type_list (intTI_type_node,
12717 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
12718 tree void_ftype_pcvoid
12719 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 12720 tree v2di_ftype_di
b4de2f7d
AH
12721 = build_function_type_list (V2DI_type_node,
12722 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
12723 tree di_ftype_v2di
12724 = build_function_type_list (long_long_unsigned_type_node,
12725 V2DI_type_node, NULL_TREE);
fbe5eb6d 12726 tree v4sf_ftype_v4si
b4de2f7d 12727 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12728 tree v4si_ftype_v4sf
b4de2f7d 12729 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12730 tree v2df_ftype_v4si
b4de2f7d 12731 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12732 tree v4si_ftype_v2df
b4de2f7d 12733 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12734 tree v2si_ftype_v2df
b4de2f7d 12735 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12736 tree v4sf_ftype_v2df
b4de2f7d 12737 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12738 tree v2df_ftype_v2si
b4de2f7d 12739 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12740 tree v2df_ftype_v4sf
b4de2f7d 12741 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12742 tree int_ftype_v2df
b4de2f7d 12743 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
12744 tree int64_ftype_v2df
12745 = build_function_type_list (long_long_integer_type_node,
b96a374d 12746 V2DF_type_node, NULL_TREE);
fbe5eb6d 12747 tree v2df_ftype_v2df_int
b4de2f7d
AH
12748 = build_function_type_list (V2DF_type_node,
12749 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
12750 tree v2df_ftype_v2df_int64
12751 = build_function_type_list (V2DF_type_node,
12752 V2DF_type_node, long_long_integer_type_node,
12753 NULL_TREE);
fbe5eb6d 12754 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12755 = build_function_type_list (V4SF_type_node,
12756 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12757 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12758 = build_function_type_list (V2DF_type_node,
12759 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12760 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12761 = build_function_type_list (V2DF_type_node,
12762 V2DF_type_node, V2DF_type_node,
12763 integer_type_node,
12764 NULL_TREE);
1c47af84 12765 tree v2df_ftype_v2df_pcdouble
b4de2f7d 12766 = build_function_type_list (V2DF_type_node,
1c47af84 12767 V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 12768 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12769 = build_function_type_list (void_type_node,
12770 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12771 tree void_ftype_pint_int
b4de2f7d
AH
12772 = build_function_type_list (void_type_node,
12773 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12774 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12775 = build_function_type_list (void_type_node,
12776 V16QI_type_node, V16QI_type_node,
12777 pchar_type_node, NULL_TREE);
068f5dea
JH
12778 tree v2df_ftype_pcdouble
12779 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 12780 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12781 = build_function_type_list (V2DF_type_node,
12782 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12783 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12784 = build_function_type_list (V16QI_type_node,
12785 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12786 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12787 = build_function_type_list (V8HI_type_node,
12788 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12789 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12790 = build_function_type_list (V4SI_type_node,
12791 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12792 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12793 = build_function_type_list (V2DI_type_node,
12794 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12795 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12796 = build_function_type_list (V2DI_type_node,
12797 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12798 tree v2df_ftype_v2df
b4de2f7d 12799 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12800 tree v2df_ftype_double
b4de2f7d 12801 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12802 tree v2df_ftype_double_double
b4de2f7d
AH
12803 = build_function_type_list (V2DF_type_node,
12804 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12805 tree int_ftype_v8hi_int
b4de2f7d
AH
12806 = build_function_type_list (integer_type_node,
12807 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12808 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12809 = build_function_type_list (V8HI_type_node,
12810 V8HI_type_node, integer_type_node,
12811 integer_type_node, NULL_TREE);
916b60b7 12812 tree v2di_ftype_v2di_int
b4de2f7d
AH
12813 = build_function_type_list (V2DI_type_node,
12814 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12815 tree v4si_ftype_v4si_int
b4de2f7d
AH
12816 = build_function_type_list (V4SI_type_node,
12817 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12818 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12819 = build_function_type_list (V8HI_type_node,
12820 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12821 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12822 = build_function_type_list (V8HI_type_node,
12823 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12824 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12825 = build_function_type_list (V4SI_type_node,
12826 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12827 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12828 = build_function_type_list (V4SI_type_node,
12829 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12830 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12831 = build_function_type_list (long_long_unsigned_type_node,
12832 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
12833 tree di_ftype_v2si_v2si
12834 = build_function_type_list (long_long_unsigned_type_node,
12835 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 12836 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12837 = build_function_type_list (V2DI_type_node,
12838 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
12839 tree v2di_ftype_v4si_v4si
12840 = build_function_type_list (V2DI_type_node,
12841 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 12842 tree int_ftype_v16qi
b4de2f7d 12843 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
12844 tree v16qi_ftype_pcchar
12845 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
12846 tree void_ftype_pchar_v16qi
12847 = build_function_type_list (void_type_node,
12848 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
12849 tree v4si_ftype_pcint
12850 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12851 tree void_ftype_pcint_v4si
f02e1358 12852 = build_function_type_list (void_type_node,
068f5dea 12853 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
12854 tree v2di_ftype_v2di
12855 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 12856
f8a1ebc6
JH
12857 tree float80_type;
12858 tree float128_type;
12859
12860 /* The __float80 type. */
12861 if (TYPE_MODE (long_double_type_node) == XFmode)
12862 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12863 "__float80");
12864 else
12865 {
12866 /* long double is not XFmode here, so build a separate 80-bit REAL_TYPE for __float80. */
12867 float80_type = make_node (REAL_TYPE);
968a7562 12868 TYPE_PRECISION (float80_type) = 80;
f8a1ebc6
JH
12869 layout_type (float80_type);
12870 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12871 }
12872
12873 float128_type = make_node (REAL_TYPE);
12874 TYPE_PRECISION (float128_type) = 128;
12875 layout_type (float128_type);
12876 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12877
bd793c65
BS
12878 /* Add all builtins that are more or less simple operations on two
12879 operands. */
ca7558fc 12880 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12881 {
12882 /* Use one of the operands; the target can have a different mode for
12883 mask-generating compares. */
12884 enum machine_mode mode;
12885 tree type;
12886
12887 if (d->name == 0)
12888 continue;
12889 mode = insn_data[d->icode].operand[1].mode;
12890
bd793c65
BS
12891 switch (mode)
12892 {
fbe5eb6d
BS
12893 case V16QImode:
12894 type = v16qi_ftype_v16qi_v16qi;
12895 break;
12896 case V8HImode:
12897 type = v8hi_ftype_v8hi_v8hi;
12898 break;
12899 case V4SImode:
12900 type = v4si_ftype_v4si_v4si;
12901 break;
12902 case V2DImode:
12903 type = v2di_ftype_v2di_v2di;
12904 break;
12905 case V2DFmode:
12906 type = v2df_ftype_v2df_v2df;
12907 break;
12908 case TImode:
12909 type = ti_ftype_ti_ti;
12910 break;
bd793c65
BS
12911 case V4SFmode:
12912 type = v4sf_ftype_v4sf_v4sf;
12913 break;
12914 case V8QImode:
12915 type = v8qi_ftype_v8qi_v8qi;
12916 break;
12917 case V4HImode:
12918 type = v4hi_ftype_v4hi_v4hi;
12919 break;
12920 case V2SImode:
12921 type = v2si_ftype_v2si_v2si;
12922 break;
bd793c65
BS
12923 case DImode:
12924 type = di_ftype_di_di;
12925 break;
12926
12927 default:
12928 abort ();
12929 }
0f290768 12930
bd793c65 12931 /* Override for comparisons. */
ef719a44
RH
12932 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
12933 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
bd793c65
BS
12934 type = v4si_ftype_v4sf_v4sf;
12935
ef719a44
RH
12936 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
12937 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
fbe5eb6d
BS
12938 type = v2di_ftype_v2df_v2df;
12939
eeb06b1b 12940 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12941 }
12942
12943 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12944 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12945 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
12946 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12947 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12948 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12949
12950 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12951 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12952 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12953
12954 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12955 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12956
12957 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12958 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12959
bd793c65 12960 /* comi/ucomi insns. */
ca7558fc 12961 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12962 if (d->mask == MASK_SSE2)
12963 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12964 else
12965 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12966
1255c85c
BS
12967 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12968 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12969 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12970
37f22004
L
12971 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12972 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12973 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12974 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12975 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12976 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12977 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12978 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12979 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12980 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12981 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12982
12983 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12984 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12985
12986 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12987
12988 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12989 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12990 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12991 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12992 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12993 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12994
12995 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12996 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12997 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12998 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12999
13000 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13001 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13002 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13003 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13004
13005 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13006
13007 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13008
13009 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13010 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13011 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13012 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13013 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13014 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13015
13016 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13017
47f339cf
BS
13018 /* Original 3DNow! */
13019 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13020 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13021 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13022 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13023 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13024 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13025 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13026 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13027 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13028 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13029 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13030 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13031 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13032 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13033 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13034 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13035 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13036 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13037 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13038 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13039
13040 /* 3DNow! extension as used in the Athlon CPU. */
13041 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13042 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13043 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13044 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13045 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13046 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13047
37f22004 13048 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13049
13050 /* SSE2 */
13051 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13052 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13053
13054 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13055 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13056 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13057
068f5dea
JH
13058 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13059 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13060 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13061 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13062 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13063 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13064
1c47af84
RH
13065 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13066 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13067 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
13068 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
fbe5eb6d
BS
13069
13070 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13071 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13072 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13073 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13074 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13075
13076 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13077 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13078 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13079 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13080
13081 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13082 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13083
13084 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13085
13086 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13087 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13088
13089 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13090 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13091 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13092 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13093 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13094
13095 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13096
13097 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13098 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13099 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13100 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13101
13102 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13103 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13104 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13105
13106 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13107 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13108 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13109 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13110
13111 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13112 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13113 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13114 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13115 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13116 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13117 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13118
068f5dea 13119 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13120 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13122
068f5dea
JH
13123 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13124 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13125 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13126 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13127 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13128 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13129 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13130
37f22004 13131 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13132
9e9fb0ce
JB
13133 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13135
916b60b7
BS
13136 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13137 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13139
13140 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13141 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13142 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13143
13144 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13145 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13146
ab3146fd 13147 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13148 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13149 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13150 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13151
ab3146fd 13152 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13153 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13154 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13155 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13156
13157 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13158 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13159
13160 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13161
13162 /* Prescott New Instructions. */
9e200aaf 13163 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13164 void_ftype_pcvoid_unsigned_unsigned,
13165 IX86_BUILTIN_MONITOR);
9e200aaf 13166 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13167 void_ftype_unsigned_unsigned,
13168 IX86_BUILTIN_MWAIT);
9e200aaf 13169 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13170 v4sf_ftype_v4sf,
13171 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13172 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13173 v4sf_ftype_v4sf,
13174 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13175 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13176 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13177 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13178 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13179 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13180 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13181}
13182
13183/* Errors in the source file can cause expand_expr to return const0_rtx
13184 where we expect a vector. To avoid crashing, use one of the vector
13185 clear instructions. */
13186static rtx
b96a374d 13187safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65 13188{
ef719a44
RH
13189 if (x == const0_rtx)
13190 x = CONST0_RTX (mode);
bd793c65
BS
13191 return x;
13192}
13193
13194/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13195
13196static rtx
b96a374d 13197ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65 13198{
ef719a44 13199 rtx pat, xops[3];
bd793c65
BS
13200 tree arg0 = TREE_VALUE (arglist);
13201 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13202 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13203 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13204 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13205 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13206 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13207
13208 if (VECTOR_MODE_P (mode0))
13209 op0 = safe_vector_operand (op0, mode0);
13210 if (VECTOR_MODE_P (mode1))
13211 op1 = safe_vector_operand (op1, mode1);
13212
e358acde 13213 if (optimize || !target
bd793c65
BS
13214 || GET_MODE (target) != tmode
13215 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13216 target = gen_reg_rtx (tmode);
13217
d9deed68
JH
13218 if (GET_MODE (op1) == SImode && mode1 == TImode)
13219 {
13220 rtx x = gen_reg_rtx (V4SImode);
13221 emit_insn (gen_sse2_loadd (x, op1));
13222 op1 = gen_lowpart (TImode, x);
13223 }
13224
bd793c65
BS
13225 /* In case the insn wants input operands in modes different from
13226 the result, abort. */
ebe75517
JH
13227 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13228 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13229 abort ();
13230
ef719a44 13231 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 13232 op0 = copy_to_mode_reg (mode0, op0);
ef719a44 13233 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13234 op1 = copy_to_mode_reg (mode1, op1);
13235
ef719a44
RH
13236 xops[0] = target;
13237 xops[1] = op0;
13238 xops[2] = op1;
13239 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
59bef189 13240
ef719a44 13241 pat = GEN_FCN (icode) (target, xops[1], xops[2]);
bd793c65
BS
13242 if (! pat)
13243 return 0;
13244 emit_insn (pat);
13245 return target;
13246}
13247
13248/* Subroutine of ix86_expand_builtin to take care of stores. */
13249
13250static rtx
b96a374d 13251ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13252{
13253 rtx pat;
13254 tree arg0 = TREE_VALUE (arglist);
13255 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13256 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13257 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13258 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13259 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13260
13261 if (VECTOR_MODE_P (mode1))
13262 op1 = safe_vector_operand (op1, mode1);
13263
13264 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13265 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13266
bd793c65
BS
13267 pat = GEN_FCN (icode) (op0, op1);
13268 if (pat)
13269 emit_insn (pat);
13270 return 0;
13271}
13272
13273/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13274
13275static rtx
b96a374d
AJ
13276ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13277 rtx target, int do_load)
bd793c65
BS
13278{
13279 rtx pat;
13280 tree arg0 = TREE_VALUE (arglist);
13281 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13282 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13283 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13284
e358acde 13285 if (optimize || !target
bd793c65
BS
13286 || GET_MODE (target) != tmode
13287 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13288 target = gen_reg_rtx (tmode);
13289 if (do_load)
13290 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13291 else
13292 {
13293 if (VECTOR_MODE_P (mode0))
13294 op0 = safe_vector_operand (op0, mode0);
13295
e358acde
RH
13296 if ((optimize && !register_operand (op0, mode0))
13297 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65
BS
13298 op0 = copy_to_mode_reg (mode0, op0);
13299 }
13300
13301 pat = GEN_FCN (icode) (target, op0);
13302 if (! pat)
13303 return 0;
13304 emit_insn (pat);
13305 return target;
13306}
13307
13308/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13309 sqrtss, rsqrtss, rcpss. */
13310
13311static rtx
b96a374d 13312ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13313{
13314 rtx pat;
13315 tree arg0 = TREE_VALUE (arglist);
59bef189 13316 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13317 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13318 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13319
e358acde 13320 if (optimize || !target
bd793c65
BS
13321 || GET_MODE (target) != tmode
13322 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13323 target = gen_reg_rtx (tmode);
13324
13325 if (VECTOR_MODE_P (mode0))
13326 op0 = safe_vector_operand (op0, mode0);
13327
e358acde
RH
13328 if ((optimize && !register_operand (op0, mode0))
13329 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 13330 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13331
59bef189
RH
13332 op1 = op0;
13333 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13334 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13335
59bef189 13336 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13337 if (! pat)
13338 return 0;
13339 emit_insn (pat);
13340 return target;
13341}
13342
13343/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13344
13345static rtx
b96a374d
AJ
13346ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13347 rtx target)
bd793c65
BS
13348{
13349 rtx pat;
13350 tree arg0 = TREE_VALUE (arglist);
13351 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13352 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13353 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13354 rtx op2;
13355 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13356 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13357 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13358 enum rtx_code comparison = d->comparison;
13359
13360 if (VECTOR_MODE_P (mode0))
13361 op0 = safe_vector_operand (op0, mode0);
13362 if (VECTOR_MODE_P (mode1))
13363 op1 = safe_vector_operand (op1, mode1);
13364
13365 /* Swap operands if we have a comparison that isn't available in
13366 hardware. */
e358acde 13367 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
bd793c65 13368 {
21e1b5f1
BS
13369 rtx tmp = gen_reg_rtx (mode1);
13370 emit_move_insn (tmp, op1);
bd793c65 13371 op1 = op0;
21e1b5f1 13372 op0 = tmp;
bd793c65 13373 }
21e1b5f1 13374
e358acde 13375 if (optimize || !target
21e1b5f1
BS
13376 || GET_MODE (target) != tmode
13377 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13378 target = gen_reg_rtx (tmode);
13379
e358acde
RH
13380 if ((optimize && !register_operand (op0, mode0))
13381 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
bd793c65 13382 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13383 if ((optimize && !register_operand (op1, mode1))
13384 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13385 op1 = copy_to_mode_reg (mode1, op1);
13386
13387 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13388 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13389 if (! pat)
13390 return 0;
13391 emit_insn (pat);
13392 return target;
13393}
13394
13395/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13396
13397static rtx
b96a374d
AJ
13398ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13399 rtx target)
bd793c65
BS
13400{
13401 rtx pat;
13402 tree arg0 = TREE_VALUE (arglist);
13403 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13404 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13405 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13406 rtx op2;
13407 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13408 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13409 enum rtx_code comparison = d->comparison;
13410
13411 if (VECTOR_MODE_P (mode0))
13412 op0 = safe_vector_operand (op0, mode0);
13413 if (VECTOR_MODE_P (mode1))
13414 op1 = safe_vector_operand (op1, mode1);
13415
13416 /* Swap operands if we have a comparison that isn't available in
13417 hardware. */
e358acde 13418 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
bd793c65
BS
13419 {
13420 rtx tmp = op1;
13421 op1 = op0;
13422 op0 = tmp;
bd793c65
BS
13423 }
13424
13425 target = gen_reg_rtx (SImode);
13426 emit_move_insn (target, const0_rtx);
13427 target = gen_rtx_SUBREG (QImode, target, 0);
13428
e358acde
RH
13429 if ((optimize && !register_operand (op0, mode0))
13430 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
bd793c65 13431 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13432 if ((optimize && !register_operand (op1, mode1))
13433 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
13434 op1 = copy_to_mode_reg (mode1, op1);
13435
13436 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13437 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13438 if (! pat)
13439 return 0;
13440 emit_insn (pat);
29628f27
BS
13441 emit_insn (gen_rtx_SET (VOIDmode,
13442 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13443 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13444 SET_DEST (pat),
29628f27 13445 const0_rtx)));
bd793c65 13446
6f1a6c5b 13447 return SUBREG_REG (target);
bd793c65
BS
13448}
13449
13450/* Expand an expression EXP that calls a built-in function,
13451 with result going to TARGET if that's convenient
13452 (and in mode MODE if that's convenient).
13453 SUBTARGET may be used as the target for computing one of EXP's operands.
13454 IGNORE is nonzero if the value is to be ignored. */
13455
13456rtx
b96a374d
AJ
13457ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13458 enum machine_mode mode ATTRIBUTE_UNUSED,
13459 int ignore ATTRIBUTE_UNUSED)
bd793c65 13460{
8b60264b 13461 const struct builtin_description *d;
77ebd435 13462 size_t i;
bd793c65
BS
13463 enum insn_code icode;
13464 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13465 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13466 tree arg0, arg1, arg2;
bd793c65
BS
13467 rtx op0, op1, op2, pat;
13468 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13469 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
13470
13471 switch (fcode)
13472 {
13473 case IX86_BUILTIN_EMMS:
80e8bb90 13474 emit_insn (gen_mmx_emms ());
bd793c65
BS
13475 return 0;
13476
13477 case IX86_BUILTIN_SFENCE:
80e8bb90 13478 emit_insn (gen_sse_sfence ());
bd793c65
BS
13479 return 0;
13480
bd793c65 13481 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
13482 case IX86_BUILTIN_PEXTRW128:
13483 icode = (fcode == IX86_BUILTIN_PEXTRW
13484 ? CODE_FOR_mmx_pextrw
13485 : CODE_FOR_sse2_pextrw);
bd793c65
BS
13486 arg0 = TREE_VALUE (arglist);
13487 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13488 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13489 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13490 tmode = insn_data[icode].operand[0].mode;
13491 mode0 = insn_data[icode].operand[1].mode;
13492 mode1 = insn_data[icode].operand[2].mode;
13493
13494 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13495 op0 = copy_to_mode_reg (mode0, op0);
13496 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13497 {
ebe75517
JH
13498 error ("selector must be an integer constant in the range 0..%i",
13499 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 13500 return gen_reg_rtx (tmode);
bd793c65
BS
13501 }
13502 if (target == 0
13503 || GET_MODE (target) != tmode
13504 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13505 target = gen_reg_rtx (tmode);
13506 pat = GEN_FCN (icode) (target, op0, op1);
13507 if (! pat)
13508 return 0;
13509 emit_insn (pat);
13510 return target;
13511
13512 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
13513 case IX86_BUILTIN_PINSRW128:
13514 icode = (fcode == IX86_BUILTIN_PINSRW
13515 ? CODE_FOR_mmx_pinsrw
13516 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
13517 arg0 = TREE_VALUE (arglist);
13518 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13519 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13520 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13521 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13522 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13523 tmode = insn_data[icode].operand[0].mode;
13524 mode0 = insn_data[icode].operand[1].mode;
13525 mode1 = insn_data[icode].operand[2].mode;
13526 mode2 = insn_data[icode].operand[3].mode;
13527
13528 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13529 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13530 if ((optimize && !register_operand (op1, mode1))
13531 || ! (*insn_data[icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13532 op1 = copy_to_mode_reg (mode1, op1);
13533 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13534 {
ebe75517 13535 error ("selector must be an integer constant in the range 0..%i",
0703dceb 13536 fcode == IX86_BUILTIN_PINSRW ? 3:7);
bd793c65
BS
13537 return const0_rtx;
13538 }
13539 if (target == 0
13540 || GET_MODE (target) != tmode
13541 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13542 target = gen_reg_rtx (tmode);
13543 pat = GEN_FCN (icode) (target, op0, op1, op2);
13544 if (! pat)
13545 return 0;
13546 emit_insn (pat);
13547 return target;
13548
13549 case IX86_BUILTIN_MASKMOVQ:
077084dd 13550 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d 13551 icode = (fcode == IX86_BUILTIN_MASKMOVQ
80e8bb90 13552 ? CODE_FOR_mmx_maskmovq
ef719a44 13553 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
13554 /* Note the arg order is different from the operand order. */
13555 arg1 = TREE_VALUE (arglist);
13556 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13557 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13558 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13559 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13560 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13561 mode0 = insn_data[icode].operand[0].mode;
13562 mode1 = insn_data[icode].operand[1].mode;
13563 mode2 = insn_data[icode].operand[2].mode;
13564
80e8bb90
RH
13565 op0 = force_reg (Pmode, op0);
13566 op0 = gen_rtx_MEM (mode1, op0);
ef719a44 13567
5c464583 13568 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
13569 op0 = copy_to_mode_reg (mode0, op0);
13570 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13571 op1 = copy_to_mode_reg (mode1, op1);
13572 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13573 op2 = copy_to_mode_reg (mode2, op2);
13574 pat = GEN_FCN (icode) (op0, op1, op2);
13575 if (! pat)
13576 return 0;
13577 emit_insn (pat);
13578 return 0;
13579
13580 case IX86_BUILTIN_SQRTSS:
ef719a44 13581 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
bd793c65 13582 case IX86_BUILTIN_RSQRTSS:
ef719a44 13583 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
bd793c65 13584 case IX86_BUILTIN_RCPSS:
ef719a44 13585 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
bd793c65
BS
13586
13587 case IX86_BUILTIN_LOADAPS:
ef719a44 13588 return ix86_expand_unop_builtin (CODE_FOR_movv4sf, arglist, target, 1);
bd793c65
BS
13589
13590 case IX86_BUILTIN_LOADUPS:
13591 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13592
13593 case IX86_BUILTIN_STOREAPS:
ef719a44 13594 return ix86_expand_store_builtin (CODE_FOR_movv4sf, arglist);
f02e1358 13595
bd793c65 13596 case IX86_BUILTIN_STOREUPS:
e37af218 13597 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
13598
13599 case IX86_BUILTIN_LOADSS:
13600 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13601
13602 case IX86_BUILTIN_STORESS:
e37af218 13603 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 13604
0f290768 13605 case IX86_BUILTIN_LOADHPS:
bd793c65 13606 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
13607 case IX86_BUILTIN_LOADHPD:
13608 case IX86_BUILTIN_LOADLPD:
2cdb3148
RH
13609 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
13610 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
1c47af84
RH
13611 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
13612 : CODE_FOR_sse2_loadlpd);
bd793c65
BS
13613 arg0 = TREE_VALUE (arglist);
13614 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13615 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13616 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13617 tmode = insn_data[icode].operand[0].mode;
13618 mode0 = insn_data[icode].operand[1].mode;
13619 mode1 = insn_data[icode].operand[2].mode;
13620
e358acde 13621 op0 = force_reg (mode0, op0);
bd793c65 13622 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
e358acde 13623 if (optimize || target == 0
bd793c65 13624 || GET_MODE (target) != tmode
e358acde 13625 || !register_operand (target, tmode))
bd793c65
BS
13626 target = gen_reg_rtx (tmode);
13627 pat = GEN_FCN (icode) (target, op0, op1);
13628 if (! pat)
13629 return 0;
13630 emit_insn (pat);
13631 return target;
0f290768 13632
bd793c65
BS
13633 case IX86_BUILTIN_STOREHPS:
13634 case IX86_BUILTIN_STORELPS:
1c47af84
RH
13635 case IX86_BUILTIN_STOREHPD:
13636 case IX86_BUILTIN_STORELPD:
2cdb3148
RH
13637 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
13638 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
13639 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
1c47af84
RH
13640 : CODE_FOR_sse2_storelpd);
13641 arg0 = TREE_VALUE (arglist);
13642 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13643 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13644 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13645 mode0 = insn_data[icode].operand[0].mode;
13646 mode1 = insn_data[icode].operand[1].mode;
13647
13648 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
e358acde 13649 op1 = force_reg (mode1, op1);
1c47af84
RH
13650
13651 pat = GEN_FCN (icode) (op0, op1);
13652 if (! pat)
13653 return 0;
13654 emit_insn (pat);
13655 return const0_rtx;
bd793c65
BS
13656
13657 case IX86_BUILTIN_MOVNTPS:
e37af218 13658 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13659 case IX86_BUILTIN_MOVNTQ:
e37af218 13660 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
13661
13662 case IX86_BUILTIN_LDMXCSR:
13663 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13664 target = assign_386_stack_local (SImode, 0);
13665 emit_move_insn (target, op0);
80e8bb90 13666 emit_insn (gen_sse_ldmxcsr (target));
bd793c65
BS
13667 return 0;
13668
13669 case IX86_BUILTIN_STMXCSR:
13670 target = assign_386_stack_local (SImode, 0);
80e8bb90 13671 emit_insn (gen_sse_stmxcsr (target));
bd793c65
BS
13672 return copy_to_mode_reg (SImode, target);
13673
bd793c65 13674 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
13675 case IX86_BUILTIN_SHUFPD:
13676 icode = (fcode == IX86_BUILTIN_SHUFPS
13677 ? CODE_FOR_sse_shufps
13678 : CODE_FOR_sse2_shufpd);
bd793c65
BS
13679 arg0 = TREE_VALUE (arglist);
13680 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13681 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13682 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13683 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13684 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13685 tmode = insn_data[icode].operand[0].mode;
13686 mode0 = insn_data[icode].operand[1].mode;
13687 mode1 = insn_data[icode].operand[2].mode;
13688 mode2 = insn_data[icode].operand[3].mode;
13689
13690 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13691 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13692 if ((optimize && !register_operand (op1, mode1))
13693 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13694 op1 = copy_to_mode_reg (mode1, op1);
13695 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13696 {
13697 /* @@@ better error message */
13698 error ("mask must be an immediate");
6f1a6c5b 13699 return gen_reg_rtx (tmode);
bd793c65 13700 }
e358acde 13701 if (optimize || target == 0
bd793c65
BS
13702 || GET_MODE (target) != tmode
13703 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13704 target = gen_reg_rtx (tmode);
13705 pat = GEN_FCN (icode) (target, op0, op1, op2);
13706 if (! pat)
13707 return 0;
13708 emit_insn (pat);
13709 return target;
13710
13711 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
13712 case IX86_BUILTIN_PSHUFD:
13713 case IX86_BUILTIN_PSHUFHW:
13714 case IX86_BUILTIN_PSHUFLW:
13715 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13716 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13717 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13718 : CODE_FOR_mmx_pshufw);
bd793c65
BS
13719 arg0 = TREE_VALUE (arglist);
13720 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13721 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13722 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13723 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
13724 mode1 = insn_data[icode].operand[1].mode;
13725 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13726
29628f27
BS
13727 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13728 op0 = copy_to_mode_reg (mode1, op0);
13729 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
13730 {
13731 /* @@@ better error message */
13732 error ("mask must be an immediate");
13733 return const0_rtx;
13734 }
13735 if (target == 0
13736 || GET_MODE (target) != tmode
13737 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13738 target = gen_reg_rtx (tmode);
29628f27 13739 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13740 if (! pat)
13741 return 0;
13742 emit_insn (pat);
13743 return target;
13744
ab3146fd
ZD
13745 case IX86_BUILTIN_PSLLDQI128:
13746 case IX86_BUILTIN_PSRLDQI128:
13747 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13748 : CODE_FOR_sse2_lshrti3);
13749 arg0 = TREE_VALUE (arglist);
13750 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13751 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13752 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13753 tmode = insn_data[icode].operand[0].mode;
13754 mode1 = insn_data[icode].operand[1].mode;
13755 mode2 = insn_data[icode].operand[2].mode;
13756
13757 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13758 {
13759 op0 = copy_to_reg (op0);
13760 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13761 }
13762 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13763 {
13764 error ("shift must be an immediate");
13765 return const0_rtx;
13766 }
13767 target = gen_reg_rtx (V2DImode);
13768 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13769 if (! pat)
13770 return 0;
13771 emit_insn (pat);
13772 return target;
13773
47f339cf 13774 case IX86_BUILTIN_FEMMS:
80e8bb90 13775 emit_insn (gen_mmx_femms ());
47f339cf
BS
13776 return NULL_RTX;
13777
13778 case IX86_BUILTIN_PAVGUSB:
80e8bb90 13779 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
47f339cf
BS
13780
13781 case IX86_BUILTIN_PF2ID:
80e8bb90 13782 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
47f339cf
BS
13783
13784 case IX86_BUILTIN_PFACC:
80e8bb90 13785 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
47f339cf
BS
13786
13787 case IX86_BUILTIN_PFADD:
80e8bb90 13788 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
47f339cf
BS
13789
13790 case IX86_BUILTIN_PFCMPEQ:
80e8bb90 13791 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
47f339cf
BS
13792
13793 case IX86_BUILTIN_PFCMPGE:
80e8bb90 13794 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
47f339cf
BS
13795
13796 case IX86_BUILTIN_PFCMPGT:
80e8bb90 13797 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
47f339cf
BS
13798
13799 case IX86_BUILTIN_PFMAX:
80e8bb90 13800 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
47f339cf
BS
13801
13802 case IX86_BUILTIN_PFMIN:
80e8bb90 13803 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
47f339cf
BS
13804
13805 case IX86_BUILTIN_PFMUL:
80e8bb90 13806 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
47f339cf
BS
13807
13808 case IX86_BUILTIN_PFRCP:
80e8bb90 13809 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
47f339cf
BS
13810
13811 case IX86_BUILTIN_PFRCPIT1:
80e8bb90 13812 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
47f339cf
BS
13813
13814 case IX86_BUILTIN_PFRCPIT2:
80e8bb90 13815 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
47f339cf
BS
13816
13817 case IX86_BUILTIN_PFRSQIT1:
80e8bb90 13818 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
47f339cf
BS
13819
13820 case IX86_BUILTIN_PFRSQRT:
80e8bb90 13821 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
47f339cf
BS
13822
13823 case IX86_BUILTIN_PFSUB:
80e8bb90 13824 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
47f339cf
BS
13825
13826 case IX86_BUILTIN_PFSUBR:
80e8bb90 13827 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
47f339cf
BS
13828
13829 case IX86_BUILTIN_PI2FD:
80e8bb90 13830 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
47f339cf
BS
13831
13832 case IX86_BUILTIN_PMULHRW:
80e8bb90 13833 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
47f339cf 13834
47f339cf 13835 case IX86_BUILTIN_PF2IW:
80e8bb90 13836 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
47f339cf
BS
13837
13838 case IX86_BUILTIN_PFNACC:
80e8bb90 13839 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
47f339cf
BS
13840
13841 case IX86_BUILTIN_PFPNACC:
80e8bb90 13842 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
47f339cf
BS
13843
13844 case IX86_BUILTIN_PI2FW:
80e8bb90 13845 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
47f339cf
BS
13846
13847 case IX86_BUILTIN_PSWAPDSI:
80e8bb90 13848 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
47f339cf
BS
13849
13850 case IX86_BUILTIN_PSWAPDSF:
80e8bb90 13851 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
47f339cf 13852
e37af218 13853 case IX86_BUILTIN_SSE_ZERO:
ef719a44 13854 return CONST0_RTX (V4SFmode);
bd793c65 13855
bd793c65 13856 case IX86_BUILTIN_MMX_ZERO:
80e8bb90 13857 return const0_rtx;
bd793c65 13858
f02e1358 13859 case IX86_BUILTIN_CLRTI:
ef719a44 13860 return const0_rtx;
f02e1358 13861
fbe5eb6d 13862 case IX86_BUILTIN_SQRTSD:
ef719a44 13863 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
fbe5eb6d 13864 case IX86_BUILTIN_LOADAPD:
ef719a44 13865 return ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist, target, 1);
fbe5eb6d
BS
13866 case IX86_BUILTIN_LOADUPD:
13867 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13868
13869 case IX86_BUILTIN_STOREAPD:
ef719a44 13870 return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
fbe5eb6d
BS
13871 case IX86_BUILTIN_STOREUPD:
13872 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13873
13874 case IX86_BUILTIN_LOADSD:
13875 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13876
13877 case IX86_BUILTIN_STORESD:
ef719a44 13878 return ix86_expand_store_builtin (CODE_FOR_sse2_storelpd, arglist);
fbe5eb6d
BS
13879
13880 case IX86_BUILTIN_SETPD1:
13881 target = assign_386_stack_local (DFmode, 0);
13882 arg0 = TREE_VALUE (arglist);
13883 emit_move_insn (adjust_address (target, DFmode, 0),
13884 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13885 op0 = gen_reg_rtx (V2DFmode);
13886 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 13887 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
13888 return op0;
13889
13890 case IX86_BUILTIN_SETPD:
13891 target = assign_386_stack_local (V2DFmode, 0);
13892 arg0 = TREE_VALUE (arglist);
13893 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13894 emit_move_insn (adjust_address (target, DFmode, 0),
13895 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13896 emit_move_insn (adjust_address (target, DFmode, 8),
13897 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13898 op0 = gen_reg_rtx (V2DFmode);
ef719a44 13899 emit_move_insn (op0, target);
fbe5eb6d
BS
13900 return op0;
13901
13902 case IX86_BUILTIN_LOADRPD:
ef719a44 13903 target = ix86_expand_unop_builtin (CODE_FOR_movv2df, arglist,
fbe5eb6d 13904 gen_reg_rtx (V2DFmode), 1);
60c81c89 13905 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
13906 return target;
13907
13908 case IX86_BUILTIN_LOADPD1:
13909 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13910 gen_reg_rtx (V2DFmode), 1);
13911 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13912 return target;
13913
13914 case IX86_BUILTIN_STOREPD1:
ef719a44 13915 return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
fbe5eb6d 13916 case IX86_BUILTIN_STORERPD:
ef719a44 13917 return ix86_expand_store_builtin (CODE_FOR_movv2df, arglist);
fbe5eb6d 13918
48126a97 13919 case IX86_BUILTIN_CLRPD:
ef719a44 13920 return CONST0_RTX (V2DFmode);
48126a97 13921
fbe5eb6d
BS
13922 case IX86_BUILTIN_MFENCE:
13923 emit_insn (gen_sse2_mfence ());
13924 return 0;
13925 case IX86_BUILTIN_LFENCE:
13926 emit_insn (gen_sse2_lfence ());
13927 return 0;
13928
13929 case IX86_BUILTIN_CLFLUSH:
13930 arg0 = TREE_VALUE (arglist);
13931 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13932 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
13933 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13934 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
13935
13936 emit_insn (gen_sse2_clflush (op0));
13937 return 0;
13938
13939 case IX86_BUILTIN_MOVNTPD:
13940 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13941 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13942 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
13943 case IX86_BUILTIN_MOVNTI:
13944 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13945
f02e1358 13946 case IX86_BUILTIN_LOADDQA:
ef719a44 13947 return ix86_expand_unop_builtin (CODE_FOR_movv2di, arglist, target, 1);
f02e1358
JH
13948 case IX86_BUILTIN_LOADDQU:
13949 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13950 case IX86_BUILTIN_LOADD:
13951 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13952
13953 case IX86_BUILTIN_STOREDQA:
ef719a44 13954 return ix86_expand_store_builtin (CODE_FOR_movv2di, arglist);
f02e1358
JH
13955 case IX86_BUILTIN_STOREDQU:
13956 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13957 case IX86_BUILTIN_STORED:
13958 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13959
22c7c85e
L
13960 case IX86_BUILTIN_MONITOR:
13961 arg0 = TREE_VALUE (arglist);
13962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13963 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13964 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13965 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13966 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13967 if (!REG_P (op0))
13968 op0 = copy_to_mode_reg (SImode, op0);
13969 if (!REG_P (op1))
13970 op1 = copy_to_mode_reg (SImode, op1);
13971 if (!REG_P (op2))
13972 op2 = copy_to_mode_reg (SImode, op2);
ef719a44 13973 emit_insn (gen_sse3_monitor (op0, op1, op2));
22c7c85e
L
13974 return 0;
13975
13976 case IX86_BUILTIN_MWAIT:
13977 arg0 = TREE_VALUE (arglist);
13978 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13979 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13980 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13981 if (!REG_P (op0))
13982 op0 = copy_to_mode_reg (SImode, op0);
13983 if (!REG_P (op1))
13984 op1 = copy_to_mode_reg (SImode, op1);
ef719a44 13985 emit_insn (gen_sse3_mwait (op0, op1));
22c7c85e
L
13986 return 0;
13987
13988 case IX86_BUILTIN_LOADDDUP:
ef719a44 13989 return ix86_expand_unop_builtin (CODE_FOR_sse3_loadddup, arglist, target, 1);
22c7c85e
L
13990
13991 case IX86_BUILTIN_LDDQU:
ef719a44 13992 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target,
22c7c85e
L
13993 1);
13994
bd793c65
BS
13995 default:
13996 break;
13997 }
13998
ca7558fc 13999 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14000 if (d->code == fcode)
14001 {
14002 /* Compares are treated specially. */
ef719a44
RH
14003 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14004 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14005 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14006 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
bd793c65
BS
14007 return ix86_expand_sse_compare (d, arglist, target);
14008
14009 return ix86_expand_binop_builtin (d->icode, arglist, target);
14010 }
14011
ca7558fc 14012 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14013 if (d->code == fcode)
14014 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14015
ca7558fc 14016 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14017 if (d->code == fcode)
14018 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14019
bd793c65
BS
14020 /* @@@ Should really do something sensible here. */
14021 return 0;
bd793c65 14022}
4211a8fb
JH
14023
14024/* Store OPERAND to the memory after reload is completed. This means
f710504c 14025 that we can't easily use assign_stack_local. */
4211a8fb 14026rtx
b96a374d 14027ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14028{
898d374d 14029 rtx result;
4211a8fb
JH
14030 if (!reload_completed)
14031 abort ();
a5b378d6 14032 if (TARGET_RED_ZONE)
898d374d
JH
14033 {
14034 result = gen_rtx_MEM (mode,
14035 gen_rtx_PLUS (Pmode,
14036 stack_pointer_rtx,
14037 GEN_INT (-RED_ZONE_SIZE)));
14038 emit_move_insn (result, operand);
14039 }
a5b378d6 14040 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14041 {
898d374d 14042 switch (mode)
4211a8fb 14043 {
898d374d
JH
14044 case HImode:
14045 case SImode:
14046 operand = gen_lowpart (DImode, operand);
5efb1046 14047 /* FALLTHRU */
898d374d 14048 case DImode:
4211a8fb 14049 emit_insn (
898d374d
JH
14050 gen_rtx_SET (VOIDmode,
14051 gen_rtx_MEM (DImode,
14052 gen_rtx_PRE_DEC (DImode,
14053 stack_pointer_rtx)),
14054 operand));
14055 break;
14056 default:
14057 abort ();
14058 }
14059 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14060 }
14061 else
14062 {
14063 switch (mode)
14064 {
14065 case DImode:
14066 {
14067 rtx operands[2];
14068 split_di (&operand, 1, operands, operands + 1);
14069 emit_insn (
14070 gen_rtx_SET (VOIDmode,
14071 gen_rtx_MEM (SImode,
14072 gen_rtx_PRE_DEC (Pmode,
14073 stack_pointer_rtx)),
14074 operands[1]));
14075 emit_insn (
14076 gen_rtx_SET (VOIDmode,
14077 gen_rtx_MEM (SImode,
14078 gen_rtx_PRE_DEC (Pmode,
14079 stack_pointer_rtx)),
14080 operands[0]));
14081 }
14082 break;
14083 case HImode:
14084 /* It is better to store HImodes as SImodes. */
14085 if (!TARGET_PARTIAL_REG_STALL)
14086 operand = gen_lowpart (SImode, operand);
5efb1046 14087 /* FALLTHRU */
898d374d 14088 case SImode:
4211a8fb 14089 emit_insn (
898d374d
JH
14090 gen_rtx_SET (VOIDmode,
14091 gen_rtx_MEM (GET_MODE (operand),
14092 gen_rtx_PRE_DEC (SImode,
14093 stack_pointer_rtx)),
14094 operand));
14095 break;
14096 default:
14097 abort ();
4211a8fb 14098 }
898d374d 14099 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14100 }
898d374d 14101 return result;
4211a8fb
JH
14102}
14103
14104/* Free operand from the memory. */
14105void
b96a374d 14106ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14107{
a5b378d6 14108 if (!TARGET_RED_ZONE)
898d374d
JH
14109 {
14110 int size;
14111
14112 if (mode == DImode || TARGET_64BIT)
14113 size = 8;
14114 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14115 size = 2;
14116 else
14117 size = 4;
14118 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14119 to pop or add instruction if registers are available. */
14120 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14121 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14122 GEN_INT (size))));
14123 }
4211a8fb 14124}
a946dd00 14125
f84aa48a
JH
14126/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14127 QImode must go into class Q_REGS.
14128 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14129 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14130enum reg_class
b96a374d 14131ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14132{
1877be45
JH
14133 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14134 return NO_REGS;
f84aa48a
JH
14135 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14136 {
14137 /* SSE can't load any constant directly yet. */
14138 if (SSE_CLASS_P (class))
14139 return NO_REGS;
14140 /* Floats can load 0 and 1. */
14141 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14142 {
14143 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14144 if (MAYBE_SSE_CLASS_P (class))
14145 return (reg_class_subset_p (class, GENERAL_REGS)
14146 ? GENERAL_REGS : FLOAT_REGS);
14147 else
14148 return class;
14149 }
14150 /* General regs can load everything. */
14151 if (reg_class_subset_p (class, GENERAL_REGS))
14152 return GENERAL_REGS;
14153 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14154 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14155 return NO_REGS;
14156 }
14157 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14158 return NO_REGS;
14159 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14160 return Q_REGS;
14161 return class;
14162}
14163
14164/* If we are copying between general and FP registers, we need a memory
14165 location. The same is true for SSE and MMX registers.
14166
14167 The macro can't work reliably when one of the CLASSES is class containing
14168 registers from multiple units (SSE, MMX, integer). We avoid this by never
14169 combining those units in single alternative in the machine description.
14170 Ensure that this constraint holds to avoid unexpected surprises.
14171
14172 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14173 enforce these sanity checks. */
14174int
b96a374d
AJ
14175ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14176 enum machine_mode mode, int strict)
f84aa48a
JH
14177{
14178 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14179 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14180 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14181 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14182 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14183 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14184 {
14185 if (strict)
14186 abort ();
14187 else
14188 return 1;
14189 }
14190 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8f62128d
JH
14191 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14192 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14193 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14194 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
f84aa48a
JH
14195}
14196/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14197 one in class CLASS2.
f84aa48a
JH
14198
14199 It is not required that the cost always equal 2 when FROM is the same as TO;
14200 on some machines it is expensive to move between registers if they are not
14201 general registers. */
14202int
b96a374d
AJ
14203ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14204 enum reg_class class2)
f84aa48a
JH
14205{
14206 /* In case we require secondary memory, compute cost of the store followed
b96a374d 14207 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
14208 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14209
f84aa48a
JH
14210 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14211 {
d631b80a
RH
14212 int cost = 1;
14213
14214 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14215 MEMORY_MOVE_COST (mode, class1, 1));
14216 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14217 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14218
d631b80a
RH
14219 /* In case of copying from general_purpose_register we may emit multiple
14220 stores followed by single load causing memory size mismatch stall.
d1f87653 14221 Count this as arbitrarily high cost of 20. */
62415523 14222 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14223 cost += 20;
14224
14225 /* In the case of FP/MMX moves, the registers actually overlap, and we
14226 have to switch modes in order to treat them differently. */
14227 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14228 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14229 cost += 20;
14230
14231 return cost;
f84aa48a 14232 }
d631b80a 14233
92d0fb09 14234 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14235 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14236 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14237 return ix86_cost->mmxsse_to_integer;
14238 if (MAYBE_FLOAT_CLASS_P (class1))
14239 return ix86_cost->fp_move;
14240 if (MAYBE_SSE_CLASS_P (class1))
14241 return ix86_cost->sse_move;
14242 if (MAYBE_MMX_CLASS_P (class1))
14243 return ix86_cost->mmx_move;
f84aa48a
JH
14244 return 2;
14245}
14246
a946dd00
JH
14247/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14248int
b96a374d 14249ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14250{
14251 /* Flags and only flags can only hold CCmode values. */
14252 if (CC_REGNO_P (regno))
14253 return GET_MODE_CLASS (mode) == MODE_CC;
14254 if (GET_MODE_CLASS (mode) == MODE_CC
14255 || GET_MODE_CLASS (mode) == MODE_RANDOM
14256 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14257 return 0;
14258 if (FP_REGNO_P (regno))
14259 return VALID_FP_MODE_P (mode);
14260 if (SSE_REGNO_P (regno))
dcbca208 14261 {
6c4ccfd8
RH
14262 /* We implement the move patterns for all vector modes into and
14263 out of SSE registers, even when no operation instructions
14264 are available. */
14265 return (VALID_SSE_REG_MODE (mode)
14266 || VALID_SSE2_REG_MODE (mode)
14267 || VALID_MMX_REG_MODE (mode)
14268 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 14269 }
a946dd00 14270 if (MMX_REGNO_P (regno))
dcbca208 14271 {
6c4ccfd8
RH
14272 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14273 so if the register is available at all, then we can move data of
14274 the given mode into or out of it. */
14275 return (VALID_MMX_REG_MODE (mode)
14276 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 14277 }
a946dd00
JH
14278 /* We handle both integer and floats in the general purpose registers.
14279 In future we should be able to handle vector modes as well. */
14280 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14281 return 0;
14282 /* Take care for QImode values - they can be in non-QI regs, but then
14283 they do cause partial register stalls. */
d2836273 14284 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14285 return 1;
14286 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14287}
fa79946e
JH
14288
14289/* Return the cost of moving data of mode M between a
14290 register and memory. A value of 2 is the default; this cost is
14291 relative to those in `REGISTER_MOVE_COST'.
14292
14293 If moving between registers and memory is more expensive than
14294 between two registers, you should define this macro to express the
a4f31c00
AJ
14295 relative cost.
14296
fa79946e
JH
14297 Model also increased moving costs of QImode registers in non
14298 Q_REGS classes.
14299 */
14300int
b96a374d 14301ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14302{
14303 if (FLOAT_CLASS_P (class))
14304 {
14305 int index;
14306 switch (mode)
14307 {
14308 case SFmode:
14309 index = 0;
14310 break;
14311 case DFmode:
14312 index = 1;
14313 break;
14314 case XFmode:
fa79946e
JH
14315 index = 2;
14316 break;
14317 default:
14318 return 100;
14319 }
14320 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14321 }
14322 if (SSE_CLASS_P (class))
14323 {
14324 int index;
14325 switch (GET_MODE_SIZE (mode))
14326 {
14327 case 4:
14328 index = 0;
14329 break;
14330 case 8:
14331 index = 1;
14332 break;
14333 case 16:
14334 index = 2;
14335 break;
14336 default:
14337 return 100;
14338 }
14339 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14340 }
14341 if (MMX_CLASS_P (class))
14342 {
14343 int index;
14344 switch (GET_MODE_SIZE (mode))
14345 {
14346 case 4:
14347 index = 0;
14348 break;
14349 case 8:
14350 index = 1;
14351 break;
14352 default:
14353 return 100;
14354 }
14355 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14356 }
14357 switch (GET_MODE_SIZE (mode))
14358 {
14359 case 1:
14360 if (in)
14361 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14362 : ix86_cost->movzbl_load);
14363 else
14364 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14365 : ix86_cost->int_store[0] + 4);
14366 break;
14367 case 2:
14368 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14369 default:
14370 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14371 if (mode == TFmode)
14372 mode = XFmode;
3bb7e126 14373 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14374 * (((int) GET_MODE_SIZE (mode)
14375 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14376 }
14377}
0ecf09f9 14378
3c50106f
RH
14379/* Compute a (partial) cost for rtx X. Return true if the complete
14380 cost has been computed, and false if subexpressions should be
14381 scanned. In either case, *TOTAL contains the cost result. */
14382
14383static bool
b96a374d 14384ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14385{
14386 enum machine_mode mode = GET_MODE (x);
14387
14388 switch (code)
14389 {
14390 case CONST_INT:
14391 case CONST:
14392 case LABEL_REF:
14393 case SYMBOL_REF:
8fe75e43 14394 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 14395 *total = 3;
8fe75e43 14396 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 14397 *total = 2;
3504dad3
JH
14398 else if (flag_pic && SYMBOLIC_CONST (x)
14399 && (!TARGET_64BIT
14400 || (!GET_CODE (x) != LABEL_REF
14401 && (GET_CODE (x) != SYMBOL_REF
12969f45 14402 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14403 *total = 1;
14404 else
14405 *total = 0;
14406 return true;
14407
14408 case CONST_DOUBLE:
14409 if (mode == VOIDmode)
14410 *total = 0;
14411 else
14412 switch (standard_80387_constant_p (x))
14413 {
14414 case 1: /* 0.0 */
14415 *total = 1;
14416 break;
881b2a96 14417 default: /* Other constants */
3c50106f
RH
14418 *total = 2;
14419 break;
881b2a96
RS
14420 case 0:
14421 case -1:
3c50106f
RH
14422 /* Start with (MEM (SYMBOL_REF)), since that's where
14423 it'll probably end up. Add a penalty for size. */
14424 *total = (COSTS_N_INSNS (1)
3504dad3 14425 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14426 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14427 break;
14428 }
14429 return true;
14430
14431 case ZERO_EXTEND:
14432 /* The zero extensions is often completely free on x86_64, so make
14433 it as cheap as possible. */
14434 if (TARGET_64BIT && mode == DImode
14435 && GET_MODE (XEXP (x, 0)) == SImode)
14436 *total = 1;
14437 else if (TARGET_ZERO_EXTEND_WITH_AND)
14438 *total = COSTS_N_INSNS (ix86_cost->add);
14439 else
14440 *total = COSTS_N_INSNS (ix86_cost->movzx);
14441 return false;
14442
14443 case SIGN_EXTEND:
14444 *total = COSTS_N_INSNS (ix86_cost->movsx);
14445 return false;
14446
14447 case ASHIFT:
14448 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14449 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14450 {
14451 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14452 if (value == 1)
14453 {
14454 *total = COSTS_N_INSNS (ix86_cost->add);
14455 return false;
14456 }
14457 if ((value == 2 || value == 3)
3c50106f
RH
14458 && ix86_cost->lea <= ix86_cost->shift_const)
14459 {
14460 *total = COSTS_N_INSNS (ix86_cost->lea);
14461 return false;
14462 }
14463 }
5efb1046 14464 /* FALLTHRU */
3c50106f
RH
14465
14466 case ROTATE:
14467 case ASHIFTRT:
14468 case LSHIFTRT:
14469 case ROTATERT:
14470 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14471 {
14472 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14473 {
14474 if (INTVAL (XEXP (x, 1)) > 32)
14475 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14476 else
14477 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14478 }
14479 else
14480 {
14481 if (GET_CODE (XEXP (x, 1)) == AND)
14482 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14483 else
14484 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14485 }
14486 }
14487 else
14488 {
14489 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14490 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14491 else
14492 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14493 }
14494 return false;
14495
14496 case MULT:
14497 if (FLOAT_MODE_P (mode))
3c50106f 14498 {
4a5eab38
PB
14499 *total = COSTS_N_INSNS (ix86_cost->fmul);
14500 return false;
3c50106f
RH
14501 }
14502 else
14503 {
4a5eab38
PB
14504 rtx op0 = XEXP (x, 0);
14505 rtx op1 = XEXP (x, 1);
14506 int nbits;
14507 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14508 {
14509 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14510 for (nbits = 0; value != 0; value &= value - 1)
14511 nbits++;
14512 }
14513 else
14514 /* This is arbitrary. */
14515 nbits = 7;
14516
14517 /* Compute costs correctly for widening multiplication. */
14518 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14519 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14520 == GET_MODE_SIZE (mode))
14521 {
14522 int is_mulwiden = 0;
14523 enum machine_mode inner_mode = GET_MODE (op0);
14524
14525 if (GET_CODE (op0) == GET_CODE (op1))
14526 is_mulwiden = 1, op1 = XEXP (op1, 0);
14527 else if (GET_CODE (op1) == CONST_INT)
14528 {
14529 if (GET_CODE (op0) == SIGN_EXTEND)
14530 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14531 == INTVAL (op1);
14532 else
14533 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14534 }
14535
14536 if (is_mulwiden)
14537 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14538 }
f676971a 14539
4a5eab38
PB
14540 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14541 + nbits * ix86_cost->mult_bit)
14542 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14543
14544 return true;
3c50106f 14545 }
3c50106f
RH
14546
14547 case DIV:
14548 case UDIV:
14549 case MOD:
14550 case UMOD:
14551 if (FLOAT_MODE_P (mode))
14552 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14553 else
14554 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14555 return false;
14556
14557 case PLUS:
14558 if (FLOAT_MODE_P (mode))
14559 *total = COSTS_N_INSNS (ix86_cost->fadd);
e0c00392 14560 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
14561 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14562 {
14563 if (GET_CODE (XEXP (x, 0)) == PLUS
14564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14565 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14566 && CONSTANT_P (XEXP (x, 1)))
14567 {
14568 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14569 if (val == 2 || val == 4 || val == 8)
14570 {
14571 *total = COSTS_N_INSNS (ix86_cost->lea);
14572 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14573 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14574 outer_code);
14575 *total += rtx_cost (XEXP (x, 1), outer_code);
14576 return true;
14577 }
14578 }
14579 else if (GET_CODE (XEXP (x, 0)) == MULT
14580 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14581 {
14582 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14583 if (val == 2 || val == 4 || val == 8)
14584 {
14585 *total = COSTS_N_INSNS (ix86_cost->lea);
14586 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14587 *total += rtx_cost (XEXP (x, 1), outer_code);
14588 return true;
14589 }
14590 }
14591 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14592 {
14593 *total = COSTS_N_INSNS (ix86_cost->lea);
14594 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14595 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14596 *total += rtx_cost (XEXP (x, 1), outer_code);
14597 return true;
14598 }
14599 }
5efb1046 14600 /* FALLTHRU */
3c50106f
RH
14601
14602 case MINUS:
14603 if (FLOAT_MODE_P (mode))
14604 {
14605 *total = COSTS_N_INSNS (ix86_cost->fadd);
14606 return false;
14607 }
5efb1046 14608 /* FALLTHRU */
3c50106f
RH
14609
14610 case AND:
14611 case IOR:
14612 case XOR:
14613 if (!TARGET_64BIT && mode == DImode)
14614 {
14615 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14616 + (rtx_cost (XEXP (x, 0), outer_code)
14617 << (GET_MODE (XEXP (x, 0)) != DImode))
14618 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 14619 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
14620 return true;
14621 }
5efb1046 14622 /* FALLTHRU */
3c50106f
RH
14623
14624 case NEG:
14625 if (FLOAT_MODE_P (mode))
14626 {
14627 *total = COSTS_N_INSNS (ix86_cost->fchs);
14628 return false;
14629 }
5efb1046 14630 /* FALLTHRU */
3c50106f
RH
14631
14632 case NOT:
14633 if (!TARGET_64BIT && mode == DImode)
14634 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14635 else
14636 *total = COSTS_N_INSNS (ix86_cost->add);
14637 return false;
14638
c271ba77
KH
14639 case COMPARE:
14640 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14641 && XEXP (XEXP (x, 0), 1) == const1_rtx
14642 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14643 && XEXP (x, 1) == const0_rtx)
14644 {
14645 /* This kind of construct is implemented using test[bwl].
14646 Treat it as if we had an AND. */
14647 *total = (COSTS_N_INSNS (ix86_cost->add)
14648 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14649 + rtx_cost (const1_rtx, outer_code));
14650 return true;
14651 }
14652 return false;
14653
3c50106f 14654 case FLOAT_EXTEND:
dcbca208
RH
14655 if (!TARGET_SSE_MATH
14656 || mode == XFmode
14657 || (mode == DFmode && !TARGET_SSE2))
3c50106f
RH
14658 *total = 0;
14659 return false;
14660
14661 case ABS:
14662 if (FLOAT_MODE_P (mode))
14663 *total = COSTS_N_INSNS (ix86_cost->fabs);
14664 return false;
14665
14666 case SQRT:
14667 if (FLOAT_MODE_P (mode))
14668 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14669 return false;
14670
74dc3e94
RH
14671 case UNSPEC:
14672 if (XINT (x, 1) == UNSPEC_TP)
14673 *total = 0;
14674 return false;
14675
3c50106f
RH
14676 default:
14677 return false;
14678 }
14679}
14680
b069de3b
SS
#if TARGET_MACHO

/* Counter used to give every emitted stub its own local labels.  */
static int current_machopic_label_num;

/* Write to FILE the definition of the stub named STUB for the symbol
   SYMB: the stub entry itself, its binder entry, and the lazy pointer
   that initially refers to the binder.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  char lazy_ptr[32];
  char *binder;
  char *indirect_sym;
  unsigned int len;
  int stub_num = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  len = strlen (stub);
  binder = alloca (len + 32);
  GEN_BINDER_NAME_FOR_STUB (binder, stub, len);

  len = strlen (symb);
  indirect_sym = alloca (len + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (indirect_sym, symb, len);

  sprintf (lazy_ptr, "L%d$lz", stub_num);

  /* Select the stub section for this flavour of stub.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  /* The stub entry: jump through the lazy pointer.  */
  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", indirect_sym);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n",
               stub_num, stub_num);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr, stub_num);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr);

  /* The binder entry: push the lazy pointer's address and jump to the
     dyld binding helper.  */
  fprintf (file, "%s:\n", binder);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr, stub_num);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer itself, initialized to the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr);
  fprintf (file, "\t.indirect_symbol %s\n", indirect_sym);
  fprintf (file, "\t.long %s\n", binder);
}
#endif /* TARGET_MACHO */
14743
162f023b
JH
14744/* Order the registers for register allocator. */
14745
14746void
b96a374d 14747x86_order_regs_for_local_alloc (void)
162f023b
JH
14748{
14749 int pos = 0;
14750 int i;
14751
14752 /* First allocate the local general purpose registers. */
14753 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14754 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14755 reg_alloc_order [pos++] = i;
14756
14757 /* Global general purpose registers. */
14758 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14759 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14760 reg_alloc_order [pos++] = i;
14761
14762 /* x87 registers come first in case we are doing FP math
14763 using them. */
14764 if (!TARGET_SSE_MATH)
14765 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14766 reg_alloc_order [pos++] = i;
fce5a9f2 14767
162f023b
JH
14768 /* SSE registers. */
14769 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14770 reg_alloc_order [pos++] = i;
14771 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14772 reg_alloc_order [pos++] = i;
14773
d1f87653 14774 /* x87 registers. */
162f023b
JH
14775 if (TARGET_SSE_MATH)
14776 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14777 reg_alloc_order [pos++] = i;
14778
14779 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14780 reg_alloc_order [pos++] = i;
14781
14782 /* Initialize the rest of array as we do not allocate some registers
14783 at all. */
14784 while (pos < FIRST_PSEUDO_REGISTER)
14785 reg_alloc_order [pos++] = 0;
14786}
194734e9 14787
4977bab6
ZW
14788#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14789#define TARGET_USE_MS_BITFIELD_LAYOUT 0
14790#endif
14791
fe77449a
DR
14792/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14793 struct attribute_spec.handler. */
14794static tree
b96a374d
AJ
14795ix86_handle_struct_attribute (tree *node, tree name,
14796 tree args ATTRIBUTE_UNUSED,
14797 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
14798{
14799 tree *type = NULL;
14800 if (DECL_P (*node))
14801 {
14802 if (TREE_CODE (*node) == TYPE_DECL)
14803 type = &TREE_TYPE (*node);
14804 }
14805 else
14806 type = node;
14807
14808 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14809 || TREE_CODE (*type) == UNION_TYPE)))
14810 {
9e637a26 14811 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
fe77449a
DR
14812 *no_add_attrs = true;
14813 }
14814
14815 else if ((is_attribute_p ("ms_struct", name)
14816 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14817 || ((is_attribute_p ("gcc_struct", name)
14818 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14819 {
9e637a26 14820 warning ("%qs incompatible attribute ignored",
fe77449a
DR
14821 IDENTIFIER_POINTER (name));
14822 *no_add_attrs = true;
14823 }
14824
14825 return NULL_TREE;
14826}
14827
4977bab6 14828static bool
b96a374d 14829ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 14830{
fe77449a 14831 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 14832 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 14833 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
14834}
14835
483ab821
MM
14836/* Returns an expression indicating where the this parameter is
14837 located on entry to the FUNCTION. */
14838
14839static rtx
b96a374d 14840x86_this_parameter (tree function)
483ab821
MM
14841{
14842 tree type = TREE_TYPE (function);
14843
3961e8fe
RH
14844 if (TARGET_64BIT)
14845 {
61f71b34 14846 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
3961e8fe
RH
14847 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14848 }
14849
e767b5be 14850 if (ix86_function_regparm (type, function) > 0)
483ab821
MM
14851 {
14852 tree parm;
14853
14854 parm = TYPE_ARG_TYPES (type);
14855 /* Figure out whether or not the function has a variable number of
14856 arguments. */
3961e8fe 14857 for (; parm; parm = TREE_CHAIN (parm))
483ab821
MM
14858 if (TREE_VALUE (parm) == void_type_node)
14859 break;
e767b5be 14860 /* If not, the this parameter is in the first argument. */
483ab821 14861 if (parm)
e767b5be
JH
14862 {
14863 int regno = 0;
14864 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14865 regno = 2;
02e02343 14866 return gen_rtx_REG (SImode, regno);
e767b5be 14867 }
483ab821
MM
14868 }
14869
61f71b34 14870 if (aggregate_value_p (TREE_TYPE (type), type))
483ab821
MM
14871 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14872 else
14873 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14874}
14875
3961e8fe
RH
14876/* Determine whether x86_output_mi_thunk can succeed. */
14877
14878static bool
b96a374d
AJ
14879x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14880 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14881 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
14882{
14883 /* 64-bit can handle anything. */
14884 if (TARGET_64BIT)
14885 return true;
14886
14887 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 14888 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
14889 return true;
14890
14891 /* Need a free register for vcall_offset. */
14892 if (vcall_offset)
14893 return false;
14894
14895 /* Need a free register for GOT references. */
14896 if (flag_pic && !(*targetm.binds_local_p) (function))
14897 return false;
14898
14899 /* Otherwise ok. */
14900 return true;
14901}
14902
14903/* Output the assembler code for a thunk function. THUNK_DECL is the
14904 declaration for the thunk function itself, FUNCTION is the decl for
14905 the target function. DELTA is an immediate constant offset to be
272d0bee 14906 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 14907 *(*this + vcall_offset) should be added to THIS. */
483ab821 14908
c590b625 14909static void
b96a374d
AJ
14910x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14911 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14912 HOST_WIDE_INT vcall_offset, tree function)
194734e9 14913{
194734e9 14914 rtx xops[3];
3961e8fe
RH
14915 rtx this = x86_this_parameter (function);
14916 rtx this_reg, tmp;
194734e9 14917
3961e8fe
RH
14918 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14919 pull it in now and let DELTA benefit. */
14920 if (REG_P (this))
14921 this_reg = this;
14922 else if (vcall_offset)
14923 {
14924 /* Put the this parameter into %eax. */
14925 xops[0] = this;
14926 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14927 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14928 }
14929 else
14930 this_reg = NULL_RTX;
14931
14932 /* Adjust the this parameter by a fixed constant. */
14933 if (delta)
194734e9 14934 {
483ab821 14935 xops[0] = GEN_INT (delta);
3961e8fe
RH
14936 xops[1] = this_reg ? this_reg : this;
14937 if (TARGET_64BIT)
194734e9 14938 {
3961e8fe
RH
14939 if (!x86_64_general_operand (xops[0], DImode))
14940 {
14941 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14942 xops[1] = tmp;
14943 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14944 xops[0] = tmp;
14945 xops[1] = this;
14946 }
14947 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
14948 }
14949 else
3961e8fe 14950 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 14951 }
3961e8fe
RH
14952
14953 /* Adjust the this parameter by a value stored in the vtable. */
14954 if (vcall_offset)
194734e9 14955 {
3961e8fe
RH
14956 if (TARGET_64BIT)
14957 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14958 else
e767b5be
JH
14959 {
14960 int tmp_regno = 2 /* ECX */;
14961 if (lookup_attribute ("fastcall",
14962 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14963 tmp_regno = 0 /* EAX */;
14964 tmp = gen_rtx_REG (SImode, tmp_regno);
14965 }
483ab821 14966
3961e8fe
RH
14967 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14968 xops[1] = tmp;
14969 if (TARGET_64BIT)
14970 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14971 else
14972 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 14973
3961e8fe
RH
14974 /* Adjust the this parameter. */
14975 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14976 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14977 {
14978 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14979 xops[0] = GEN_INT (vcall_offset);
14980 xops[1] = tmp2;
14981 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14982 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 14983 }
3961e8fe
RH
14984 xops[1] = this_reg;
14985 if (TARGET_64BIT)
14986 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14987 else
14988 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14989 }
194734e9 14990
3961e8fe
RH
14991 /* If necessary, drop THIS back to its stack slot. */
14992 if (this_reg && this_reg != this)
14993 {
14994 xops[0] = this_reg;
14995 xops[1] = this;
14996 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14997 }
194734e9 14998
89ce1c8f 14999 xops[0] = XEXP (DECL_RTL (function), 0);
3961e8fe
RH
15000 if (TARGET_64BIT)
15001 {
15002 if (!flag_pic || (*targetm.binds_local_p) (function))
15003 output_asm_insn ("jmp\t%P0", xops);
15004 else
fcbe3b89 15005 {
89ce1c8f 15006 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
15007 tmp = gen_rtx_CONST (Pmode, tmp);
15008 tmp = gen_rtx_MEM (QImode, tmp);
15009 xops[0] = tmp;
15010 output_asm_insn ("jmp\t%A0", xops);
15011 }
3961e8fe
RH
15012 }
15013 else
15014 {
15015 if (!flag_pic || (*targetm.binds_local_p) (function))
15016 output_asm_insn ("jmp\t%P0", xops);
194734e9 15017 else
21ff35fb 15018#if TARGET_MACHO
095fa594
SH
15019 if (TARGET_MACHO)
15020 {
11abc112 15021 rtx sym_ref = XEXP (DECL_RTL (function), 0);
f676971a
EC
15022 tmp = (gen_rtx_SYMBOL_REF
15023 (Pmode,
11abc112 15024 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
095fa594
SH
15025 tmp = gen_rtx_MEM (QImode, tmp);
15026 xops[0] = tmp;
15027 output_asm_insn ("jmp\t%0", xops);
15028 }
15029 else
15030#endif /* TARGET_MACHO */
194734e9 15031 {
3961e8fe
RH
15032 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15033 output_set_got (tmp);
15034
15035 xops[1] = tmp;
15036 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15037 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
15038 }
15039 }
15040}
e2500fed 15041
1bc7c5b6 15042static void
b96a374d 15043x86_file_start (void)
1bc7c5b6
ZW
15044{
15045 default_file_start ();
15046 if (X86_FILE_START_VERSION_DIRECTIVE)
15047 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15048 if (X86_FILE_START_FLTUSED)
15049 fputs ("\t.global\t__fltused\n", asm_out_file);
15050 if (ix86_asm_dialect == ASM_INTEL)
15051 fputs ("\t.intel_syntax\n", asm_out_file);
15052}
15053
e932b21b 15054int
b96a374d 15055x86_field_alignment (tree field, int computed)
e932b21b
JH
15056{
15057 enum machine_mode mode;
ad9335eb
JJ
15058 tree type = TREE_TYPE (field);
15059
15060 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15061 return computed;
ad9335eb
JJ
15062 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15063 ? get_inner_array_type (type) : type);
39e3a681
JJ
15064 if (mode == DFmode || mode == DCmode
15065 || GET_MODE_CLASS (mode) == MODE_INT
15066 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15067 return MIN (32, computed);
15068 return computed;
15069}
15070
a5fa1ecd
JH
15071/* Output assembler code to FILE to increment profiler label # LABELNO
15072 for profiling a function entry. */
15073void
b96a374d 15074x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
a5fa1ecd
JH
15075{
15076 if (TARGET_64BIT)
15077 if (flag_pic)
15078 {
15079#ifndef NO_PROFILE_COUNTERS
15080 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15081#endif
15082 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15083 }
15084 else
15085 {
15086#ifndef NO_PROFILE_COUNTERS
15087 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15088#endif
15089 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15090 }
15091 else if (flag_pic)
15092 {
15093#ifndef NO_PROFILE_COUNTERS
15094 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15095 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15096#endif
15097 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15098 }
15099 else
15100 {
15101#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 15102 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
a5fa1ecd
JH
15103 PROFILE_COUNT_REGISTER);
15104#endif
15105 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15106 }
15107}
15108
d2c49530
JH
15109/* We don't have exact information about the insn sizes, but we may assume
15110 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 15111 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
15112 99% of cases. */
15113
15114static int
b96a374d 15115min_insn_size (rtx insn)
d2c49530
JH
15116{
15117 int l = 0;
15118
15119 if (!INSN_P (insn) || !active_insn_p (insn))
15120 return 0;
15121
15122 /* Discard alignments we've emit and jump instructions. */
15123 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15124 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15125 return 0;
15126 if (GET_CODE (insn) == JUMP_INSN
15127 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15128 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15129 return 0;
15130
15131 /* Important case - calls are always 5 bytes.
15132 It is common to have many calls in the row. */
15133 if (GET_CODE (insn) == CALL_INSN
15134 && symbolic_reference_mentioned_p (PATTERN (insn))
15135 && !SIBLING_CALL_P (insn))
15136 return 5;
15137 if (get_attr_length (insn) <= 1)
15138 return 1;
15139
15140 /* For normal instructions we may rely on the sizes of addresses
15141 and the presence of symbol to require 4 bytes of encoding.
15142 This is not the case for jumps where references are PC relative. */
15143 if (GET_CODE (insn) != JUMP_INSN)
15144 {
15145 l = get_attr_length_address (insn);
15146 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15147 l = 4;
15148 }
15149 if (l)
15150 return 1+l;
15151 else
15152 return 2;
15153}
15154
c51e6d85 15155/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
d2c49530
JH
15156 window. */
15157
15158static void
be04394b 15159ix86_avoid_jump_misspredicts (void)
d2c49530
JH
15160{
15161 rtx insn, start = get_insns ();
15162 int nbytes = 0, njumps = 0;
15163 int isjump = 0;
15164
15165 /* Look for all minimal intervals of instructions containing 4 jumps.
15166 The intervals are bounded by START and INSN. NBYTES is the total
15167 size of instructions in the interval including INSN and not including
15168 START. When the NBYTES is smaller than 16 bytes, it is possible
15169 that the end of START and INSN ends up in the same 16byte page.
15170
15171 The smallest offset in the page INSN can start is the case where START
15172 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15173 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15174 */
15175 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15176 {
15177
15178 nbytes += min_insn_size (insn);
c263766c
RH
15179 if (dump_file)
15180 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
d2c49530
JH
15181 INSN_UID (insn), min_insn_size (insn));
15182 if ((GET_CODE (insn) == JUMP_INSN
15183 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15184 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15185 || GET_CODE (insn) == CALL_INSN)
15186 njumps++;
15187 else
15188 continue;
15189
15190 while (njumps > 3)
15191 {
15192 start = NEXT_INSN (start);
15193 if ((GET_CODE (start) == JUMP_INSN
15194 && GET_CODE (PATTERN (start)) != ADDR_VEC
15195 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15196 || GET_CODE (start) == CALL_INSN)
15197 njumps--, isjump = 1;
15198 else
15199 isjump = 0;
15200 nbytes -= min_insn_size (start);
15201 }
15202 if (njumps < 0)
15203 abort ();
c263766c
RH
15204 if (dump_file)
15205 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
15206 INSN_UID (start), INSN_UID (insn), nbytes);
15207
15208 if (njumps == 3 && isjump && nbytes < 16)
15209 {
15210 int padsize = 15 - nbytes + min_insn_size (insn);
15211
c263766c
RH
15212 if (dump_file)
15213 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15214 INSN_UID (insn), padsize);
d2c49530
JH
15215 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15216 }
15217 }
15218}
15219
be04394b 15220/* AMD Athlon works faster
d1f87653 15221 when RET is not destination of conditional jump or directly preceded
2a500b9e
JH
15222 by other jump instruction. We avoid the penalty by inserting NOP just
15223 before the RET instructions in such cases. */
18dbd950 15224static void
be04394b 15225ix86_pad_returns (void)
2a500b9e
JH
15226{
15227 edge e;
628f6a4e 15228 edge_iterator ei;
2a500b9e 15229
628f6a4e
BE
15230 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15231 {
15232 basic_block bb = e->src;
15233 rtx ret = BB_END (bb);
15234 rtx prev;
15235 bool replace = false;
15236
15237 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15238 || !maybe_hot_bb_p (bb))
15239 continue;
15240 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15241 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15242 break;
15243 if (prev && GET_CODE (prev) == CODE_LABEL)
15244 {
15245 edge e;
15246 edge_iterator ei;
15247
15248 FOR_EACH_EDGE (e, ei, bb->preds)
15249 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15250 && !(e->flags & EDGE_FALLTHRU))
15251 replace = true;
15252 }
15253 if (!replace)
15254 {
15255 prev = prev_active_insn (ret);
15256 if (prev
15257 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15258 || GET_CODE (prev) == CALL_INSN))
253c7a00 15259 replace = true;
628f6a4e
BE
15260 /* Empty functions get branch mispredict even when the jump destination
15261 is not visible to us. */
15262 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15263 replace = true;
15264 }
15265 if (replace)
15266 {
15267 emit_insn_before (gen_return_internal_long (), ret);
15268 delete_insn (ret);
15269 }
15270 }
be04394b
JH
15271}
15272
15273/* Implement machine specific optimizations. We implement padding of returns
15274 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15275static void
15276ix86_reorg (void)
15277{
15278 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15279 ix86_pad_returns ();
15280 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15281 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15282}
15283
4977bab6
ZW
15284/* Return nonzero when QImode register that must be represented via REX prefix
15285 is used. */
15286bool
b96a374d 15287x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15288{
15289 int i;
15290 extract_insn_cached (insn);
15291 for (i = 0; i < recog_data.n_operands; i++)
15292 if (REG_P (recog_data.operand[i])
15293 && REGNO (recog_data.operand[i]) >= 4)
15294 return true;
15295 return false;
15296}
15297
15298/* Return nonzero when P points to register encoded via REX prefix.
15299 Called via for_each_rtx. */
15300static int
b96a374d 15301extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15302{
15303 unsigned int regno;
15304 if (!REG_P (*p))
15305 return 0;
15306 regno = REGNO (*p);
15307 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15308}
15309
15310/* Return true when INSN mentions register that must be encoded using REX
15311 prefix. */
15312bool
b96a374d 15313x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15314{
15315 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15316}
15317
1d6ba901 15318/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
15319 optabs would emit if we didn't have TFmode patterns. */
15320
15321void
b96a374d 15322x86_emit_floatuns (rtx operands[2])
8d705469
JH
15323{
15324 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
15325 enum machine_mode mode, inmode;
15326
15327 inmode = GET_MODE (operands[1]);
15328 if (inmode != SImode
15329 && inmode != DImode)
15330 abort ();
8d705469
JH
15331
15332 out = operands[0];
1d6ba901 15333 in = force_reg (inmode, operands[1]);
8d705469
JH
15334 mode = GET_MODE (out);
15335 neglab = gen_label_rtx ();
15336 donelab = gen_label_rtx ();
15337 i1 = gen_reg_rtx (Pmode);
15338 f0 = gen_reg_rtx (mode);
15339
15340 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15341
15342 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15343 emit_jump_insn (gen_jump (donelab));
15344 emit_barrier ();
15345
15346 emit_label (neglab);
15347
15348 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15349 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15350 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15351 expand_float (f0, i0, 0);
15352 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15353
15354 emit_label (donelab);
15355}
15356
997404de
JH
15357/* Initialize vector TARGET via VALS. */
15358void
15359ix86_expand_vector_init (rtx target, rtx vals)
15360{
15361 enum machine_mode mode = GET_MODE (target);
15362 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15363 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15364 int i;
f676971a 15365
997404de
JH
15366 for (i = n_elts - 1; i >= 0; i--)
15367 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15368 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15369 break;
15370
f676971a 15371 /* Few special cases first...
997404de
JH
15372 ... constants are best loaded from constant pool. */
15373 if (i < 0)
15374 {
15375 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15376 return;
15377 }
15378
15379 /* ... values where only first field is non-constant are best loaded
1ae58c30 15380 from the pool and overwritten via move later. */
1c47af84 15381 if (i == 0)
997404de 15382 {
997404de
JH
15383 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15384 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
1c47af84 15385
997404de
JH
15386 switch (GET_MODE (target))
15387 {
1c47af84
RH
15388 case V2DFmode:
15389 emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
15390 break;
15391
15392 case V4SFmode:
15393 {
15394 /* ??? We can represent this better. */
15395 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15396 GET_MODE_INNER (mode), 0);
15397 op = force_reg (mode, op);
997404de 15398 emit_insn (gen_sse_movss (target, target, op));
1c47af84
RH
15399 }
15400 break;
15401
15402 default:
15403 break;
997404de
JH
15404 }
15405 return;
15406 }
15407
15408 /* And the busy sequence doing rotations. */
15409 switch (GET_MODE (target))
15410 {
15411 case V2DFmode:
15412 {
15413 rtx vecop0 =
15414 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15415 rtx vecop1 =
15416 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15417
15418 vecop0 = force_reg (V2DFmode, vecop0);
15419 vecop1 = force_reg (V2DFmode, vecop1);
15420 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15421 }
15422 break;
15423 case V4SFmode:
15424 {
15425 rtx vecop0 =
15426 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15427 rtx vecop1 =
15428 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15429 rtx vecop2 =
15430 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15431 rtx vecop3 =
15432 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15433 rtx tmp1 = gen_reg_rtx (V4SFmode);
15434 rtx tmp2 = gen_reg_rtx (V4SFmode);
15435
15436 vecop0 = force_reg (V4SFmode, vecop0);
15437 vecop1 = force_reg (V4SFmode, vecop1);
15438 vecop2 = force_reg (V4SFmode, vecop2);
15439 vecop3 = force_reg (V4SFmode, vecop3);
15440 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15441 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15442 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15443 }
15444 break;
15445 default:
15446 abort ();
15447 }
15448}
15449
f676971a
EC
15450/* Implements target hook vector_mode_supported_p. */
15451static bool
15452ix86_vector_mode_supported_p (enum machine_mode mode)
15453{
dcbca208 15454 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
f676971a 15455 return true;
dcbca208 15456 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
f676971a 15457 return true;
dcbca208 15458 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
f676971a 15459 return true;
dcbca208
RH
15460 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
15461 return true;
15462 return false;
f676971a
EC
15463}
15464
67dfe110
KH
15465/* Worker function for TARGET_MD_ASM_CLOBBERS.
15466
15467 We do this in the new i386 backend to maintain source compatibility
15468 with the old cc0-based compiler. */
15469
15470static tree
15471ix86_md_asm_clobbers (tree clobbers)
15472{
f676971a
EC
15473 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15474 clobbers);
15475 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15476 clobbers);
15477 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15478 clobbers);
67dfe110
KH
15479 return clobbers;
15480}
15481
3c5cb3e4
KH
15482/* Worker function for REVERSE_CONDITION. */
15483
15484enum rtx_code
15485ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15486{
15487 return (mode != CCFPmode && mode != CCFPUmode
15488 ? reverse_condition (code)
15489 : reverse_condition_maybe_unordered (code));
15490}
15491
5ea9cb6e
RS
15492/* Output code to perform an x87 FP register move, from OPERANDS[1]
15493 to OPERANDS[0]. */
15494
15495const char *
15496output_387_reg_move (rtx insn, rtx *operands)
15497{
15498 if (REG_P (operands[1])
15499 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15500 {
15501 if (REGNO (operands[0]) == FIRST_STACK_REG
15502 && TARGET_USE_FFREEP)
15503 return "ffreep\t%y0";
15504 return "fstp\t%y0";
15505 }
15506 if (STACK_TOP_P (operands[0]))
15507 return "fld%z1\t%y1";
15508 return "fst\t%y0";
15509}
15510
5ae27cfa
UB
15511/* Output code to perform a conditional jump to LABEL, if C2 flag in
15512 FP status register is set. */
15513
15514void
15515ix86_emit_fp_unordered_jump (rtx label)
15516{
15517 rtx reg = gen_reg_rtx (HImode);
15518 rtx temp;
15519
15520 emit_insn (gen_x86_fnstsw_1 (reg));
2484cc35
UB
15521
15522 if (TARGET_USE_SAHF)
15523 {
15524 emit_insn (gen_x86_sahf_1 (reg));
15525
f676971a 15526 temp = gen_rtx_REG (CCmode, FLAGS_REG);
2484cc35
UB
15527 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15528 }
15529 else
15530 {
15531 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15532
f676971a 15533 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
2484cc35
UB
15534 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15535 }
f676971a 15536
5ae27cfa
UB
15537 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15538 gen_rtx_LABEL_REF (VOIDmode, label),
15539 pc_rtx);
15540 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15541 emit_jump_insn (temp);
15542}
15543
c2fcfa4f
UB
15544/* Output code to perform a log1p XFmode calculation. */
15545
15546void ix86_emit_i387_log1p (rtx op0, rtx op1)
15547{
15548 rtx label1 = gen_label_rtx ();
15549 rtx label2 = gen_label_rtx ();
15550
15551 rtx tmp = gen_reg_rtx (XFmode);
15552 rtx tmp2 = gen_reg_rtx (XFmode);
15553
15554 emit_insn (gen_absxf2 (tmp, op1));
15555 emit_insn (gen_cmpxf (tmp,
15556 CONST_DOUBLE_FROM_REAL_VALUE (
15557 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15558 XFmode)));
15559 emit_jump_insn (gen_bge (label1));
15560
15561 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15562 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15563 emit_jump (label2);
15564
15565 emit_label (label1);
15566 emit_move_insn (tmp, CONST1_RTX (XFmode));
15567 emit_insn (gen_addxf3 (tmp, op1, tmp));
15568 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15569 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15570
15571 emit_label (label2);
15572}
f676971a 15573
a8e68029
DJ
15574/* Solaris named-section hook. Parameters are as for
15575 named_section_real. */
15576
15577static void
15578i386_solaris_elf_named_section (const char *name, unsigned int flags,
15579 tree decl)
15580{
15581 /* With Binutils 2.15, the "@unwind" marker must be specified on
15582 every occurrence of the ".eh_frame" section, not just the first
15583 one. */
15584 if (TARGET_64BIT
15585 && strcmp (name, ".eh_frame") == 0)
15586 {
15587 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15588 flags & SECTION_WRITE ? "aw" : "a");
15589 return;
15590 }
15591 default_elf_asm_named_section (name, flags, decl);
15592}
15593
e2500fed 15594#include "gt-i386.h"
This page took 4.968911 seconds and 5 git commands to generate.