/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

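/* Illustrative sketch, not part of the original file: the rtx cost
   hooks consult the active tuning table through the ix86_cost pointer
   above, roughly like

     cost = ix86_cost->mult_init[2]          (SImode multiply startup)
            + nbits * ix86_cost->mult_bit;   (per set bit of a constant)

   with field names following struct processor_costs in i386.h, so
   retuning for another CPU swaps only the table, never the code.  */
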
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just the lower part of
   scalar values in proper format, leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

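/* Illustrative sketch, not part of the original file: i386.h turns the
   bitmasks above into boolean predicates by testing the bit of the
   processor being compiled or tuned for, along the lines of

     #define CPUMASK      (1 << ix86_cpu)
     #define TARGET_CMOVE (x86_cmove & (1 << ix86_arch))

   so adding a new CPU means defining one m_* bit and or-ing it into
   each mask that applies.  */
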
/* If the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

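/* For illustration (not in the original source): reading the map above,
   REGNO_REG_CLASS (1) is DREG and REGNO_REG_CLASS (7) is NON_Q_REGS,
   matching the hard register order ax, dx, cx, bx, si, di, bp, sp.  */
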
/* The "default" register map used in 32-bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64-bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

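/* For illustration (not in the original source): reading the map above,
   a debugger asking about DWARF regno 1 gets gcc regno 2 (%ecx), and
   %st(3), gcc regno 11, is emitted as the stack-top-relative DWARF
   number 14, exactly as the comment's table lists.  */
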
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

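/* For illustration (not in the original source): with the x86-64 ABI
   values REGPARM_MAX = 6, UNITS_PER_WORD = 8 and SSE_REGPARM_MAX = 8,
   the register save area is 6*8 + 8*16 = 176 bytes: six 8-byte
   integer slots followed by eight 16-byte SSE slots.  */
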
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

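/* Illustrative sketch, not part of the original file: the per-function
   scratch slots declared above are handed out by assign_386_stack_local
   elsewhere in this file, roughly as in

     rtx slot = assign_386_stack_local (SImode, 0);

   which caches the MEM in ix86_stack_locals[SImode][0], so at most
   MAX_386_STACK_LOCALS slots per mode ever exist.  */
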
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	<- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

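/* For illustration (not in the original source): in a 32-bit function
   with a frame pointer, ARG_POINTER sits just above the saved pc, so
   frame_pointer_offset, hard_frame_pointer_offset and
   stack_pointer_offset grow in that order as more is pushed; e.g.
   hard_frame_pointer_offset is 8 (saved pc plus saved %ebp) before any
   [saved regs] are counted.  */
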
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));
bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

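/* For illustration (not in the original source): classify_argument
   below works per eightbyte, so a 16-byte struct { double d; int i; }
   classifies as {X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS}: the
   first half is a lone double, the second holds a 4-byte int plus
   4 bytes of padding, which is exactly the INTEGERSI case described
   above.  */
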
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 1;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

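  /* For illustration (not in the original source): the -malign-*
     options above are powers of two while the table defaults are byte
     alignments; e.g. -malign-loops=4 yields align_loops = 1 << 4 = 16,
     the same 16-byte loop alignment the table picks when tuning for
     the K8.  */
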
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

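  /* For illustration (not in the original source): the option value is
     a power of two in bytes, converted to bits above, so
     -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 16 * 8 = 128 bits, i.e. the 16-byte alignment __m128 wants.  */
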
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

e9a25f70
JL
1324 /* Keep nonleaf frame pointers. */
1325 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1326 flag_omit_frame_pointer = 1;
e075ae69
RH
1327
1328 /* If we're doing fast math, we don't care about comparison order
1329 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1330 if (flag_unsafe_math_optimizations)
e075ae69
RH
1331 target_flags &= ~MASK_IEEE_FP;
1332
30c99a84
RH
1333 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1334 since the insns won't need emulation. */
1335 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1336 target_flags &= ~MASK_NO_FANCY_MATH_387;
1337
14f73b5a
JH
1338 if (TARGET_64BIT)
1339 {
1340 if (TARGET_ALIGN_DOUBLE)
c725bd79 1341 error ("-malign-double makes no sense in 64-bit mode");
14f73b5a 1342 if (TARGET_RTD)
c725bd79 1343 error ("-mrtd calling convention not supported in 64-bit mode");
14f73b5a 1344 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1345 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1346 ix86_fpmath = FPMATH_SSE;
14f73b5a 1347 }
965f5423
JH
1348 else
1349 ix86_fpmath = FPMATH_387;
1350
1351 if (ix86_fpmath_string != 0)
1352 {
1353 if (! strcmp (ix86_fpmath_string, "387"))
1354 ix86_fpmath = FPMATH_387;
1355 else if (! strcmp (ix86_fpmath_string, "sse"))
1356 {
1357 if (!TARGET_SSE)
1358 {
1359 warning ("SSE instruction set disabled, using 387 arithmetic");
1360 ix86_fpmath = FPMATH_387;
1361 }
1362 else
1363 ix86_fpmath = FPMATH_SSE;
1364 }
1365 else if (! strcmp (ix86_fpmath_string, "387,sse")
1366 || ! strcmp (ix86_fpmath_string, "sse,387"))
1367 {
1368 if (!TARGET_SSE)
1369 {
1370 warning ("SSE instruction set disabled, using 387 arithmetic");
1371 ix86_fpmath = FPMATH_387;
1372 }
1373 else if (!TARGET_80387)
1374 {
1375 warning ("387 instruction set disabled, using SSE arithmetic");
1376 ix86_fpmath = FPMATH_SSE;
1377 }
1378 else
1379 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1380 }
fce5a9f2 1381 else
965f5423
JH
1382 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1383 }
14f73b5a 1384
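 /* For illustration (32-bit target): "-msse -mfpmath=sse" selects SSE
    scalar arithmetic; "-mno-sse -mfpmath=sse" warns and falls back to
    FPMATH_387; "-msse -mfpmath=sse,387" sets
    ix86_fpmath = FPMATH_SSE | FPMATH_387 so both register files are used.  */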
a7180f70
BS
1385 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1386 on by -msse. */
1387 if (TARGET_SSE)
e37af218
RH
1388 {
1389 target_flags |= MASK_MMX;
1390 x86_prefetch_sse = true;
1391 }
c6036a37 1392
47f339cf
BS
1393 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow.  */
1394 if (TARGET_3DNOW)
1395 {
1396 target_flags |= MASK_MMX;
1397 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1398 extensions it adds.  */
1399 if (x86_3dnow_a & (1 << ix86_arch))
1400 target_flags |= MASK_3DNOW_A;
1401 }
c6036a37 1402 if ((x86_accumulate_outgoing_args & CPUMASK)
9ef1b13a 1403 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1404 && !optimize_size)
1405 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1406
1407 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1408 {
1409 char *p;
1410 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1411 p = strchr (internal_label_prefix, 'X');
1412 internal_label_prefix_len = p - internal_label_prefix;
1413 *p = '\0';
1414 }
f5316dfe
MM
1415}
1416\f
32b5b1aa 1417void
c6aded7c 1418optimization_options (level, size)
32b5b1aa 1419 int level;
bb5177ac 1420 int size ATTRIBUTE_UNUSED;
32b5b1aa 1421{
e9a25f70
JL
1422 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1423 make the register-pressure problem even worse.  */
32b5b1aa
SC
1424#ifdef INSN_SCHEDULING
1425 if (level > 1)
1426 flag_schedule_insns = 0;
1427#endif
55ba61f3
JH
1428
1429 /* The default values of these switches depend on TARGET_64BIT,
1430 which is not known at this moment.  Mark these values with 2 and
1431 let the user override them.  If there is no command line option
1432 specifying them, we will set the defaults in override_options.  */
1433 if (optimize >= 1)
1434 flag_omit_frame_pointer = 2;
1435 flag_pcc_struct_return = 2;
1436 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1437}
b08de47e 1438\f
91d231cb
JM
1439/* Table of valid machine attributes. */
1440const struct attribute_spec ix86_attribute_table[] =
b08de47e 1441{
91d231cb 1442 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1443 /* Stdcall attribute says callee is responsible for popping arguments
1444 if they are not variable. */
91d231cb 1445 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1446 /* Fastcall attribute says callee is responsible for popping arguments
1447 if they are not variable. */
1448 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1449 /* Cdecl attribute says the callee is a normal C declaration */
1450 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1451 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1452 passed in registers. */
91d231cb
JM
1453 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1454#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1455 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1456 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb
JM
1458#endif
1459 { NULL, 0, 0, false, false, false, NULL }
1460};
1461
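/* For illustration, these attributes appear on function types in user
   code, e.g.:

     int __attribute__ ((stdcall)) f (int a, int b);      callee pops args
     int __attribute__ ((fastcall)) g (int a, int b);     a in ECX, b in EDX
     int __attribute__ ((regparm (3))) h (int a, int b);  up to 3 args in regs

   The handlers below validate such uses.  */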
4977bab6
ZW
1462/* If PIC, we cannot make sibling calls to global functions
1463 because the PLT requires %ebx live.
1464 If we are returning floats on the register stack, we cannot make
1465 sibling calls to functions that return floats. (The stack adjust
1466 instruction will wind up after the sibcall jump, and not be executed.) */
1467
1468static bool
1469ix86_function_ok_for_sibcall (decl, exp)
1470 tree decl;
1471 tree exp;
1472{
1473 /* If we are generating position-independent code, we cannot sibcall
1474 optimize any indirect call, or a direct call to a global function,
1475 as the PLT requires %ebx be live. */
1476 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1477 return false;
1478
1479 /* If we are returning floats on the 80387 register stack, we cannot
1480 make a sibcall from a function that doesn't return a float to a
1481 function that does; the necessary stack adjustment will not be
1482 executed. */
1483 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1484 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1485 return false;
1486
1487 /* If this call is indirect, we'll need to be able to use a call-clobbered
1488 register for the address of the target function. Make sure that all
1489 such registers are not used for passing parameters. */
1490 if (!decl && !TARGET_64BIT)
1491 {
1492 int regparm = ix86_regparm;
1493 tree attr, type;
1494
1495 /* We're looking at the CALL_EXPR, we need the type of the function. */
1496 type = TREE_OPERAND (exp, 0); /* pointer expression */
1497 type = TREE_TYPE (type); /* pointer type */
1498 type = TREE_TYPE (type); /* function type */
1499
1500 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1501 if (attr)
1502 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1503
1504 if (regparm >= 3)
1505 {
1506 /* ??? Need to count the actual number of registers to be used,
1507 not the possible number of registers. Fix later. */
1508 return false;
1509 }
1510 }
1511
1512 /* Otherwise okay. That also includes certain types of indirect calls. */
1513 return true;
1514}
1515
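/* For illustration: compiling "int bar (void) { return foo (); }" with
   -fpic on a 32-bit target, where foo is global, would route the tail
   call through the PLT and clobber %ebx, so the predicate above returns
   false and a normal call is emitted instead.  */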
e91f04de 1516/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1517 arguments as in struct attribute_spec.handler. */
1518static tree
1519ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1520 tree *node;
1521 tree name;
1522 tree args ATTRIBUTE_UNUSED;
1523 int flags ATTRIBUTE_UNUSED;
1524 bool *no_add_attrs;
1525{
1526 if (TREE_CODE (*node) != FUNCTION_TYPE
1527 && TREE_CODE (*node) != METHOD_TYPE
1528 && TREE_CODE (*node) != FIELD_DECL
1529 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1530 {
91d231cb
JM
1531 warning ("`%s' attribute only applies to functions",
1532 IDENTIFIER_POINTER (name));
1533 *no_add_attrs = true;
1534 }
e91f04de
CH
1535 else
1536 {
1537 if (is_attribute_p ("fastcall", name))
1538 {
1539 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1540 {
1541 error ("fastcall and stdcall attributes are not compatible");
1542 }
1543 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1544 {
1545 error ("fastcall and regparm attributes are not compatible");
1546 }
1547 }
1548 else if (is_attribute_p ("stdcall", name))
1549 {
1550 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1551 {
1552 error ("fastcall and stdcall attributes are not compatible");
1553 }
1554 }
1555 }
b08de47e 1556
91d231cb
JM
1557 if (TARGET_64BIT)
1558 {
1559 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1560 *no_add_attrs = true;
1561 }
b08de47e 1562
91d231cb
JM
1563 return NULL_TREE;
1564}
b08de47e 1565
91d231cb
JM
1566/* Handle a "regparm" attribute;
1567 arguments as in struct attribute_spec.handler. */
1568static tree
1569ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1570 tree *node;
1571 tree name;
1572 tree args;
1573 int flags ATTRIBUTE_UNUSED;
1574 bool *no_add_attrs;
1575{
1576 if (TREE_CODE (*node) != FUNCTION_TYPE
1577 && TREE_CODE (*node) != METHOD_TYPE
1578 && TREE_CODE (*node) != FIELD_DECL
1579 && TREE_CODE (*node) != TYPE_DECL)
1580 {
1581 warning ("`%s' attribute only applies to functions",
1582 IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1584 }
1585 else
1586 {
1587 tree cst;
b08de47e 1588
91d231cb
JM
1589 cst = TREE_VALUE (args);
1590 if (TREE_CODE (cst) != INTEGER_CST)
1591 {
1592 warning ("`%s' attribute requires an integer constant argument",
1593 IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1595 }
1596 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1597 {
1598 warning ("argument to `%s' attribute larger than %d",
1599 IDENTIFIER_POINTER (name), REGPARM_MAX);
1600 *no_add_attrs = true;
1601 }
e91f04de
CH
1602
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1604 {
1605 error ("fastcall and regparm attributes are not compatible");
1606 }
b08de47e
MM
1607 }
1608
91d231cb 1609 return NULL_TREE;
b08de47e
MM
1610}
1611
1612/* Return 0 if the attributes for two types are incompatible, 1 if they
1613 are compatible, and 2 if they are nearly compatible (which causes a
1614 warning to be generated). */
1615
8d8e52be 1616static int
e075ae69 1617ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1618 tree type1;
1619 tree type2;
b08de47e 1620{
0f290768 1621 /* Check for mismatch of non-default calling convention. */
27c38fbe 1622 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1623
1624 if (TREE_CODE (type1) != FUNCTION_TYPE)
1625 return 1;
1626
e91f04de
CH
1627 /* Check for mismatched fastcall types */
1628 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1629 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1630 return 0;
1631
afcfe58c 1632 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1633 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1634 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1635 return 0;
b08de47e
MM
1636 return 1;
1637}
b08de47e 1638\f
483ab821
MM
1639/* Return the regparm value for a function with the indicated TYPE.  */
1640
1641static int
1642ix86_fntype_regparm (type)
1643 tree type;
1644{
1645 tree attr;
1646
1647 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1648 if (attr)
1649 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1650 else
1651 return ix86_regparm;
1652}
1653
b08de47e
MM
1654/* Value is the number of bytes of arguments automatically
1655 popped when returning from a subroutine call.
1656 FUNDECL is the declaration node of the function (as a tree),
1657 FUNTYPE is the data type of the function (as a tree),
1658 or for a library call it is an identifier node for the subroutine name.
1659 SIZE is the number of bytes of arguments passed on the stack.
1660
1661 On the 80386, the RTD insn may be used to pop them if the number
1662 of args is fixed, but if the number is variable then the caller
1663 must pop them all. RTD can't be used for library calls now
1664 because the library is compiled with the Unix compiler.
1665 Use of RTD is a selectable option, since it is incompatible with
1666 standard Unix calling sequences. If the option is not selected,
1667 the caller must always pop the args.
1668
1669 The attribute stdcall is equivalent to RTD on a per module basis. */
1670
1671int
e075ae69 1672ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1673 tree fundecl;
1674 tree funtype;
1675 int size;
79325812 1676{
3345ee7d 1677 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1678
0f290768 1679 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1680 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1681
e91f04de
CH
1682 /* Stdcall and fastcall functions will pop the stack unless they take variable arguments.  */
1683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1685 rtd = 1;
79325812 1686
698cdd84
SC
1687 if (rtd
1688 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1689 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1690 == void_type_node)))
698cdd84
SC
1691 return size;
1692 }
79325812 1693
232b8f52 1694 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1695 if (aggregate_value_p (TREE_TYPE (funtype))
1696 && !TARGET_64BIT)
232b8f52 1697 {
483ab821 1698 int nregs = ix86_fntype_regparm (funtype);
232b8f52
JJ
1699
1700 if (!nregs)
1701 return GET_MODE_SIZE (Pmode);
1702 }
1703
1704 return 0;
b08de47e 1705}
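/* For illustration: for "void __attribute__ ((stdcall)) f (int, int)"
   a call pushes 8 bytes of arguments, f returns with "ret 8", and this
   function returns 8; for a cdecl or varargs function it returns 0 and
   the caller pops the arguments.  */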
b08de47e
MM
1706\f
1707/* Argument support functions. */
1708
53c17031
JH
1709/* Return true when register may be used to pass function parameters. */
1710bool
1711ix86_function_arg_regno_p (regno)
1712 int regno;
1713{
1714 int i;
1715 if (!TARGET_64BIT)
0333394e
JJ
1716 return (regno < REGPARM_MAX
1717 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1718 if (SSE_REGNO_P (regno) && TARGET_SSE)
1719 return true;
1720 /* RAX is used as hidden argument to va_arg functions. */
1721 if (!regno)
1722 return true;
1723 for (i = 0; i < REGPARM_MAX; i++)
1724 if (regno == x86_64_int_parameter_registers[i])
1725 return true;
1726 return false;
1727}
1728
b08de47e
MM
1729/* Initialize a variable CUM of type CUMULATIVE_ARGS
1730 for a call to a function whose data type is FNTYPE.
1731 For a library call, FNTYPE is 0. */
1732
1733void
1734init_cumulative_args (cum, fntype, libname)
e9a25f70 1735 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1736 tree fntype; /* tree ptr for function decl */
1737 rtx libname; /* SYMBOL_REF of library name or 0 */
1738{
1739 static CUMULATIVE_ARGS zero_cum;
1740 tree param, next_param;
1741
1742 if (TARGET_DEBUG_ARG)
1743 {
1744 fprintf (stderr, "\ninit_cumulative_args (");
1745 if (fntype)
e9a25f70
JL
1746 fprintf (stderr, "fntype code = %s, ret code = %s",
1747 tree_code_name[(int) TREE_CODE (fntype)],
1748 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1749 else
1750 fprintf (stderr, "no fntype");
1751
1752 if (libname)
1753 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1754 }
1755
1756 *cum = zero_cum;
1757
1758 /* Set up the number of registers to use for passing arguments. */
e075ae69 1759 cum->nregs = ix86_regparm;
53c17031
JH
1760 cum->sse_nregs = SSE_REGPARM_MAX;
1761 if (fntype && !TARGET_64BIT)
b08de47e
MM
1762 {
1763 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1764
b08de47e
MM
1765 if (attr)
1766 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1767 }
53c17031 1768 cum->maybe_vaarg = false;
b08de47e 1769
e91f04de
CH
1770 /* Use the ecx and edx registers if the function has the fastcall attribute.  */
1771 if (fntype && !TARGET_64BIT)
1772 {
1773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1774 {
1775 cum->nregs = 2;
1776 cum->fastcall = 1;
1777 }
1778 }
1779
1780
b08de47e
MM
1781 /* Determine if this function has variable arguments. This is
1782 indicated by the last argument being 'void_type_node' if there
1783 are no variable arguments.  If there are variable arguments, then
1784 we won't pass anything in registers.  */
1785
1786 if (cum->nregs)
1787 {
1788 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1789 param != 0; param = next_param)
b08de47e
MM
1790 {
1791 next_param = TREE_CHAIN (param);
e9a25f70 1792 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1793 {
1794 if (!TARGET_64BIT)
e91f04de
CH
1795 {
1796 cum->nregs = 0;
1797 cum->fastcall = 0;
1798 }
53c17031
JH
1799 cum->maybe_vaarg = true;
1800 }
b08de47e
MM
1801 }
1802 }
53c17031
JH
1803 if ((!fntype && !libname)
1804 || (fntype && !TYPE_ARG_TYPES (fntype)))
1805 cum->maybe_vaarg = 1;
b08de47e
MM
1806
1807 if (TARGET_DEBUG_ARG)
1808 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1809
1810 return;
1811}
1812
53c17031 1813/* x86-64 register passing implementation.  See the x86-64 psABI for details.  The goal
f710504c 1814 of this code is to classify each eightbyte of an incoming argument by register
53c17031
JH
1815 class and assign registers accordingly. */
1816
1817/* Return the union class of CLASS1 and CLASS2.
1818 See the x86-64 PS ABI for details. */
1819
1820static enum x86_64_reg_class
1821merge_classes (class1, class2)
1822 enum x86_64_reg_class class1, class2;
1823{
1824 /* Rule #1: If both classes are equal, this is the resulting class. */
1825 if (class1 == class2)
1826 return class1;
1827
1828 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1829 the other class. */
1830 if (class1 == X86_64_NO_CLASS)
1831 return class2;
1832 if (class2 == X86_64_NO_CLASS)
1833 return class1;
1834
1835 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1836 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1837 return X86_64_MEMORY_CLASS;
1838
1839 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1840 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1841 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1842 return X86_64_INTEGERSI_CLASS;
1843 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1844 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1845 return X86_64_INTEGER_CLASS;
1846
1847 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1848 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1849 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1850 return X86_64_MEMORY_CLASS;
1851
1852 /* Rule #6: Otherwise class SSE is used. */
1853 return X86_64_SSE_CLASS;
1854}
1855
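/* For illustration: merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS by rule #4, while
   merging X86_64_SSE_CLASS with X86_64_SSEDF_CLASS falls through to
   rule #6 and yields X86_64_SSE_CLASS.  */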
1856/* Classify the argument of type TYPE and mode MODE.
1857 CLASSES will be filled by the register class used to pass each word
1858 of the operand. The number of words is returned. In case the parameter
1859 should be passed in memory, 0 is returned. As a special case for zero
1860 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1861
1862 BIT_OFFSET is used internally for handling records; it specifies the
1863 offset in bits, modulo 256, to avoid overflow cases.
1864
1865 See the x86-64 PS ABI for details.
1866*/
1867
1868static int
1869classify_argument (mode, type, classes, bit_offset)
1870 enum machine_mode mode;
1871 tree type;
1872 enum x86_64_reg_class classes[MAX_CLASSES];
1873 int bit_offset;
1874{
1875 int bytes =
1876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1878
c60ee6f5
JH
1879 /* Variable sized entities are always passed/returned in memory. */
1880 if (bytes < 0)
1881 return 0;
1882
53c17031
JH
1883 if (type && AGGREGATE_TYPE_P (type))
1884 {
1885 int i;
1886 tree field;
1887 enum x86_64_reg_class subclasses[MAX_CLASSES];
1888
1889 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1890 if (bytes > 16)
1891 return 0;
1892
1893 for (i = 0; i < words; i++)
1894 classes[i] = X86_64_NO_CLASS;
1895
1896 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
1897 signal the memory class, so handle this as a special case.  */
1898 if (!words)
1899 {
1900 classes[0] = X86_64_NO_CLASS;
1901 return 1;
1902 }
1903
1904 /* Classify each field of record and merge classes. */
1905 if (TREE_CODE (type) == RECORD_TYPE)
1906 {
91ea38f9
JH
1907 /* For classes, first merge in the fields of the base classes.  */
1908 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1909 {
1910 tree bases = TYPE_BINFO_BASETYPES (type);
1911 int n_bases = TREE_VEC_LENGTH (bases);
1912 int i;
1913
1914 for (i = 0; i < n_bases; ++i)
1915 {
1916 tree binfo = TREE_VEC_ELT (bases, i);
1917 int num;
1918 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1919 tree type = BINFO_TYPE (binfo);
1920
1921 num = classify_argument (TYPE_MODE (type),
1922 type, subclasses,
1923 (offset + bit_offset) % 256);
1924 if (!num)
1925 return 0;
1926 for (i = 0; i < num; i++)
1927 {
db01f480 1928 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1929 classes[i + pos] =
1930 merge_classes (subclasses[i], classes[i + pos]);
1931 }
1932 }
1933 }
1934 /* And now merge the fields of the structure.  */
53c17031
JH
1935 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1936 {
1937 if (TREE_CODE (field) == FIELD_DECL)
1938 {
1939 int num;
1940
1941 /* Bitfields are always classified as integer. Handle them
1942 early, since later code would consider them to be
1943 misaligned integers. */
1944 if (DECL_BIT_FIELD (field))
1945 {
1946 for (i = int_bit_position (field) / 8 / 8;
1947 i < (int_bit_position (field)
1948 + tree_low_cst (DECL_SIZE (field), 0)
1949 + 63) / 8 / 8; i++)
1950 classes[i] =
1951 merge_classes (X86_64_INTEGER_CLASS,
1952 classes[i]);
1953 }
1954 else
1955 {
1956 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1957 TREE_TYPE (field), subclasses,
1958 (int_bit_position (field)
1959 + bit_offset) % 256);
1960 if (!num)
1961 return 0;
1962 for (i = 0; i < num; i++)
1963 {
1964 int pos =
db01f480 1965 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1966 classes[i + pos] =
1967 merge_classes (subclasses[i], classes[i + pos]);
1968 }
1969 }
1970 }
1971 }
1972 }
1973 /* Arrays are handled as small records. */
1974 else if (TREE_CODE (type) == ARRAY_TYPE)
1975 {
1976 int num;
1977 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1978 TREE_TYPE (type), subclasses, bit_offset);
1979 if (!num)
1980 return 0;
1981
1982 /* The partial classes are now full classes. */
1983 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1984 subclasses[0] = X86_64_SSE_CLASS;
1985 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1986 subclasses[0] = X86_64_INTEGER_CLASS;
1987
1988 for (i = 0; i < words; i++)
1989 classes[i] = subclasses[i % num];
1990 }
1991 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1992 else if (TREE_CODE (type) == UNION_TYPE
1993 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1994 {
91ea38f9
JH
1995 /* For classes, first merge in the fields of the base classes.  */
1996 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1997 {
1998 tree bases = TYPE_BINFO_BASETYPES (type);
1999 int n_bases = TREE_VEC_LENGTH (bases);
2000 int i;
2001
2002 for (i = 0; i < n_bases; ++i)
2003 {
2004 tree binfo = TREE_VEC_ELT (bases, i);
2005 int num;
2006 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2007 tree type = BINFO_TYPE (binfo);
2008
2009 num = classify_argument (TYPE_MODE (type),
2010 type, subclasses,
db01f480 2011 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2012 if (!num)
2013 return 0;
2014 for (i = 0; i < num; i++)
2015 {
c16576e6 2016 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2017 classes[i + pos] =
2018 merge_classes (subclasses[i], classes[i + pos]);
2019 }
2020 }
2021 }
53c17031
JH
2022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2023 {
2024 if (TREE_CODE (field) == FIELD_DECL)
2025 {
2026 int num;
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2029 bit_offset);
2030 if (!num)
2031 return 0;
2032 for (i = 0; i < num; i++)
2033 classes[i] = merge_classes (subclasses[i], classes[i]);
2034 }
2035 }
2036 }
2037 else
2038 abort ();
2039
2040 /* Final merger cleanup. */
2041 for (i = 0; i < words; i++)
2042 {
2043 /* If one class is MEMORY, everything should be passed in
2044 memory. */
2045 if (classes[i] == X86_64_MEMORY_CLASS)
2046 return 0;
2047
d6a7951f 2048 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
2049 X86_64_SSE_CLASS. */
2050 if (classes[i] == X86_64_SSEUP_CLASS
2051 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2052 classes[i] = X86_64_SSE_CLASS;
2053
d6a7951f 2054 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2055 if (classes[i] == X86_64_X87UP_CLASS
2056 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2057 classes[i] = X86_64_SSE_CLASS;
2058 }
2059 return words;
2060 }
2061
2062 /* Compute the alignment needed.  We align all types to their natural
2063 boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
2064 if (mode != VOIDmode && mode != BLKmode)
2065 {
2066 int mode_alignment = GET_MODE_BITSIZE (mode);
2067
2068 if (mode == XFmode)
2069 mode_alignment = 128;
2070 else if (mode == XCmode)
2071 mode_alignment = 256;
f5143c46 2072 /* Misaligned fields are always returned in memory. */
53c17031
JH
2073 if (bit_offset % mode_alignment)
2074 return 0;
2075 }
2076
2077 /* Classification of atomic types. */
2078 switch (mode)
2079 {
2080 case DImode:
2081 case SImode:
2082 case HImode:
2083 case QImode:
2084 case CSImode:
2085 case CHImode:
2086 case CQImode:
2087 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2088 classes[0] = X86_64_INTEGERSI_CLASS;
2089 else
2090 classes[0] = X86_64_INTEGER_CLASS;
2091 return 1;
2092 case CDImode:
2093 case TImode:
2094 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2095 return 2;
2096 case CTImode:
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2099 return 4;
2100 case SFmode:
2101 if (!(bit_offset % 64))
2102 classes[0] = X86_64_SSESF_CLASS;
2103 else
2104 classes[0] = X86_64_SSE_CLASS;
2105 return 1;
2106 case DFmode:
2107 classes[0] = X86_64_SSEDF_CLASS;
2108 return 1;
2109 case TFmode:
2110 classes[0] = X86_64_X87_CLASS;
2111 classes[1] = X86_64_X87UP_CLASS;
2112 return 2;
2113 case TCmode:
2114 classes[0] = X86_64_X87_CLASS;
2115 classes[1] = X86_64_X87UP_CLASS;
2116 classes[2] = X86_64_X87_CLASS;
2117 classes[3] = X86_64_X87UP_CLASS;
2118 return 4;
2119 case DCmode:
2120 classes[0] = X86_64_SSEDF_CLASS;
2121 classes[1] = X86_64_SSEDF_CLASS;
2122 return 2;
2123 case SCmode:
2124 classes[0] = X86_64_SSE_CLASS;
2125 return 1;
e95d6b23
JH
2126 case V4SFmode:
2127 case V4SImode:
495333a6
JH
2128 case V16QImode:
2129 case V8HImode:
2130 case V2DFmode:
2131 case V2DImode:
e95d6b23
JH
2132 classes[0] = X86_64_SSE_CLASS;
2133 classes[1] = X86_64_SSEUP_CLASS;
2134 return 2;
2135 case V2SFmode:
2136 case V2SImode:
2137 case V4HImode:
2138 case V8QImode:
1194ca05 2139 return 0;
53c17031 2140 case BLKmode:
e95d6b23 2141 case VOIDmode:
53c17031
JH
2142 return 0;
2143 default:
2144 abort ();
2145 }
2146}
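/* For illustration: "struct { double d; int i; }" occupies two
   eightbytes; the double classifies the first as X86_64_SSEDF_CLASS and
   the int (at bit offset 64) classifies the second as
   X86_64_INTEGER_CLASS, so the struct travels in one SSE register and
   one integer register.  */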
2147
2148/* Examine the argument and return the number of registers required in each
f5143c46 2149 class.  Return 0 iff the parameter should be passed in memory.  */
53c17031
JH
2150static int
2151examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2152 enum machine_mode mode;
2153 tree type;
2154 int *int_nregs, *sse_nregs;
2155 int in_return;
2156{
2157 enum x86_64_reg_class class[MAX_CLASSES];
2158 int n = classify_argument (mode, type, class, 0);
2159
2160 *int_nregs = 0;
2161 *sse_nregs = 0;
2162 if (!n)
2163 return 0;
2164 for (n--; n >= 0; n--)
2165 switch (class[n])
2166 {
2167 case X86_64_INTEGER_CLASS:
2168 case X86_64_INTEGERSI_CLASS:
2169 (*int_nregs)++;
2170 break;
2171 case X86_64_SSE_CLASS:
2172 case X86_64_SSESF_CLASS:
2173 case X86_64_SSEDF_CLASS:
2174 (*sse_nregs)++;
2175 break;
2176 case X86_64_NO_CLASS:
2177 case X86_64_SSEUP_CLASS:
2178 break;
2179 case X86_64_X87_CLASS:
2180 case X86_64_X87UP_CLASS:
2181 if (!in_return)
2182 return 0;
2183 break;
2184 case X86_64_MEMORY_CLASS:
2185 abort ();
2186 }
2187 return 1;
2188}
2189/* Construct a container for the argument as used by the GCC interface.  See
2190 FUNCTION_ARG for the detailed description.  */
2191static rtx
2192construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2193 enum machine_mode mode;
2194 tree type;
2195 int in_return;
2196 int nintregs, nsseregs;
07933f72
GS
2197 const int * intreg;
2198 int sse_regno;
53c17031
JH
2199{
2200 enum machine_mode tmpmode;
2201 int bytes =
2202 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2203 enum x86_64_reg_class class[MAX_CLASSES];
2204 int n;
2205 int i;
2206 int nexps = 0;
2207 int needed_sseregs, needed_intregs;
2208 rtx exp[MAX_CLASSES];
2209 rtx ret;
2210
2211 n = classify_argument (mode, type, class, 0);
2212 if (TARGET_DEBUG_ARG)
2213 {
2214 if (!n)
2215 fprintf (stderr, "Memory class\n");
2216 else
2217 {
2218 fprintf (stderr, "Classes:");
2219 for (i = 0; i < n; i++)
2220 {
2221 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2222 }
2223 fprintf (stderr, "\n");
2224 }
2225 }
2226 if (!n)
2227 return NULL;
2228 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2229 return NULL;
2230 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2231 return NULL;
2232
2233 /* First construct simple cases. Avoid SCmode, since we want to use
2234 single register to pass this type. */
2235 if (n == 1 && mode != SCmode)
2236 switch (class[0])
2237 {
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 return gen_rtx_REG (mode, intreg[0]);
2241 case X86_64_SSE_CLASS:
2242 case X86_64_SSESF_CLASS:
2243 case X86_64_SSEDF_CLASS:
2244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2245 case X86_64_X87_CLASS:
2246 return gen_rtx_REG (mode, FIRST_STACK_REG);
2247 case X86_64_NO_CLASS:
2248 /* Zero sized array, struct or class. */
2249 return NULL;
2250 default:
2251 abort ();
2252 }
2253 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2254 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2255 if (n == 2
2256 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2257 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2258 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2259 && class[1] == X86_64_INTEGER_CLASS
2260 && (mode == CDImode || mode == TImode)
2261 && intreg[0] + 1 == intreg[1])
2262 return gen_rtx_REG (mode, intreg[0]);
2263 if (n == 4
2264 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2265 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2266 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2267
2268 /* Otherwise figure out the entries of the PARALLEL. */
2269 for (i = 0; i < n; i++)
2270 {
2271 switch (class[i])
2272 {
2273 case X86_64_NO_CLASS:
2274 break;
2275 case X86_64_INTEGER_CLASS:
2276 case X86_64_INTEGERSI_CLASS:
2277 /* Merge TImodes on aligned occasions here too.  */
2278 if (i * 8 + 8 > bytes)
2279 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2280 else if (class[i] == X86_64_INTEGERSI_CLASS)
2281 tmpmode = SImode;
2282 else
2283 tmpmode = DImode;
2284 /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
2285 if (tmpmode == BLKmode)
2286 tmpmode = DImode;
2287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2288 gen_rtx_REG (tmpmode, *intreg),
2289 GEN_INT (i*8));
2290 intreg++;
2291 break;
2292 case X86_64_SSESF_CLASS:
2293 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2294 gen_rtx_REG (SFmode,
2295 SSE_REGNO (sse_regno)),
2296 GEN_INT (i*8));
2297 sse_regno++;
2298 break;
2299 case X86_64_SSEDF_CLASS:
2300 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2301 gen_rtx_REG (DFmode,
2302 SSE_REGNO (sse_regno)),
2303 GEN_INT (i*8));
2304 sse_regno++;
2305 break;
2306 case X86_64_SSE_CLASS:
12f5c45e
JH
2307 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2308 tmpmode = TImode;
53c17031
JH
2309 else
2310 tmpmode = DImode;
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (tmpmode,
2313 SSE_REGNO (sse_regno)),
2314 GEN_INT (i*8));
12f5c45e
JH
2315 if (tmpmode == TImode)
2316 i++;
53c17031
JH
2317 sse_regno++;
2318 break;
2319 default:
2320 abort ();
2321 }
2322 }
2323 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2324 for (i = 0; i < nexps; i++)
2325 XVECEXP (ret, 0, i) = exp [i];
2326 return ret;
2327}
2328
b08de47e
MM
2329/* Update the data in CUM to advance over an argument
2330 of mode MODE and data type TYPE.
2331 (TYPE is null for libcalls where that information may not be available.) */
2332
2333void
2334function_arg_advance (cum, mode, type, named)
2335 CUMULATIVE_ARGS *cum; /* current arg information */
2336 enum machine_mode mode; /* current arg mode */
2337 tree type; /* type of the argument or 0 if lib support */
2338 int named; /* whether or not the argument was named */
2339{
5ac9118e
KG
2340 int bytes =
2341 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2342 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2343
2344 if (TARGET_DEBUG_ARG)
2345 fprintf (stderr,
e9a25f70 2346 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2347 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2348 if (TARGET_64BIT)
b08de47e 2349 {
53c17031
JH
2350 int int_nregs, sse_nregs;
2351 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2352 cum->words += words;
2353 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2354 {
53c17031
JH
2355 cum->nregs -= int_nregs;
2356 cum->sse_nregs -= sse_nregs;
2357 cum->regno += int_nregs;
2358 cum->sse_regno += sse_nregs;
82a127a9 2359 }
53c17031
JH
2360 else
2361 cum->words += words;
b08de47e 2362 }
a4f31c00 2363 else
82a127a9 2364 {
53c17031
JH
2365 if (TARGET_SSE && mode == TImode)
2366 {
2367 cum->sse_words += words;
2368 cum->sse_nregs -= 1;
2369 cum->sse_regno += 1;
2370 if (cum->sse_nregs <= 0)
2371 {
2372 cum->sse_nregs = 0;
2373 cum->sse_regno = 0;
2374 }
2375 }
2376 else
82a127a9 2377 {
53c17031
JH
2378 cum->words += words;
2379 cum->nregs -= words;
2380 cum->regno += words;
2381
2382 if (cum->nregs <= 0)
2383 {
2384 cum->nregs = 0;
2385 cum->regno = 0;
2386 }
82a127a9
CM
2387 }
2388 }
b08de47e
MM
2389 return;
2390}
2391
2392/* Define where to put the arguments to a function.
2393 Value is zero to push the argument on the stack,
2394 or a hard register in which to store the argument.
2395
2396 MODE is the argument's machine mode.
2397 TYPE is the data type of the argument (as a tree).
2398 This is null for libcalls where that information may
2399 not be available.
2400 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2401 the preceding args and about the function being called.
2402 NAMED is nonzero if this argument is a named parameter
2403 (otherwise it is an extra parameter matching an ellipsis). */
2404
07933f72 2405rtx
b08de47e
MM
2406function_arg (cum, mode, type, named)
2407 CUMULATIVE_ARGS *cum; /* current arg information */
2408 enum machine_mode mode; /* current arg mode */
2409 tree type; /* type of the argument or 0 if lib support */
2410 int named; /* != 0 for normal args, == 0 for ... args */
2411{
2412 rtx ret = NULL_RTX;
5ac9118e
KG
2413 int bytes =
2414 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2415 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2416
53c17031
JH
2417 /* Handle a hidden AL argument containing the number of registers for varargs
2418 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2419 any AL settings. */
32ee7d1d 2420 if (mode == VOIDmode)
b08de47e 2421 {
53c17031
JH
2422 if (TARGET_64BIT)
2423 return GEN_INT (cum->maybe_vaarg
2424 ? (cum->sse_nregs < 0
2425 ? SSE_REGPARM_MAX
2426 : cum->sse_regno)
2427 : -1);
2428 else
2429 return constm1_rtx;
b08de47e 2430 }
53c17031
JH
2431 if (TARGET_64BIT)
2432 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2433 &x86_64_int_parameter_registers [cum->regno],
2434 cum->sse_regno);
2435 else
2436 switch (mode)
2437 {
2438 /* For now, pass fp/complex values on the stack. */
2439 default:
2440 break;
2441
2442 case BLKmode:
2443 case DImode:
2444 case SImode:
2445 case HImode:
2446 case QImode:
2447 if (words <= cum->nregs)
e91f04de
CH
2448 {
2449 int regno = cum->regno;
2450
2451 /* Fastcall allocates the first two DWORD (SImode) or
2452 smaller arguments to ECX and EDX. */
2453 if (cum->fastcall)
2454 {
2455 if (mode == BLKmode || mode == DImode)
2456 break;
2457
2458 /* ECX not EAX is the first allocated register. */
2459 if (regno == 0)
2460 regno = 2;
2461 }
2462 ret = gen_rtx_REG (mode, regno);
2463 }
53c17031
JH
2464 break;
2465 case TImode:
2466 if (cum->sse_nregs)
2467 ret = gen_rtx_REG (mode, cum->sse_regno);
2468 break;
2469 }
b08de47e
MM
2470
2471 if (TARGET_DEBUG_ARG)
2472 {
2473 fprintf (stderr,
91ea38f9 2474 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2475 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2476
2477 if (ret)
91ea38f9 2478 print_simple_rtl (stderr, ret);
b08de47e
MM
2479 else
2480 fprintf (stderr, ", stack");
2481
2482 fprintf (stderr, " )\n");
2483 }
2484
2485 return ret;
2486}
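/* For illustration (32-bit): for "void __attribute__ ((fastcall))
   f (int a, int b)" the code above returns ECX (regno 0 remapped to 2)
   for the first SImode argument and EDX for the second, while BLKmode
   and DImode arguments break out of the switch and go on the stack.  */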
53c17031
JH
2487
2488/* Gives the alignment boundary, in bits, of an argument with the specified mode
2489 and type. */
2490
2491int
2492ix86_function_arg_boundary (mode, type)
2493 enum machine_mode mode;
2494 tree type;
2495{
2496 int align;
2497 if (!TARGET_64BIT)
2498 return PARM_BOUNDARY;
2499 if (type)
2500 align = TYPE_ALIGN (type);
2501 else
2502 align = GET_MODE_ALIGNMENT (mode);
2503 if (align < PARM_BOUNDARY)
2504 align = PARM_BOUNDARY;
2505 if (align > 128)
2506 align = 128;
2507 return align;
2508}
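/* For illustration: on x86-64 a __m128 argument has TYPE_ALIGN == 128
   and is passed on a 16-byte boundary; smaller alignments are rounded
   up to PARM_BOUNDARY and nothing exceeds the 128-bit cap.  */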
2509
2510/* Return true if N is a possible register number of function value. */
2511bool
2512ix86_function_value_regno_p (regno)
2513 int regno;
2514{
2515 if (!TARGET_64BIT)
2516 {
2517 return ((regno) == 0
2518 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2519 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2520 }
2521 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2522 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2523 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2524}
2525
2526/* Define how to find the value returned by a function.
2527 VALTYPE is the data type of the value (as a tree).
2528 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2529 otherwise, FUNC is 0. */
2530rtx
2531ix86_function_value (valtype)
2532 tree valtype;
2533{
2534 if (TARGET_64BIT)
2535 {
2536 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2537 REGPARM_MAX, SSE_REGPARM_MAX,
2538 x86_64_int_return_registers, 0);
2539 /* For zero-sized structures, construct_container returns NULL, but we need
2540 to keep the rest of the compiler happy by returning a meaningful value.  */
2541 if (!ret)
2542 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2543 return ret;
2544 }
2545 else
b069de3b
SS
2546 return gen_rtx_REG (TYPE_MODE (valtype),
2547 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2548}
2549
f5143c46 2550/* Return nonzero iff TYPE is returned in memory.  */
53c17031
JH
2551int
2552ix86_return_in_memory (type)
2553 tree type;
2554{
2555 int needed_intregs, needed_sseregs;
2556 if (TARGET_64BIT)
2557 {
2558 return !examine_argument (TYPE_MODE (type), type, 1,
2559 &needed_intregs, &needed_sseregs);
2560 }
2561 else
2562 {
2563 if (TYPE_MODE (type) == BLKmode
2564 || (VECTOR_MODE_P (TYPE_MODE (type))
2565 && int_size_in_bytes (type) == 8)
2566 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2567 && TYPE_MODE (type) != TFmode
2568 && !VECTOR_MODE_P (TYPE_MODE (type))))
2569 return 1;
2570 return 0;
2571 }
2572}
2573
2574/* Define how to find the value returned by a library function
2575 assuming the value has mode MODE. */
2576rtx
2577ix86_libcall_value (mode)
2578 enum machine_mode mode;
2579{
2580 if (TARGET_64BIT)
2581 {
2582 switch (mode)
2583 {
2584 case SFmode:
2585 case SCmode:
2586 case DFmode:
2587 case DCmode:
2588 return gen_rtx_REG (mode, FIRST_SSE_REG);
2589 case TFmode:
2590 case TCmode:
2591 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2592 default:
2593 return gen_rtx_REG (mode, 0);
2594 }
2595 }
2596 else
b069de3b
SS
2597 return gen_rtx_REG (mode, ix86_value_regno (mode));
2598}
2599
2600/* Given a mode, return the register to use for a return value. */
2601
2602static int
2603ix86_value_regno (mode)
2604 enum machine_mode mode;
2605{
2606 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2607 return FIRST_FLOAT_REG;
2608 if (mode == TImode || VECTOR_MODE_P (mode))
2609 return FIRST_SSE_REG;
2610 return 0;
53c17031 2611}
ad919812
JH
2612\f
2613/* Create the va_list data type. */
53c17031 2614
ad919812
JH
2615tree
2616ix86_build_va_list ()
2617{
2618 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2619
ad919812
JH
2620 /* For i386 we use plain pointer to argument area. */
2621 if (!TARGET_64BIT)
2622 return build_pointer_type (char_type_node);
2623
f1e639b1 2624 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2625 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2626
fce5a9f2 2627 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2628 unsigned_type_node);
fce5a9f2 2629 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2630 unsigned_type_node);
2631 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2632 ptr_type_node);
2633 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2634 ptr_type_node);
2635
2636 DECL_FIELD_CONTEXT (f_gpr) = record;
2637 DECL_FIELD_CONTEXT (f_fpr) = record;
2638 DECL_FIELD_CONTEXT (f_ovf) = record;
2639 DECL_FIELD_CONTEXT (f_sav) = record;
2640
2641 TREE_CHAIN (record) = type_decl;
2642 TYPE_NAME (record) = type_decl;
2643 TYPE_FIELDS (record) = f_gpr;
2644 TREE_CHAIN (f_gpr) = f_fpr;
2645 TREE_CHAIN (f_fpr) = f_ovf;
2646 TREE_CHAIN (f_ovf) = f_sav;
2647
2648 layout_type (record);
2649
2650 /* The correct type is an array type of one element. */
2651 return build_array_type (record, build_index_type (size_zero_node));
2652}
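/* The record built above corresponds to the x86-64 psABI va_list type,
   roughly (for illustration):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   The array-of-one-element type makes va_list decay to a pointer when
   passed to a function.  */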
2653
2654/* Perform any actions needed for a function that is receiving a
fce5a9f2 2655 variable number of arguments.
ad919812
JH
2656
2657 CUM is as above.
2658
2659 MODE and TYPE are the mode and type of the current parameter.
2660
2661 PRETEND_SIZE is a variable that should be set to the amount of stack
2662 that must be pushed by the prolog to pretend that our caller pushed
2663 it.
2664
2665 Normally, this macro will push all remaining incoming registers on the
2666 stack and set PRETEND_SIZE to the length of the registers pushed. */
2667
2668void
2669ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2670 CUMULATIVE_ARGS *cum;
2671 enum machine_mode mode;
2672 tree type;
2673 int *pretend_size ATTRIBUTE_UNUSED;
2674 int no_rtl;
2675
2676{
2677 CUMULATIVE_ARGS next_cum;
2678 rtx save_area = NULL_RTX, mem;
2679 rtx label;
2680 rtx label_ref;
2681 rtx tmp_reg;
2682 rtx nsse_reg;
2683 int set;
2684 tree fntype;
2685 int stdarg_p;
2686 int i;
2687
2688 if (!TARGET_64BIT)
2689 return;
2690
2691 /* Indicate to allocate space on the stack for varargs save area. */
2692 ix86_save_varrargs_registers = 1;
2693
2694 fntype = TREE_TYPE (current_function_decl);
2695 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2697 != void_type_node));
2698
2699 /* For varargs, we do not want to skip the dummy va_dcl argument.
2700 For stdargs, we do want to skip the last named argument. */
2701 next_cum = *cum;
2702 if (stdarg_p)
2703 function_arg_advance (&next_cum, mode, type, 1);
2704
2705 if (!no_rtl)
2706 save_area = frame_pointer_rtx;
2707
2708 set = get_varargs_alias_set ();
2709
2710 for (i = next_cum.regno; i < ix86_regparm; i++)
2711 {
2712 mem = gen_rtx_MEM (Pmode,
2713 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2714 set_mem_alias_set (mem, set);
ad919812
JH
2715 emit_move_insn (mem, gen_rtx_REG (Pmode,
2716 x86_64_int_parameter_registers[i]));
2717 }
2718
2719 if (next_cum.sse_nregs)
2720 {
2721 /* Now emit code to save the SSE registers.  The AX parameter contains the
2722 number of SSE parameter registers used to call this function.  We use the
2723 sse_prologue_save insn template, which produces a computed jump across
2724 the SSE saves.  We need some preparation work to get this working.  */
2725
2726 label = gen_label_rtx ();
2727 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2728
2729 /* Compute address to jump to :
2730 label - 4*eax + nnamed_sse_arguments*4 */
2731 tmp_reg = gen_reg_rtx (Pmode);
2732 nsse_reg = gen_reg_rtx (Pmode);
2733 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2734 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2735 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2736 GEN_INT (4))));
2737 if (next_cum.sse_regno)
2738 emit_move_insn
2739 (nsse_reg,
2740 gen_rtx_CONST (DImode,
2741 gen_rtx_PLUS (DImode,
2742 label_ref,
2743 GEN_INT (next_cum.sse_regno * 4))));
2744 else
2745 emit_move_insn (nsse_reg, label_ref);
2746 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2747
2748 /* Compute the address of the memory block we save into.  We always use a
2749 pointer pointing 127 bytes after the first byte to store; this is
2750 needed to keep the instruction size limited to 4 bytes.  */
2751 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2752 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2753 plus_constant (save_area,
2754 8 * REGPARM_MAX + 127)));
ad919812 2755 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2756 set_mem_alias_set (mem, set);
8ac61af7 2757 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2758
2759 /* And finally do the dirty job! */
8ac61af7
RK
2760 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2761 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2762 }
2763
2764}
2765
2766/* Implement va_start. */
2767
2768void
e5faf155 2769ix86_va_start (valist, nextarg)
ad919812
JH
2770 tree valist;
2771 rtx nextarg;
2772{
2773 HOST_WIDE_INT words, n_gpr, n_fpr;
2774 tree f_gpr, f_fpr, f_ovf, f_sav;
2775 tree gpr, fpr, ovf, sav, t;
2776
2777 /* Only 64bit target needs something special. */
2778 if (!TARGET_64BIT)
2779 {
e5faf155 2780 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2781 return;
2782 }
2783
2784 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2785 f_fpr = TREE_CHAIN (f_gpr);
2786 f_ovf = TREE_CHAIN (f_fpr);
2787 f_sav = TREE_CHAIN (f_ovf);
2788
2789 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2790 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2791 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2792 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2793 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2794
2795 /* Count number of gp and fp argument registers used. */
2796 words = current_function_args_info.words;
2797 n_gpr = current_function_args_info.regno;
2798 n_fpr = current_function_args_info.sse_regno;
2799
2800 if (TARGET_DEBUG_ARG)
2801 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2802 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2803
2804 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2805 build_int_2 (n_gpr * 8, 0));
2806 TREE_SIDE_EFFECTS (t) = 1;
2807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2808
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2810 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2811 TREE_SIDE_EFFECTS (t) = 1;
2812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2813
2814 /* Find the overflow area. */
2815 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2816 if (words != 0)
2817 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2818 build_int_2 (words * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2822
2823 /* Find the register save area.
2824 Prologue of the function save it right above stack frame. */
2825 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2826 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2827 TREE_SIDE_EFFECTS (t) = 1;
2828 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2829}
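/* For illustration: in "void f (int a, ...)" one GP register is
   consumed by the named argument, so va_start sets gp_offset = 1 * 8 = 8
   and fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48, both offsets into the
   register save area set up by ix86_setup_incoming_varargs.  */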
2830
2831/* Implement va_arg. */
2832rtx
2833ix86_va_arg (valist, type)
2834 tree valist, type;
2835{
0139adca 2836 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2837 tree f_gpr, f_fpr, f_ovf, f_sav;
2838 tree gpr, fpr, ovf, sav, t;
b932f770 2839 int size, rsize;
ad919812
JH
2840 rtx lab_false, lab_over = NULL_RTX;
2841 rtx addr_rtx, r;
2842 rtx container;
2843
2844 /* Only 64bit target needs something special. */
2845 if (!TARGET_64BIT)
2846 {
2847 return std_expand_builtin_va_arg (valist, type);
2848 }
2849
2850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2851 f_fpr = TREE_CHAIN (f_gpr);
2852 f_ovf = TREE_CHAIN (f_fpr);
2853 f_sav = TREE_CHAIN (f_ovf);
2854
2855 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2856 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2857 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2858 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2859 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2860
2861 size = int_size_in_bytes (type);
2862 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2863
2864 container = construct_container (TYPE_MODE (type), type, 0,
2865 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2866 /* Pull the value out of the saved registers...  */
2869
2870 addr_rtx = gen_reg_rtx (Pmode);
2871
2872 if (container)
2873 {
2874 rtx int_addr_rtx, sse_addr_rtx;
2875 int needed_intregs, needed_sseregs;
2876 int need_temp;
2877
2878 lab_over = gen_label_rtx ();
2879 lab_false = gen_label_rtx ();
8bad7136 2880
ad919812
JH
2881 examine_argument (TYPE_MODE (type), type, 0,
2882 &needed_intregs, &needed_sseregs);
2883
2884
2885 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2886 || TYPE_ALIGN (type) > 128);
2887
2888 /* In case we are passing a structure, verify that it occupies a consecutive
2889 block in the register save area.  If not, we need to do moves.  */
2890 if (!need_temp && !REG_P (container))
2891 {
2892 /* Verify that all registers are strictly consecutive.  */
2893 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2894 {
2895 int i;
2896
2897 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2898 {
2899 rtx slot = XVECEXP (container, 0, i);
b531087a 2900 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2901 || INTVAL (XEXP (slot, 1)) != i * 16)
2902 need_temp = 1;
2903 }
2904 }
2905 else
2906 {
2907 int i;
2908
2909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2910 {
2911 rtx slot = XVECEXP (container, 0, i);
b531087a 2912 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2913 || INTVAL (XEXP (slot, 1)) != i * 8)
2914 need_temp = 1;
2915 }
2916 }
2917 }
2918 if (!need_temp)
2919 {
2920 int_addr_rtx = addr_rtx;
2921 sse_addr_rtx = addr_rtx;
2922 }
2923 else
2924 {
2925 int_addr_rtx = gen_reg_rtx (Pmode);
2926 sse_addr_rtx = gen_reg_rtx (Pmode);
2927 }
2928 /* First ensure that we fit completely in registers. */
2929 if (needed_intregs)
2930 {
2931 emit_cmp_and_jump_insns (expand_expr
2932 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2933 GEN_INT ((REGPARM_MAX - needed_intregs +
2934 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2935 1, lab_false);
ad919812
JH
2936 }
2937 if (needed_sseregs)
2938 {
2939 emit_cmp_and_jump_insns (expand_expr
2940 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2941 GEN_INT ((SSE_REGPARM_MAX -
2942 needed_sseregs + 1) * 16 +
2943 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2944 SImode, 1, lab_false);
ad919812
JH
2945 }
2946
2947 /* Compute index to start of area used for integer regs. */
2948 if (needed_intregs)
2949 {
2950 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2951 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2952 if (r != int_addr_rtx)
2953 emit_move_insn (int_addr_rtx, r);
2954 }
2955 if (needed_sseregs)
2956 {
2957 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2958 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2959 if (r != sse_addr_rtx)
2960 emit_move_insn (sse_addr_rtx, r);
2961 }
2962 if (need_temp)
2963 {
2964 int i;
2965 rtx mem;
2966
b932f770
JH
2967 /* Never use the memory itself, as it has the alias set. */
2968 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2969 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2970 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2971 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2972
ad919812
JH
2973 for (i = 0; i < XVECLEN (container, 0); i++)
2974 {
2975 rtx slot = XVECEXP (container, 0, i);
2976 rtx reg = XEXP (slot, 0);
2977 enum machine_mode mode = GET_MODE (reg);
2978 rtx src_addr;
2979 rtx src_mem;
2980 int src_offset;
2981 rtx dest_mem;
2982
2983 if (SSE_REGNO_P (REGNO (reg)))
2984 {
2985 src_addr = sse_addr_rtx;
2986 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2987 }
2988 else
2989 {
2990 src_addr = int_addr_rtx;
2991 src_offset = REGNO (reg) * 8;
2992 }
2993 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2994 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2995 src_mem = adjust_address (src_mem, mode, src_offset);
2996 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2997 emit_move_insn (dest_mem, src_mem);
2998 }
2999 }
3000
3001 if (needed_intregs)
3002 {
3003 t =
3004 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3005 build_int_2 (needed_intregs * 8, 0));
3006 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3007 TREE_SIDE_EFFECTS (t) = 1;
3008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3009 }
3010 if (needed_sseregs)
3011 {
3012 t =
3013 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3014 build_int_2 (needed_sseregs * 16, 0));
3015 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3016 TREE_SIDE_EFFECTS (t) = 1;
3017 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3018 }
3019
3020 emit_jump_insn (gen_jump (lab_over));
3021 emit_barrier ();
3022 emit_label (lab_false);
3023 }
3024
3025 /* ... otherwise out of the overflow area. */
3026
3027 /* Care for on-stack alignment if needed. */
3028 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3029 t = ovf;
3030 else
3031 {
3032 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3033 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3034 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3035 }
3036 t = save_expr (t);
3037
3038 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3039 if (r != addr_rtx)
3040 emit_move_insn (addr_rtx, r);
3041
3042 t =
3043 build (PLUS_EXPR, TREE_TYPE (t), t,
3044 build_int_2 (rsize * UNITS_PER_WORD, 0));
3045 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3046 TREE_SIDE_EFFECTS (t) = 1;
3047 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3048
3049 if (container)
3050 emit_label (lab_over);
3051
ad919812
JH
3052 return addr_rtx;
3053}
3054\f
c3c637e3
GS
3055/* Return nonzero if OP is either a i387 or SSE fp register. */
3056int
3057any_fp_register_operand (op, mode)
3058 rtx op;
3059 enum machine_mode mode ATTRIBUTE_UNUSED;
3060{
3061 return ANY_FP_REG_P (op);
3062}
3063
3064/* Return nonzero if OP is an i387 fp register. */
3065int
3066fp_register_operand (op, mode)
3067 rtx op;
3068 enum machine_mode mode ATTRIBUTE_UNUSED;
3069{
3070 return FP_REG_P (op);
3071}
3072
3073/* Return nonzero if OP is a non-fp register_operand. */
3074int
3075register_and_not_any_fp_reg_operand (op, mode)
3076 rtx op;
3077 enum machine_mode mode;
3078{
3079 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3080}
3081
3082/* Return nonzero if OP is a register operand other than an
3083 i387 fp register. */
3084int
3085register_and_not_fp_reg_operand (op, mode)
3086 rtx op;
3087 enum machine_mode mode;
3088{
3089 return register_operand (op, mode) && !FP_REG_P (op);
3090}
3091
7dd4b4a3
JH
3092/* Return nonzero if OP is a general operand representable on x86_64. */
3093
3094int
3095x86_64_general_operand (op, mode)
3096 rtx op;
3097 enum machine_mode mode;
3098{
3099 if (!TARGET_64BIT)
3100 return general_operand (op, mode);
3101 if (nonimmediate_operand (op, mode))
3102 return 1;
c05dbe81 3103 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3104}
3105
3106/* Return nonzero if OP is a general operand representable on x86_64
d6a7951f 3107 as either a sign-extended or zero-extended constant. */
7dd4b4a3
JH
3108
3109int
3110x86_64_szext_general_operand (op, mode)
3111 rtx op;
3112 enum machine_mode mode;
3113{
3114 if (!TARGET_64BIT)
3115 return general_operand (op, mode);
3116 if (nonimmediate_operand (op, mode))
3117 return 1;
c05dbe81 3118 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3119}
3120
3121/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3122
3123int
3124x86_64_nonmemory_operand (op, mode)
3125 rtx op;
3126 enum machine_mode mode;
3127{
3128 if (!TARGET_64BIT)
3129 return nonmemory_operand (op, mode);
3130 if (register_operand (op, mode))
3131 return 1;
c05dbe81 3132 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3133}
3134
3135/* Return nonzero if OP is a nonmemory operand acceptable by movabs patterns. */
3136
3137int
3138x86_64_movabs_operand (op, mode)
3139 rtx op;
3140 enum machine_mode mode;
3141{
3142 if (!TARGET_64BIT || !flag_pic)
3143 return nonmemory_operand (op, mode);
c05dbe81 3144 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3145 return 1;
3146 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3147 return 1;
3148 return 0;
3149}
3150
3151/* Return nonzero if OP is a nonmemory operand representable on x86_64
 as either a sign-extended or zero-extended constant. */
3152
3153int
3154x86_64_szext_nonmemory_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode;
3157{
3158 if (!TARGET_64BIT)
3159 return nonmemory_operand (op, mode);
3160 if (register_operand (op, mode))
3161 return 1;
c05dbe81 3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3163}
3164
3165/* Return nonzero if OP is an immediate operand representable on x86_64. */
3166
3167int
3168x86_64_immediate_operand (op, mode)
3169 rtx op;
3170 enum machine_mode mode;
3171{
3172 if (!TARGET_64BIT)
3173 return immediate_operand (op, mode);
c05dbe81 3174 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3175}
3176
3177/* Return nonzero if OP is an immediate operand representable on x86_64
 as a zero-extended constant. */
3178
3179int
3180x86_64_zext_immediate_operand (op, mode)
3181 rtx op;
3182 enum machine_mode mode ATTRIBUTE_UNUSED;
3183{
3184 return x86_64_zero_extended_value (op);
3185}
3186
8bad7136
JL
3187/* Return nonzero if OP is (const_int 1), else return zero. */
3188
3189int
3190const_int_1_operand (op, mode)
3191 rtx op;
3192 enum machine_mode mode ATTRIBUTE_UNUSED;
3193{
3194 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3195}
3196
794a292d
JJ
3197/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3198 for shift & compare patterns, as shifting by 0 does not change flags),
3199 else return zero. */
3200
3201int
3202const_int_1_31_operand (op, mode)
3203 rtx op;
3204 enum machine_mode mode ATTRIBUTE_UNUSED;
3205{
3206 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3207}
3208
e075ae69
RH
3209/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3210 reference and a constant. */
b08de47e
MM
3211
3212int
e075ae69
RH
3213symbolic_operand (op, mode)
3214 register rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3216{
e075ae69 3217 switch (GET_CODE (op))
2a2ab3f9 3218 {
e075ae69
RH
3219 case SYMBOL_REF:
3220 case LABEL_REF:
3221 return 1;
3222
3223 case CONST:
3224 op = XEXP (op, 0);
3225 if (GET_CODE (op) == SYMBOL_REF
3226 || GET_CODE (op) == LABEL_REF
3227 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
3228 && (XINT (op, 1) == UNSPEC_GOT
3229 || XINT (op, 1) == UNSPEC_GOTOFF
3230 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3231 return 1;
3232 if (GET_CODE (op) != PLUS
3233 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3234 return 0;
3235
3236 op = XEXP (op, 0);
3237 if (GET_CODE (op) == SYMBOL_REF
3238 || GET_CODE (op) == LABEL_REF)
3239 return 1;
3240 /* Only @GOTOFF gets offsets. */
3241 if (GET_CODE (op) != UNSPEC
8ee41eaf 3242 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3243 return 0;
3244
3245 op = XVECEXP (op, 0, 0);
3246 if (GET_CODE (op) == SYMBOL_REF
3247 || GET_CODE (op) == LABEL_REF)
3248 return 1;
3249 return 0;
3250
3251 default:
3252 return 0;
2a2ab3f9
JVA
3253 }
3254}
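
/* Hypothetical example, not in the original source: building the offsetted
   form that the CONST case above accepts.  The symbol name "foo" is only a
   placeholder.  */

static rtx
symbolic_operand_example ()
{
  /* (const (plus (symbol_ref "foo") (const_int 4))) -- a symbol plus a
     constant displacement.  */
  return gen_rtx_CONST (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_SYMBOL_REF (Pmode, "foo"),
				      GEN_INT (4)));
}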
2a2ab3f9 3255
e075ae69 3256/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3257
e075ae69
RH
3258int
3259pic_symbolic_operand (op, mode)
3260 register rtx op;
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3262{
6eb791fc
JH
3263 if (GET_CODE (op) != CONST)
3264 return 0;
3265 op = XEXP (op, 0);
3266 if (TARGET_64BIT)
3267 {
3268 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3269 return 1;
3270 }
fce5a9f2 3271 else
2a2ab3f9 3272 {
e075ae69
RH
3273 if (GET_CODE (op) == UNSPEC)
3274 return 1;
3275 if (GET_CODE (op) != PLUS
3276 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3277 return 0;
3278 op = XEXP (op, 0);
3279 if (GET_CODE (op) == UNSPEC)
3280 return 1;
2a2ab3f9 3281 }
e075ae69 3282 return 0;
2a2ab3f9 3283}
2a2ab3f9 3284
623fe810
RH
3285/* Return true if OP is a symbolic operand that resolves locally. */
3286
3287static int
3288local_symbolic_operand (op, mode)
3289 rtx op;
3290 enum machine_mode mode ATTRIBUTE_UNUSED;
3291{
623fe810
RH
3292 if (GET_CODE (op) == CONST
3293 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3294 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
623fe810
RH
3295 op = XEXP (XEXP (op, 0), 0);
3296
8bfb45f8
JJ
3297 if (GET_CODE (op) == LABEL_REF)
3298 return 1;
3299
623fe810
RH
3300 if (GET_CODE (op) != SYMBOL_REF)
3301 return 0;
3302
3303 /* These we've been told are local by varasm and encode_section_info
3304 respectively. */
3305 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3306 return 1;
3307
3308 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3309 the compiler that assumes it can just stick the results of
623fe810
RH
3310 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3311 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3312 always create a DECL and invoke targetm.encode_section_info. */
623fe810
RH
3313 if (strncmp (XSTR (op, 0), internal_label_prefix,
3314 internal_label_prefix_len) == 0)
3315 return 1;
3316
3317 return 0;
3318}
3319
f996902d
RH
3320/* Test for various thread-local symbols. See ix86_encode_section_info. */
3321
3322int
3323tls_symbolic_operand (op, mode)
3324 register rtx op;
3325 enum machine_mode mode ATTRIBUTE_UNUSED;
3326{
3327 const char *symbol_str;
3328
3329 if (GET_CODE (op) != SYMBOL_REF)
3330 return 0;
3331 symbol_str = XSTR (op, 0);
3332
3333 if (symbol_str[0] != '%')
3334 return 0;
755ac5d4 3335 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3336}
3337
3338static int
3339tls_symbolic_operand_1 (op, kind)
3340 rtx op;
3341 enum tls_model kind;
3342{
3343 const char *symbol_str;
3344
3345 if (GET_CODE (op) != SYMBOL_REF)
3346 return 0;
3347 symbol_str = XSTR (op, 0);
3348
3349 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3350}
3351
3352int
3353global_dynamic_symbolic_operand (op, mode)
3354 register rtx op;
3355 enum machine_mode mode ATTRIBUTE_UNUSED;
3356{
3357 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3358}
3359
3360int
3361local_dynamic_symbolic_operand (op, mode)
3362 register rtx op;
3363 enum machine_mode mode ATTRIBUTE_UNUSED;
3364{
3365 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3366}
3367
3368int
3369initial_exec_symbolic_operand (op, mode)
3370 register rtx op;
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3372{
3373 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3374}
3375
3376int
3377local_exec_symbolic_operand (op, mode)
3378 register rtx op;
3379 enum machine_mode mode ATTRIBUTE_UNUSED;
3380{
3381 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3382}
3383
28d52ffb
RH
3384/* Test for a valid operand for a call instruction. Don't allow the
3385 arg pointer register or virtual regs since they may decay into
3386 reg + const, which the patterns can't handle. */
2a2ab3f9 3387
e075ae69
RH
3388int
3389call_insn_operand (op, mode)
3390 rtx op;
3391 enum machine_mode mode ATTRIBUTE_UNUSED;
3392{
e075ae69
RH
3393 /* Disallow indirect through a virtual register. This leads to
3394 compiler aborts when trying to eliminate them. */
3395 if (GET_CODE (op) == REG
3396 && (op == arg_pointer_rtx
564d80f4 3397 || op == frame_pointer_rtx
e075ae69
RH
3398 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3399 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3400 return 0;
2a2ab3f9 3401
28d52ffb
RH
3402 /* Disallow `call 1234'. Due to varying assembler lameness this
3403 gets either rejected or translated to `call .+1234'. */
3404 if (GET_CODE (op) == CONST_INT)
3405 return 0;
3406
cbbf65e0
RH
3407 /* Explicitly allow SYMBOL_REF even if pic. */
3408 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3409 return 1;
2a2ab3f9 3410
cbbf65e0
RH
3411 /* Otherwise we can allow any general_operand in the address. */
3412 return general_operand (op, Pmode);
e075ae69 3413}
79325812 3414
4977bab6
ZW
3415/* Test for a valid operand for a call instruction. Don't allow the
3416 arg pointer register or virtual regs since they may decay into
3417 reg + const, which the patterns can't handle. */
3418
3419int
3420sibcall_insn_operand (op, mode)
3421 rtx op;
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3423{
3424 /* Disallow indirect through a virtual register. This leads to
3425 compiler aborts when trying to eliminate them. */
3426 if (GET_CODE (op) == REG
3427 && (op == arg_pointer_rtx
3428 || op == frame_pointer_rtx
3429 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3430 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3431 return 0;
3432
3433 /* Explicitly allow SYMBOL_REF even if pic. */
3434 if (GET_CODE (op) == SYMBOL_REF)
3435 return 1;
3436
3437 /* Otherwise we can only allow register operands. */
3438 return register_operand (op, Pmode);
3439}
3440
e075ae69
RH
3441int
3442constant_call_address_operand (op, mode)
3443 rtx op;
3444 enum machine_mode mode ATTRIBUTE_UNUSED;
3445{
eaf19aba
JJ
3446 if (GET_CODE (op) == CONST
3447 && GET_CODE (XEXP (op, 0)) == PLUS
3448 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3449 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3450 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3451}
2a2ab3f9 3452
e075ae69 3453/* Match exactly zero and one. */
e9a25f70 3454
0f290768 3455int
e075ae69
RH
3456const0_operand (op, mode)
3457 register rtx op;
3458 enum machine_mode mode;
3459{
3460 return op == CONST0_RTX (mode);
3461}
e9a25f70 3462
0f290768 3463int
e075ae69
RH
3464const1_operand (op, mode)
3465 register rtx op;
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3467{
3468 return op == const1_rtx;
3469}
2a2ab3f9 3470
e075ae69 3471/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3472
e075ae69
RH
3473int
3474const248_operand (op, mode)
3475 register rtx op;
3476 enum machine_mode mode ATTRIBUTE_UNUSED;
3477{
3478 return (GET_CODE (op) == CONST_INT
3479 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3480}
e9a25f70 3481
e075ae69 3482/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3483
e075ae69
RH
3484int
3485incdec_operand (op, mode)
3486 register rtx op;
0631e0bf 3487 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3488{
f5143c46 3489 /* On Pentium4, the inc and dec operations cause an extra dependency on the
b4e89e2d
JH
 3490 flags register, since the carry flag is not set. */
3491 if (TARGET_PENTIUM4 && !optimize_size)
3492 return 0;
2b1c08f5 3493 return op == const1_rtx || op == constm1_rtx;
e075ae69 3494}
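
/* Illustrative note, not from the original source: when the predicate above
   rejects the constant, the addition patterns fall back to a plain
   "add $1" / "sub $1", which rewrites all of EFLAGS and so avoids the
   Pentium4 partial-flags dependency that incl/decl would create by leaving
   the carry flag untouched.  */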
2a2ab3f9 3495
371bc54b
JH
3496/* Return nonzero if OP is acceptable as operand of DImode shift
3497 expander. */
3498
3499int
3500shiftdi_operand (op, mode)
3501 rtx op;
3502 enum machine_mode mode ATTRIBUTE_UNUSED;
3503{
3504 if (TARGET_64BIT)
3505 return nonimmediate_operand (op, mode);
3506 else
3507 return register_operand (op, mode);
3508}
3509
0f290768 3510/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3511 register eliminable to the stack pointer. Otherwise, this is
3512 a register operand.
2a2ab3f9 3513
e075ae69
RH
 3514 This is used to prevent esp from being used as an index reg,
 3515 which would only happen in pathological cases. */
5f1ec3e6 3516
e075ae69
RH
3517int
3518reg_no_sp_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode;
3521{
3522 rtx t = op;
3523 if (GET_CODE (t) == SUBREG)
3524 t = SUBREG_REG (t);
564d80f4 3525 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3526 return 0;
2a2ab3f9 3527
e075ae69 3528 return register_operand (op, mode);
2a2ab3f9 3529}
b840bfb0 3530
915119a5
BS
3531int
3532mmx_reg_operand (op, mode)
3533 register rtx op;
bd793c65 3534 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3535{
3536 return MMX_REG_P (op);
3537}
3538
2c5a510c
RH
3539/* Return false if this is any eliminable register. Otherwise
3540 general_operand. */
3541
3542int
3543general_no_elim_operand (op, mode)
3544 register rtx op;
3545 enum machine_mode mode;
3546{
3547 rtx t = op;
3548 if (GET_CODE (t) == SUBREG)
3549 t = SUBREG_REG (t);
3550 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3551 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3552 || t == virtual_stack_dynamic_rtx)
3553 return 0;
1020a5ab
RH
3554 if (REG_P (t)
3555 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3556 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3557 return 0;
2c5a510c
RH
3558
3559 return general_operand (op, mode);
3560}
3561
3562/* Return false if this is any eliminable register. Otherwise
3563 register_operand or const_int. */
3564
3565int
3566nonmemory_no_elim_operand (op, mode)
3567 register rtx op;
3568 enum machine_mode mode;
3569{
3570 rtx t = op;
3571 if (GET_CODE (t) == SUBREG)
3572 t = SUBREG_REG (t);
3573 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3574 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3575 || t == virtual_stack_dynamic_rtx)
3576 return 0;
3577
3578 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3579}
3580
7ec70495
JH
3581/* Return false if this is any eliminable register or stack register,
3582 otherwise work like register_operand. */
3583
3584int
3585index_register_operand (op, mode)
3586 register rtx op;
3587 enum machine_mode mode;
3588{
3589 rtx t = op;
3590 if (GET_CODE (t) == SUBREG)
3591 t = SUBREG_REG (t);
3592 if (!REG_P (t))
3593 return 0;
3594 if (t == arg_pointer_rtx
3595 || t == frame_pointer_rtx
3596 || t == virtual_incoming_args_rtx
3597 || t == virtual_stack_vars_rtx
3598 || t == virtual_stack_dynamic_rtx
3599 || REGNO (t) == STACK_POINTER_REGNUM)
3600 return 0;
3601
3602 return general_operand (op, mode);
3603}
3604
e075ae69 3605/* Return true if op is a Q_REGS class register. */
b840bfb0 3606
e075ae69
RH
3607int
3608q_regs_operand (op, mode)
3609 register rtx op;
3610 enum machine_mode mode;
b840bfb0 3611{
e075ae69
RH
3612 if (mode != VOIDmode && GET_MODE (op) != mode)
3613 return 0;
3614 if (GET_CODE (op) == SUBREG)
3615 op = SUBREG_REG (op);
7799175f 3616 return ANY_QI_REG_P (op);
0f290768 3617}
b840bfb0 3618
4977bab6
ZW
3619/* Return true if op is an flags register. */
3620
3621int
3622flags_reg_operand (op, mode)
3623 register rtx op;
3624 enum machine_mode mode;
3625{
3626 if (mode != VOIDmode && GET_MODE (op) != mode)
3627 return 0;
3628 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3629}
3630
e075ae69 3631/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3632
e075ae69
RH
3633int
3634non_q_regs_operand (op, mode)
3635 register rtx op;
3636 enum machine_mode mode;
3637{
3638 if (mode != VOIDmode && GET_MODE (op) != mode)
3639 return 0;
3640 if (GET_CODE (op) == SUBREG)
3641 op = SUBREG_REG (op);
3642 return NON_QI_REG_P (op);
0f290768 3643}
b840bfb0 3644
4977bab6
ZW
3645int
3646zero_extended_scalar_load_operand (op, mode)
3647 rtx op;
3648 enum machine_mode mode ATTRIBUTE_UNUSED;
3649{
3650 unsigned n_elts;
3651 if (GET_CODE (op) != MEM)
3652 return 0;
3653 op = maybe_get_pool_constant (op);
3654 if (!op)
3655 return 0;
3656 if (GET_CODE (op) != CONST_VECTOR)
3657 return 0;
3658 n_elts =
3659 (GET_MODE_SIZE (GET_MODE (op)) /
3660 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3661 for (n_elts--; n_elts > 0; n_elts--)
3662 {
3663 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3664 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3665 return 0;
3666 }
3667 return 1;
3668}
3669
915119a5
BS
3670/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3671 insns. */
3672int
3673sse_comparison_operator (op, mode)
3674 rtx op;
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3676{
3677 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3678 switch (code)
3679 {
3680 /* Operations supported directly. */
3681 case EQ:
3682 case LT:
3683 case LE:
3684 case UNORDERED:
3685 case NE:
3686 case UNGE:
3687 case UNGT:
3688 case ORDERED:
3689 return 1;
 3690 /* These are equivalent to the ones above in non-IEEE comparisons. */
3691 case UNEQ:
3692 case UNLT:
3693 case UNLE:
3694 case LTGT:
3695 case GE:
3696 case GT:
3697 return !TARGET_IEEE_FP;
3698 default:
3699 return 0;
3700 }
915119a5 3701}
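
/* Hypothetical sketch, not in the original source: the eight codes the
   switch above accepts unconditionally map one-for-one onto the SSE
   cmpps/cmpss immediate encodings 0-7 (eq, lt, le, unord, neq, nlt, nle,
   ord), assuming that encoding:  */

static int
sse_comparison_imm_example (code)
     enum rtx_code code;
{
  switch (code)
    {
    case EQ:		return 0;
    case LT:		return 1;
    case LE:		return 2;
    case UNORDERED:	return 3;
    case NE:		return 4;
    case UNGE:		return 5;	/* not-less-than */
    case UNGT:		return 6;	/* not-less-than-or-equal */
    case ORDERED:	return 7;
    default:		return -1;	/* handled only as an equivalent code,
					   or rejected for TARGET_IEEE_FP */
    }
}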
9076b9c1 3702/* Return 1 if OP is a valid comparison operator in a valid mode. */
e075ae69 3703int
9076b9c1
JH
3704ix86_comparison_operator (op, mode)
3705 register rtx op;
3706 enum machine_mode mode;
e075ae69 3707{
9076b9c1 3708 enum machine_mode inmode;
9a915772 3709 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3710 if (mode != VOIDmode && GET_MODE (op) != mode)
3711 return 0;
9a915772
JH
3712 if (GET_RTX_CLASS (code) != '<')
3713 return 0;
3714 inmode = GET_MODE (XEXP (op, 0));
3715
3716 if (inmode == CCFPmode || inmode == CCFPUmode)
3717 {
3718 enum rtx_code second_code, bypass_code;
3719 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3720 return (bypass_code == NIL && second_code == NIL);
3721 }
3722 switch (code)
3a3677ff
RH
3723 {
3724 case EQ: case NE:
3a3677ff 3725 return 1;
9076b9c1 3726 case LT: case GE:
7e08e190 3727 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3728 || inmode == CCGOCmode || inmode == CCNOmode)
3729 return 1;
3730 return 0;
7e08e190 3731 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3732 if (inmode == CCmode)
9076b9c1
JH
3733 return 1;
3734 return 0;
3735 case GT: case LE:
7e08e190 3736 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3737 return 1;
3738 return 0;
3a3677ff
RH
3739 default:
3740 return 0;
3741 }
3742}
3743
9076b9c1 3744/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3745
9076b9c1
JH
3746int
3747fcmov_comparison_operator (op, mode)
3a3677ff
RH
3748 register rtx op;
3749 enum machine_mode mode;
3750{
b62d22a2 3751 enum machine_mode inmode;
9a915772 3752 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3753 if (mode != VOIDmode && GET_MODE (op) != mode)
3754 return 0;
9a915772
JH
3755 if (GET_RTX_CLASS (code) != '<')
3756 return 0;
3757 inmode = GET_MODE (XEXP (op, 0));
3758 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3759 {
9a915772
JH
3760 enum rtx_code second_code, bypass_code;
3761 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3762 if (bypass_code != NIL || second_code != NIL)
3763 return 0;
3764 code = ix86_fp_compare_code_to_integer (code);
3765 }
 3766 /* The i387 supports only a limited set of condition codes. */
3767 switch (code)
3768 {
3769 case LTU: case GTU: case LEU: case GEU:
3770 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3771 return 1;
3772 return 0;
9a915772
JH
3773 case ORDERED: case UNORDERED:
3774 case EQ: case NE:
3775 return 1;
3a3677ff
RH
3776 default:
3777 return 0;
3778 }
e075ae69 3779}
b840bfb0 3780
e9e80858
JH
3781/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3782
3783int
3784promotable_binary_operator (op, mode)
3785 register rtx op;
3786 enum machine_mode mode ATTRIBUTE_UNUSED;
3787{
3788 switch (GET_CODE (op))
3789 {
3790 case MULT:
 3791 /* Modern CPUs have the same latency for HImode and SImode multiplies,
 3792 but the 386 and 486 do the HImode multiply faster. */
3793 return ix86_cpu > PROCESSOR_I486;
3794 case PLUS:
3795 case AND:
3796 case IOR:
3797 case XOR:
3798 case ASHIFT:
3799 return 1;
3800 default:
3801 return 0;
3802 }
3803}
3804
e075ae69
RH
3805/* Nearly general operand, but accept any const_double, since we wish
3806 to be able to drop them into memory rather than have them get pulled
3807 into registers. */
b840bfb0 3808
2a2ab3f9 3809int
e075ae69
RH
3810cmp_fp_expander_operand (op, mode)
3811 register rtx op;
3812 enum machine_mode mode;
2a2ab3f9 3813{
e075ae69 3814 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3815 return 0;
e075ae69 3816 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3817 return 1;
e075ae69 3818 return general_operand (op, mode);
2a2ab3f9
JVA
3819}
3820
e075ae69 3821/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3822
3823int
e075ae69 3824ext_register_operand (op, mode)
2a2ab3f9 3825 register rtx op;
bb5177ac 3826 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3827{
3522082b 3828 int regno;
0d7d98ee
JH
3829 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3830 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3831 return 0;
3522082b
JH
3832
3833 if (!register_operand (op, VOIDmode))
3834 return 0;
3835
 3836 /* Be careful to accept only registers having upper parts. */
3837 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3838 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3839}
3840
3841/* Return 1 if this is a valid binary floating-point operation.
0f290768 3842 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3843
3844int
3845binary_fp_operator (op, mode)
3846 register rtx op;
3847 enum machine_mode mode;
3848{
3849 if (mode != VOIDmode && mode != GET_MODE (op))
3850 return 0;
3851
2a2ab3f9
JVA
3852 switch (GET_CODE (op))
3853 {
e075ae69
RH
3854 case PLUS:
3855 case MINUS:
3856 case MULT:
3857 case DIV:
3858 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3859
2a2ab3f9
JVA
3860 default:
3861 return 0;
3862 }
3863}
fee2770d 3864
e075ae69 3865int
b531087a 3866mult_operator (op, mode)
e075ae69
RH
3867 register rtx op;
3868 enum machine_mode mode ATTRIBUTE_UNUSED;
3869{
3870 return GET_CODE (op) == MULT;
3871}
3872
3873int
b531087a 3874div_operator (op, mode)
e075ae69
RH
3875 register rtx op;
3876 enum machine_mode mode ATTRIBUTE_UNUSED;
3877{
3878 return GET_CODE (op) == DIV;
3879}
0a726ef1
JL
3880
3881int
e075ae69
RH
3882arith_or_logical_operator (op, mode)
3883 rtx op;
3884 enum machine_mode mode;
0a726ef1 3885{
e075ae69
RH
3886 return ((mode == VOIDmode || GET_MODE (op) == mode)
3887 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3888 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3889}
3890
e075ae69 3891/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3892
3893int
e075ae69
RH
3894memory_displacement_operand (op, mode)
3895 register rtx op;
3896 enum machine_mode mode;
4f2c8ebb 3897{
e075ae69 3898 struct ix86_address parts;
e9a25f70 3899
e075ae69
RH
3900 if (! memory_operand (op, mode))
3901 return 0;
3902
3903 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3904 abort ();
3905
3906 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3907}
3908
16189740 3909/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3910 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3911
3912 ??? It seems likely that this will only work because cmpsi is an
3913 expander, and no actual insns use this. */
4f2c8ebb
RS
3914
3915int
e075ae69
RH
3916cmpsi_operand (op, mode)
3917 rtx op;
3918 enum machine_mode mode;
fee2770d 3919{
b9b2c339 3920 if (nonimmediate_operand (op, mode))
e075ae69
RH
3921 return 1;
3922
3923 if (GET_CODE (op) == AND
3924 && GET_MODE (op) == SImode
3925 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3926 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3927 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3928 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3929 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3930 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3931 return 1;
e9a25f70 3932
fee2770d
RS
3933 return 0;
3934}
d784886d 3935
e075ae69
RH
3936/* Returns 1 if OP is memory operand that can not be represented by the
3937 modRM array. */
d784886d
RK
3938
3939int
e075ae69 3940long_memory_operand (op, mode)
d784886d
RK
3941 register rtx op;
3942 enum machine_mode mode;
3943{
e075ae69 3944 if (! memory_operand (op, mode))
d784886d
RK
3945 return 0;
3946
e075ae69 3947 return memory_address_length (op) != 0;
d784886d 3948}
2247f6ed
JH
3949
3950/* Return nonzero if the rtx is known aligned. */
3951
3952int
3953aligned_operand (op, mode)
3954 rtx op;
3955 enum machine_mode mode;
3956{
3957 struct ix86_address parts;
3958
3959 if (!general_operand (op, mode))
3960 return 0;
3961
0f290768 3962 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3963 if (GET_CODE (op) != MEM)
3964 return 1;
3965
0f290768 3966 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3967 if (MEM_VOLATILE_P (op))
3968 return 0;
3969
3970 op = XEXP (op, 0);
3971
3972 /* Pushes and pops are only valid on the stack pointer. */
3973 if (GET_CODE (op) == PRE_DEC
3974 || GET_CODE (op) == POST_INC)
3975 return 1;
3976
3977 /* Decode the address. */
3978 if (! ix86_decompose_address (op, &parts))
3979 abort ();
3980
1540f9eb
JH
3981 if (parts.base && GET_CODE (parts.base) == SUBREG)
3982 parts.base = SUBREG_REG (parts.base);
3983 if (parts.index && GET_CODE (parts.index) == SUBREG)
3984 parts.index = SUBREG_REG (parts.index);
3985
2247f6ed
JH
3986 /* Look for some component that isn't known to be aligned. */
3987 if (parts.index)
3988 {
3989 if (parts.scale < 4
bdb429a5 3990 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3991 return 0;
3992 }
3993 if (parts.base)
3994 {
bdb429a5 3995 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3996 return 0;
3997 }
3998 if (parts.disp)
3999 {
4000 if (GET_CODE (parts.disp) != CONST_INT
4001 || (INTVAL (parts.disp) & 3) != 0)
4002 return 0;
4003 }
4004
4005 /* Didn't find one -- this must be an aligned address. */
4006 return 1;
4007}
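
/* Worked examples, not from the original source: a displacement of
   (const_int 3) fails the final test above since 3 & 3 != 0, while
   (const_int 8) passes; a base register qualifies only when
   REGNO_POINTER_ALIGN reports at least 32 bits; and an index scaled by 4
   or 8 always preserves 32-bit alignment, which is why the scale < 4 test
   skips the alignment check for larger scales.  */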
e075ae69
RH
4008\f
4009/* Return true if the constant is something that can be loaded with
4010 a special instruction. Only handle 0.0 and 1.0; others are less
4011 worthwhile. */
57dbca5e
BS
4012
4013int
e075ae69
RH
4014standard_80387_constant_p (x)
4015 rtx x;
57dbca5e 4016{
2b04e52b 4017 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4018 return -1;
2b04e52b
JH
 4019 /* Note that the 80387 has other constants, such as pi, that we should
 4020 support too. On some machines these are much slower to load as a
 4021 standard constant than to load from doubles in memory. */
4022 if (x == CONST0_RTX (GET_MODE (x)))
4023 return 1;
4024 if (x == CONST1_RTX (GET_MODE (x)))
4025 return 2;
e075ae69 4026 return 0;
57dbca5e
BS
4027}
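
/* Hypothetical usage sketch, not in the original source: the move patterns
   in i386.md use the 1/2 return values above to pick the special load
   instructions; anything else is forced into the constant pool.  */

static const char *
fp_constant_insn_example (x)
     rtx x;
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";	/* push +0.0 onto the fp stack */
    case 2:
      return "fld1";	/* push +1.0 onto the fp stack */
    default:
      return 0;		/* load the value from memory instead */
    }
}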
4028
2b04e52b
JH
4029/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4030 */
4031int
4032standard_sse_constant_p (x)
4033 rtx x;
4034{
0e67d460
JH
4035 if (x == const0_rtx)
4036 return 1;
2b04e52b
JH
4037 return (x == CONST0_RTX (GET_MODE (x)));
4038}
4039
2a2ab3f9
JVA
4040/* Returns 1 if OP contains a symbol reference */
4041
4042int
4043symbolic_reference_mentioned_p (op)
4044 rtx op;
4045{
6f7d635c 4046 register const char *fmt;
2a2ab3f9
JVA
4047 register int i;
4048
4049 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4050 return 1;
4051
4052 fmt = GET_RTX_FORMAT (GET_CODE (op));
4053 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4054 {
4055 if (fmt[i] == 'E')
4056 {
4057 register int j;
4058
4059 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4060 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4061 return 1;
4062 }
e9a25f70 4063
2a2ab3f9
JVA
4064 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4065 return 1;
4066 }
4067
4068 return 0;
4069}
e075ae69
RH
4070
4071/* Return 1 if it is appropriate to emit `ret' instructions in the
4072 body of a function. Do this only if the epilogue is simple, needing a
4073 couple of insns. Prior to reloading, we can't tell how many registers
4074 must be saved, so return 0 then. Return 0 if there is no frame
4075 marker to de-allocate.
4076
4077 If NON_SAVING_SETJMP is defined and true, then it is not possible
4078 for the epilogue to be simple, so return 0. This is a special case
4079 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4080 until final, but jump_optimize may need to know sooner if a
4081 `return' is OK. */
32b5b1aa
SC
4082
4083int
e075ae69 4084ix86_can_use_return_insn_p ()
32b5b1aa 4085{
4dd2ac2c 4086 struct ix86_frame frame;
9a7372d6 4087
e075ae69
RH
4088#ifdef NON_SAVING_SETJMP
4089 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4090 return 0;
4091#endif
9a7372d6
RH
4092
4093 if (! reload_completed || frame_pointer_needed)
4094 return 0;
32b5b1aa 4095
9a7372d6
RH
4096 /* Don't allow more than 32 pop, since that's all we can do
4097 with one instruction. */
4098 if (current_function_pops_args
4099 && current_function_args_size >= 32768)
e075ae69 4100 return 0;
32b5b1aa 4101
4dd2ac2c
JH
4102 ix86_compute_frame_layout (&frame);
4103 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4104}
6189a572
JH
4105\f
4106/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4107int
c05dbe81 4108x86_64_sign_extended_value (value)
6189a572
JH
4109 rtx value;
4110{
4111 switch (GET_CODE (value))
4112 {
4113 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
 4114 to be at least 32 and thus all acceptable constants are
4115 represented as CONST_INT. */
4116 case CONST_INT:
4117 if (HOST_BITS_PER_WIDE_INT == 32)
4118 return 1;
4119 else
4120 {
4121 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4122 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
4123 }
4124 break;
4125
75d38379
JJ
 4126 /* For certain code models, the symbolic references are known to fit;
 4127 in the CM_SMALL_PIC model we know it fits if it is local to the shared
 4128 library. Don't count TLS SYMBOL_REFs here, since they should fit
 4129 only if inside an UNSPEC handled below. */
6189a572 4130 case SYMBOL_REF:
c05dbe81 4131 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
6189a572
JH
4132
4133 /* For certain code models, the code is near as well. */
4134 case LABEL_REF:
c05dbe81
JH
4135 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4136 || ix86_cmodel == CM_KERNEL);
6189a572
JH
4137
4138 /* We also may accept the offsetted memory references in certain special
4139 cases. */
4140 case CONST:
75d38379
JJ
4141 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4142 switch (XINT (XEXP (value, 0), 1))
4143 {
4144 case UNSPEC_GOTPCREL:
4145 case UNSPEC_DTPOFF:
4146 case UNSPEC_GOTNTPOFF:
4147 case UNSPEC_NTPOFF:
4148 return 1;
4149 default:
4150 break;
4151 }
4152 if (GET_CODE (XEXP (value, 0)) == PLUS)
6189a572
JH
4153 {
4154 rtx op1 = XEXP (XEXP (value, 0), 0);
4155 rtx op2 = XEXP (XEXP (value, 0), 1);
4156 HOST_WIDE_INT offset;
4157
4158 if (ix86_cmodel == CM_LARGE)
4159 return 0;
4160 if (GET_CODE (op2) != CONST_INT)
4161 return 0;
4162 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4163 switch (GET_CODE (op1))
4164 {
4165 case SYMBOL_REF:
75d38379 4166 /* For CM_SMALL assume that the latest object is 16MB below
6189a572
JH
 4167 the end of the 31-bit boundary. We may also accept pretty
 4168 large negative constants, knowing that all objects are
 4169 in the positive half of the address space. */
4170 if (ix86_cmodel == CM_SMALL
75d38379 4171 && offset < 16*1024*1024
6189a572
JH
4172 && trunc_int_for_mode (offset, SImode) == offset)
4173 return 1;
 4174 /* For CM_KERNEL we know that all objects reside in the
 4175 negative half of the 32-bit address space. We may not
 4176 accept negative offsets, since they may be just off the
d6a7951f 4177 boundary, but we may accept pretty large positive ones. */
6189a572
JH
4178 if (ix86_cmodel == CM_KERNEL
4179 && offset > 0
4180 && trunc_int_for_mode (offset, SImode) == offset)
4181 return 1;
4182 break;
4183 case LABEL_REF:
4184 /* These conditions are similar to SYMBOL_REF ones, just the
4185 constraints for code models differ. */
c05dbe81 4186 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4187 && offset < 16*1024*1024
6189a572
JH
4188 && trunc_int_for_mode (offset, SImode) == offset)
4189 return 1;
4190 if (ix86_cmodel == CM_KERNEL
4191 && offset > 0
4192 && trunc_int_for_mode (offset, SImode) == offset)
4193 return 1;
4194 break;
75d38379
JJ
4195 case UNSPEC:
4196 switch (XINT (op1, 1))
4197 {
4198 case UNSPEC_DTPOFF:
4199 case UNSPEC_NTPOFF:
4200 if (offset > 0
4201 && trunc_int_for_mode (offset, SImode) == offset)
4202 return 1;
4203 }
4204 break;
6189a572
JH
4205 default:
4206 return 0;
4207 }
4208 }
4209 return 0;
4210 default:
4211 return 0;
4212 }
4213}
4214
4215/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4216int
4217x86_64_zero_extended_value (value)
4218 rtx value;
4219{
4220 switch (GET_CODE (value))
4221 {
4222 case CONST_DOUBLE:
4223 if (HOST_BITS_PER_WIDE_INT == 32)
4224 return (GET_MODE (value) == VOIDmode
4225 && !CONST_DOUBLE_HIGH (value));
4226 else
4227 return 0;
4228 case CONST_INT:
4229 if (HOST_BITS_PER_WIDE_INT == 32)
4230 return INTVAL (value) >= 0;
4231 else
b531087a 4232 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
4233 break;
4234
4235 /* For certain code models, the symbolic references are known to fit. */
4236 case SYMBOL_REF:
4237 return ix86_cmodel == CM_SMALL;
4238
4239 /* For certain code models, the code is near as well. */
4240 case LABEL_REF:
4241 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4242
4243 /* We also may accept the offsetted memory references in certain special
4244 cases. */
4245 case CONST:
4246 if (GET_CODE (XEXP (value, 0)) == PLUS)
4247 {
4248 rtx op1 = XEXP (XEXP (value, 0), 0);
4249 rtx op2 = XEXP (XEXP (value, 0), 1);
4250
4251 if (ix86_cmodel == CM_LARGE)
4252 return 0;
4253 switch (GET_CODE (op1))
4254 {
4255 case SYMBOL_REF:
4256 return 0;
d6a7951f 4257 /* For the small code model we may accept pretty large positive
6189a572
JH
4258 offsets, since one bit is available for free. Negative
4259 offsets are limited by the size of NULL pointer area
4260 specified by the ABI. */
4261 if (ix86_cmodel == CM_SMALL
4262 && GET_CODE (op2) == CONST_INT
4263 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4264 && (trunc_int_for_mode (INTVAL (op2), SImode)
4265 == INTVAL (op2)))
4266 return 1;
4267 /* ??? For the kernel, we may accept adjustment of
4268 -0x10000000, since we know that it will just convert
d6a7951f 4269 negative address space to positive, but perhaps this
6189a572
JH
4270 is not worthwhile. */
4271 break;
4272 case LABEL_REF:
4273 /* These conditions are similar to SYMBOL_REF ones, just the
4274 constraints for code models differ. */
4275 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4276 && GET_CODE (op2) == CONST_INT
4277 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4278 && (trunc_int_for_mode (INTVAL (op2), SImode)
4279 == INTVAL (op2)))
4280 return 1;
4281 break;
4282 default:
4283 return 0;
4284 }
4285 }
4286 return 0;
4287 default:
4288 return 0;
4289 }
4290}
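
/* Worked examples, not from the original source: a CONST_INT of 0x7fffffff
   satisfies x86_64_sign_extended_value, since sign extending its low 32
   bits reproduces the value, while 0x80000000 does not -- sign extension
   would yield 0xffffffff80000000.  The latter does satisfy
   x86_64_zero_extended_value above, as no bit above bit 31 is set.  */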
6fca22eb
RH
4291
4292/* Value should be nonzero if functions must have frame pointers.
4293 Zero means the frame pointer need not be set up (and parms may
4294 be accessed via the stack pointer) in functions that seem suitable. */
4295
4296int
4297ix86_frame_pointer_required ()
4298{
4299 /* If we accessed previous frames, then the generated code expects
4300 to be able to access the saved ebp value in our frame. */
4301 if (cfun->machine->accesses_prev_frame)
4302 return 1;
a4f31c00 4303
6fca22eb
RH
4304 /* Several x86 os'es need a frame pointer for other reasons,
4305 usually pertaining to setjmp. */
4306 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4307 return 1;
4308
4309 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4310 the frame pointer by default. Turn it back on now if we've not
4311 got a leaf function. */
a7943381 4312 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4313 && (!current_function_is_leaf))
4314 return 1;
4315
4316 if (current_function_profile)
6fca22eb
RH
4317 return 1;
4318
4319 return 0;
4320}
4321
4322/* Record that the current function accesses previous call frames. */
4323
4324void
4325ix86_setup_frame_addresses ()
4326{
4327 cfun->machine->accesses_prev_frame = 1;
4328}
e075ae69 4329\f
145aacc2
RH
4330#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4331# define USE_HIDDEN_LINKONCE 1
4332#else
4333# define USE_HIDDEN_LINKONCE 0
4334#endif
4335
bd09bdeb 4336static int pic_labels_used;
e9a25f70 4337
145aacc2
RH
4338/* Fills in the label name that should be used for a pc thunk for
4339 the given register. */
4340
4341static void
4342get_pc_thunk_name (name, regno)
4343 char name[32];
4344 unsigned int regno;
4345{
4346 if (USE_HIDDEN_LINKONCE)
4347 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4348 else
4349 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4350}
4351
4352
e075ae69
RH
4353/* This function generates code for -fpic that loads %ebx with
4354 the return address of the caller and then returns. */
4355
4356void
4cf12e7e 4357ix86_asm_file_end (file)
e075ae69 4358 FILE *file;
e075ae69
RH
4359{
4360 rtx xops[2];
bd09bdeb 4361 int regno;
32b5b1aa 4362
bd09bdeb 4363 for (regno = 0; regno < 8; ++regno)
7c262518 4364 {
145aacc2
RH
4365 char name[32];
4366
bd09bdeb
RH
4367 if (! ((pic_labels_used >> regno) & 1))
4368 continue;
4369
145aacc2 4370 get_pc_thunk_name (name, regno);
bd09bdeb 4371
145aacc2
RH
4372 if (USE_HIDDEN_LINKONCE)
4373 {
4374 tree decl;
4375
4376 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4377 error_mark_node);
4378 TREE_PUBLIC (decl) = 1;
4379 TREE_STATIC (decl) = 1;
4380 DECL_ONE_ONLY (decl) = 1;
4381
4382 (*targetm.asm_out.unique_section) (decl, 0);
4383 named_section (decl, NULL, 0);
4384
5eb99654 4385 (*targetm.asm_out.globalize_label) (file, name);
145aacc2
RH
4386 fputs ("\t.hidden\t", file);
4387 assemble_name (file, name);
4388 fputc ('\n', file);
4389 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4390 }
4391 else
4392 {
4393 text_section ();
4394 ASM_OUTPUT_LABEL (file, name);
4395 }
bd09bdeb
RH
4396
4397 xops[0] = gen_rtx_REG (SImode, regno);
4398 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4399 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4400 output_asm_insn ("ret", xops);
7c262518 4401 }
32b5b1aa 4402}
32b5b1aa 4403
c8c03509 4404/* Emit code for the SET_GOT patterns. */
32b5b1aa 4405
c8c03509
RH
4406const char *
4407output_set_got (dest)
4408 rtx dest;
4409{
4410 rtx xops[3];
0d7d98ee 4411
c8c03509 4412 xops[0] = dest;
5fc0e5df 4413 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4414
c8c03509 4415 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4416 {
c8c03509
RH
4417 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4418
4419 if (!flag_pic)
4420 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4421 else
4422 output_asm_insn ("call\t%a2", xops);
4423
b069de3b
SS
4424#if TARGET_MACHO
4425 /* Output the "canonical" label name ("Lxx$pb") here too. This
4426 is what will be referred to by the Mach-O PIC subsystem. */
4427 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4428#endif
4977bab6 4429 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4430 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4431
4432 if (flag_pic)
4433 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4434 }
e075ae69 4435 else
e5cb57e8 4436 {
145aacc2
RH
4437 char name[32];
4438 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4439 pic_labels_used |= 1 << REGNO (dest);
f996902d 4440
145aacc2 4441 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4442 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4443 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4444 }
e5cb57e8 4445
c8c03509
RH
4446 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4447 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4448 else if (!TARGET_MACHO)
8e9fadc3 4449 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4450
c8c03509 4451 return "";
e9a25f70 4452}
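
/* Illustrative sketch, not in the original source, of the assembly the two
   routines above cooperate to produce in the TARGET_DEEP_BRANCH_PREDICTION
   case when the GOT pointer is wanted in %ebx:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   with the thunk itself emitted once at end of file:

   __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   The call pushes the address of the add; the thunk copies it into %ebx
   and returns, keeping the processor's call/return prediction stack
   balanced, unlike the call/popl sequence used on older processors.  */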
8dfe5673 4453
0d7d98ee 4454/* Generate a "push" pattern for input ARG. */
e9a25f70 4455
e075ae69
RH
4456static rtx
4457gen_push (arg)
4458 rtx arg;
e9a25f70 4459{
c5c76735 4460 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4461 gen_rtx_MEM (Pmode,
4462 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4463 stack_pointer_rtx)),
4464 arg);
e9a25f70
JL
4465}
4466
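/* Hypothetical usage example, not in the original source: pushing the
   accumulator.  On a 32-bit target the result is

     (set (mem:SI (pre_dec:SI (reg:SI 7)))
	  (reg:SI 0))

   which the move patterns emit as "pushl %eax".  */

static void
gen_push_example ()
{
  emit_insn (gen_push (gen_rtx_REG (Pmode, 0)));
}
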
bd09bdeb
RH
4467/* Return >= 0 if there is an unused call-clobbered register available
4468 for the entire function. */
4469
4470static unsigned int
4471ix86_select_alt_pic_regnum ()
4472{
4473 if (current_function_is_leaf && !current_function_profile)
4474 {
4475 int i;
4476 for (i = 2; i >= 0; --i)
4477 if (!regs_ever_live[i])
4478 return i;
4479 }
4480
4481 return INVALID_REGNUM;
4482}
fce5a9f2 4483
4dd2ac2c
JH
4484/* Return 1 if we need to save REGNO. */
4485static int
1020a5ab 4486ix86_save_reg (regno, maybe_eh_return)
9b690711 4487 unsigned int regno;
37a58036 4488 int maybe_eh_return;
1020a5ab 4489{
bd09bdeb
RH
4490 if (pic_offset_table_rtx
4491 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4492 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4493 || current_function_profile
1020a5ab 4494 || current_function_calls_eh_return))
bd09bdeb
RH
4495 {
4496 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4497 return 0;
4498 return 1;
4499 }
1020a5ab
RH
4500
4501 if (current_function_calls_eh_return && maybe_eh_return)
4502 {
4503 unsigned i;
4504 for (i = 0; ; i++)
4505 {
b531087a 4506 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4507 if (test == INVALID_REGNUM)
4508 break;
9b690711 4509 if (test == regno)
1020a5ab
RH
4510 return 1;
4511 }
4512 }
4dd2ac2c 4513
1020a5ab
RH
4514 return (regs_ever_live[regno]
4515 && !call_used_regs[regno]
4516 && !fixed_regs[regno]
4517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4518}
4519
0903fcab
JH
4520/* Return the number of registers to be saved on the stack. */
4521
4522static int
4523ix86_nsaved_regs ()
4524{
4525 int nregs = 0;
0903fcab
JH
4526 int regno;
4527
4dd2ac2c 4528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4529 if (ix86_save_reg (regno, true))
4dd2ac2c 4530 nregs++;
0903fcab
JH
4531 return nregs;
4532}
4533
4534/* Return the offset between two registers, one to be eliminated, and the other
4535 its replacement, at the start of a routine. */
4536
4537HOST_WIDE_INT
4538ix86_initial_elimination_offset (from, to)
4539 int from;
4540 int to;
4541{
4dd2ac2c
JH
4542 struct ix86_frame frame;
4543 ix86_compute_frame_layout (&frame);
564d80f4
JH
4544
4545 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4546 return frame.hard_frame_pointer_offset;
564d80f4
JH
4547 else if (from == FRAME_POINTER_REGNUM
4548 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4549 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4550 else
4551 {
564d80f4
JH
4552 if (to != STACK_POINTER_REGNUM)
4553 abort ();
4554 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4555 return frame.stack_pointer_offset;
564d80f4
JH
4556 else if (from != FRAME_POINTER_REGNUM)
4557 abort ();
0903fcab 4558 else
4dd2ac2c 4559 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4560 }
4561}
4562
4dd2ac2c 4563/* Fill the ix86_frame structure describing the frame of the current function. */
0f290768 4564
4dd2ac2c
JH
4565static void
4566ix86_compute_frame_layout (frame)
4567 struct ix86_frame *frame;
65954bd8 4568{
65954bd8 4569 HOST_WIDE_INT total_size;
564d80f4 4570 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4571 int offset;
4572 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4573 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4574
4dd2ac2c 4575 frame->nregs = ix86_nsaved_regs ();
564d80f4 4576 total_size = size;
65954bd8 4577
9ba81eaa 4578 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4579 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4580
4581 frame->hard_frame_pointer_offset = offset;
564d80f4 4582
fcbfaa65
RK
4583 /* Do some sanity checking of stack_alignment_needed and
 4584 preferred_alignment, since the i386 port is the only one using those
f710504c 4585 features, and they may break easily. */
564d80f4 4586
44affdae
JH
4587 if (size && !stack_alignment_needed)
4588 abort ();
44affdae
JH
4589 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4590 abort ();
4591 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4592 abort ();
4593 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4594 abort ();
564d80f4 4595
4dd2ac2c
JH
4596 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4597 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4598
4dd2ac2c
JH
4599 /* Register save area */
4600 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4601
8362f420
JH
4602 /* Va-arg area */
4603 if (ix86_save_varrargs_registers)
4604 {
4605 offset += X86_64_VARARGS_SIZE;
4606 frame->va_arg_size = X86_64_VARARGS_SIZE;
4607 }
4608 else
4609 frame->va_arg_size = 0;
4610
4dd2ac2c
JH
4611 /* Align start of frame for local function. */
4612 frame->padding1 = ((offset + stack_alignment_needed - 1)
4613 & -stack_alignment_needed) - offset;
f73ad30e 4614
4dd2ac2c 4615 offset += frame->padding1;
65954bd8 4616
4dd2ac2c
JH
4617 /* Frame pointer points here. */
4618 frame->frame_pointer_offset = offset;
54ff41b7 4619
4dd2ac2c 4620 offset += size;
65954bd8 4621
0b7ae565
RH
4622 /* Add outgoing arguments area. Can be skipped if we eliminated
4623 all the function calls as dead code. */
4624 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4625 {
4626 offset += current_function_outgoing_args_size;
4627 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4628 }
4629 else
4630 frame->outgoing_arguments_size = 0;
564d80f4 4631
002ff5bc
RH
4632 /* Align stack boundary. Only needed if we're calling another function
4633 or using alloca. */
4634 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4635 frame->padding2 = ((offset + preferred_alignment - 1)
4636 & -preferred_alignment) - offset;
4637 else
4638 frame->padding2 = 0;
4dd2ac2c
JH
4639
4640 offset += frame->padding2;
4641
 4642 /* We've reached the end of the stack frame. */
4643 frame->stack_pointer_offset = offset;
4644
 4645 /* The size the prologue needs to allocate. */
4646 frame->to_allocate =
4647 (size + frame->padding1 + frame->padding2
8362f420 4648 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4649
8362f420
JH
4650 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4651 && current_function_is_leaf)
4652 {
4653 frame->red_zone_size = frame->to_allocate;
4654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4656 }
4657 else
4658 frame->red_zone_size = 0;
4659 frame->to_allocate -= frame->red_zone_size;
4660 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4661#if 0
4662 fprintf (stderr, "nregs: %i\n", frame->nregs);
4663 fprintf (stderr, "size: %i\n", size);
4664 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4665 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4666 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4667 fprintf (stderr, "padding2: %i\n", frame->padding2);
4668 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4669 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4670 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4671 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4672 frame->hard_frame_pointer_offset);
4673 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4674#endif
65954bd8
JL
4675}
4676
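/* Worked example, not from the original source: a 32-bit function with a
   frame pointer, two saved registers, 20 bytes of locals, no varargs and
   no outgoing arguments.  Offset starts at 8 (return address plus saved
   ebp), so hard_frame_pointer_offset = 8; the register save area raises it
   to 16; with a 16-byte stack_alignment_needed, padding1 = 0 and
   frame_pointer_offset = 16; the locals raise the offset to 36; for a leaf
   function with no alloca, padding2 = 0, so stack_pointer_offset = 36 and
   to_allocate = 20 -- the two registers are pushed separately.  */
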
0903fcab
JH
4677/* Emit code to save registers in the prologue. */
4678
4679static void
4680ix86_emit_save_regs ()
4681{
4682 register int regno;
0903fcab 4683 rtx insn;
0903fcab 4684
4dd2ac2c 4685 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4686 if (ix86_save_reg (regno, true))
0903fcab 4687 {
0d7d98ee 4688 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4689 RTX_FRAME_RELATED_P (insn) = 1;
4690 }
4691}
4692
c6036a37
JH
4693/* Emit code to save registers using MOV insns. First register
 4694 is stored at POINTER + OFFSET. */
4695static void
4696ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4697 rtx pointer;
4698 HOST_WIDE_INT offset;
c6036a37
JH
4699{
4700 int regno;
4701 rtx insn;
4702
4703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4704 if (ix86_save_reg (regno, true))
4705 {
b72f00af
RK
4706 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4707 Pmode, offset),
c6036a37
JH
4708 gen_rtx_REG (Pmode, regno));
4709 RTX_FRAME_RELATED_P (insn) = 1;
4710 offset += UNITS_PER_WORD;
4711 }
4712}
4713
0f290768 4714/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4715
4716void
4717ix86_expand_prologue ()
2a2ab3f9 4718{
564d80f4 4719 rtx insn;
bd09bdeb 4720 bool pic_reg_used;
4dd2ac2c 4721 struct ix86_frame frame;
6ab16dd9 4722 int use_mov = 0;
c6036a37 4723 HOST_WIDE_INT allocate;
4dd2ac2c 4724
4977bab6 4725 ix86_compute_frame_layout (&frame);
2ab0437e 4726 if (!optimize_size)
6ab16dd9 4727 {
4977bab6
ZW
4728 int count = frame.nregs;
4729
4730 /* The fast prologue uses move instead of push to save registers. This
4731 is significantly longer, but also executes faster as modern hardware
4732 can execute the moves in parallel, but can't do that for push/pop.
4733
 4734 Be careful about choosing what prologue to emit: when the function
 4735 takes many instructions to execute, we may as well use the slow
 4736 version, likewise when the function is known to be outside a hot
 4737 spot (known with profile feedback only). Weight the size of the
 4738 function by the number of registers to save, as it is cheap to use
 4739 one or two push instructions but very slow to use many of them. */
4740 if (count)
4741 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4742 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4743 || (flag_branch_probabilities
4744 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4745 use_fast_prologue_epilogue = 0;
4746 else
4747 use_fast_prologue_epilogue = !expensive_function_p (count);
2ab0437e
JH
4748 if (TARGET_PROLOGUE_USING_MOVE)
4749 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4750 }
79325812 4751
e075ae69
RH
4752 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4753 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4754
2a2ab3f9
JVA
4755 if (frame_pointer_needed)
4756 {
564d80f4 4757 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4758 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4759
564d80f4 4760 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4761 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4762 }
4763
c6036a37
JH
4764 allocate = frame.to_allocate;
 4765 /* In case we are dealing only with a single register and an empty
 4766 frame, push is equivalent to the mov+add sequence. */
4767 if (allocate == 0 && frame.nregs <= 1)
4768 use_mov = 0;
4769
4770 if (!use_mov)
4771 ix86_emit_save_regs ();
4772 else
4773 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4774
c6036a37 4775 if (allocate == 0)
8dfe5673 4776 ;
e323735c 4777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4778 {
f2042df3
RH
4779 insn = emit_insn (gen_pro_epilogue_adjust_stack
4780 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4781 GEN_INT (-allocate)));
e075ae69 4782 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4783 }
79325812 4784 else
8dfe5673 4785 {
e075ae69 4786 /* ??? Is this only valid for Win32? */
e9a25f70 4787
e075ae69 4788 rtx arg0, sym;
e9a25f70 4789
8362f420 4790 if (TARGET_64BIT)
b531087a 4791 abort ();
8362f420 4792
e075ae69 4793 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4794 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4795
e075ae69
RH
4796 sym = gen_rtx_MEM (FUNCTION_MODE,
4797 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4798 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
4799
4800 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
4801 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4802 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4803 }
c6036a37
JH
4804 if (use_mov)
4805 {
4806 if (!frame_pointer_needed || !frame.to_allocate)
4807 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4808 else
4809 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4810 -frame.nregs * UNITS_PER_WORD);
4811 }
e9a25f70 4812
84530511
SC
4813#ifdef SUBTARGET_PROLOGUE
4814 SUBTARGET_PROLOGUE;
0f290768 4815#endif
84530511 4816
bd09bdeb
RH
4817 pic_reg_used = false;
4818 if (pic_offset_table_rtx
4819 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4820 || current_function_profile))
4821 {
4822 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4823
4824 if (alt_pic_reg_used != INVALID_REGNUM)
4825 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4826
4827 pic_reg_used = true;
4828 }
4829
e9a25f70 4830 if (pic_reg_used)
c8c03509
RH
4831 {
4832 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4833
66edd3b4
RH
4834 /* Even with accurate pre-reload life analysis, we can wind up
4835 deleting all references to the pic register after reload.
 4836 Consider the case where cross-jumping unifies two sides of a branch
 4837 controlled by a comparison vs. the only read from a global.
 4838 In that case, allow the set_got to be deleted, though we're
4839 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4840 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4841 }
77a989d1 4842
66edd3b4
RH
 4843 /* Prevent function calls from being scheduled before the call to mcount.
4844 In the pic_reg_used case, make sure that the got load isn't deleted. */
4845 if (current_function_profile)
4846 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4847}
4848
da2d1d3a
JH
4849/* Emit code to restore saved registers using MOV insns. First register
4850 is restored from POINTER + OFFSET. */
4851static void
1020a5ab
RH
4852ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4853 rtx pointer;
4854 int offset;
37a58036 4855 int maybe_eh_return;
da2d1d3a
JH
4856{
4857 int regno;
da2d1d3a 4858
4dd2ac2c 4859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4860 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4861 {
4dd2ac2c 4862 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4863 adjust_address (gen_rtx_MEM (Pmode, pointer),
4864 Pmode, offset));
4dd2ac2c 4865 offset += UNITS_PER_WORD;
da2d1d3a
JH
4866 }
4867}
4868
0f290768 4869/* Restore function stack, frame, and registers. */
e9a25f70 4870
2a2ab3f9 4871void
1020a5ab
RH
4872ix86_expand_epilogue (style)
4873 int style;
2a2ab3f9 4874{
1c71e60e 4875 int regno;
fdb8a883 4876 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4877 struct ix86_frame frame;
65954bd8 4878 HOST_WIDE_INT offset;
4dd2ac2c
JH
4879
4880 ix86_compute_frame_layout (&frame);
2a2ab3f9 4881
a4f31c00 4882 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4883 must be taken for the normal return case of a function using
4884 eh_return: the eax and edx registers are marked as saved, but not
4885 restored along this path. */
4886 offset = frame.nregs;
4887 if (current_function_calls_eh_return && style != 2)
4888 offset -= 2;
4889 offset *= -UNITS_PER_WORD;
2a2ab3f9 4890
fdb8a883
JW
 4891 /* If we're only restoring one register and sp is not valid, then
 4892 use a move instruction to restore the register, since it's
0f290768 4893 less work than reloading sp and popping the register.
da2d1d3a
JH
4894
4895 The default code result in stack adjustment using add/lea instruction,
4896 while this code results in LEAVE instruction (or discrete equivalent),
4897 so it is profitable in some other cases as well. Especially when there
4898 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4899 and there is exactly one register to pop. This heruistic may need some
4900 tuning in future. */
4dd2ac2c 4901 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4902 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4903 && use_fast_prologue_epilogue
c6036a37 4904 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4905 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4906 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4907 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4908 || current_function_calls_eh_return)
2a2ab3f9 4909 {
da2d1d3a
JH
4910 /* Restore registers. We can use ebp or esp to address the memory
4911 locations. If both are available, default to ebp, since offsets
4912 are known to be small. Only exception is esp pointing directly to the
4913 end of block of saved registers, where we may simplify addressing
4914 mode. */
4915
4dd2ac2c 4916 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4917 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4918 frame.to_allocate, style == 2);
da2d1d3a 4919 else
1020a5ab
RH
4920 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4921 offset, style == 2);
4922
4923 /* eh_return epilogues need %ecx added to the stack pointer. */
4924 if (style == 2)
4925 {
4926 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4927
1020a5ab
RH
4928 if (frame_pointer_needed)
4929 {
4930 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4931 tmp = plus_constant (tmp, UNITS_PER_WORD);
4932 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4933
4934 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4935 emit_move_insn (hard_frame_pointer_rtx, tmp);
4936
4937 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4938 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4939 }
4940 else
4941 {
4942 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4943 tmp = plus_constant (tmp, (frame.to_allocate
4944 + frame.nregs * UNITS_PER_WORD));
4945 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4946 }
4947 }
4948 else if (!frame_pointer_needed)
f2042df3
RH
4949 emit_insn (gen_pro_epilogue_adjust_stack
4950 (stack_pointer_rtx, stack_pointer_rtx,
4951 GEN_INT (frame.to_allocate
4952 + frame.nregs * UNITS_PER_WORD)));
0f290768 4953 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4954 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4955 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4956 else
2a2ab3f9 4957 {
1c71e60e
JH
4958 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4959 hard_frame_pointer_rtx,
f2042df3 4960 const0_rtx));
8362f420
JH
4961 if (TARGET_64BIT)
4962 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4963 else
4964 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4965 }
4966 }
1c71e60e 4967 else
68f654ec 4968 {
1c71e60e
JH
4969 /* First step is to deallocate the stack frame so that we can
4970 pop the registers. */
4971 if (!sp_valid)
4972 {
4973 if (!frame_pointer_needed)
4974 abort ();
4975 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4976 hard_frame_pointer_rtx,
f2042df3 4977 GEN_INT (offset)));
1c71e60e 4978 }
4dd2ac2c 4979 else if (frame.to_allocate)
f2042df3
RH
4980 emit_insn (gen_pro_epilogue_adjust_stack
4981 (stack_pointer_rtx, stack_pointer_rtx,
4982 GEN_INT (frame.to_allocate)));
1c71e60e 4983
4dd2ac2c 4984 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4985 if (ix86_save_reg (regno, false))
8362f420
JH
4986 {
4987 if (TARGET_64BIT)
4988 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4989 else
4990 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4991 }
4dd2ac2c 4992 if (frame_pointer_needed)
8362f420 4993 {
f5143c46 4994 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4995 able to grok it fast. */
4996 if (TARGET_USE_LEAVE)
4997 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4998 else if (TARGET_64BIT)
8362f420
JH
4999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5000 else
5001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5002 }
68f654ec 5003 }
68f654ec 5004
cbbf65e0 5005 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 5006 if (style == 0)
cbbf65e0
RH
5007 return;
5008
2a2ab3f9
JVA
5009 if (current_function_pops_args && current_function_args_size)
5010 {
e075ae69 5011 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 5012
b8c752c8
UD
5013 /* i386 can only pop 64K bytes. If asked to pop more, pop
5014 return address, do explicit add, and jump indirectly to the
0f290768 5015 caller. */
2a2ab3f9 5016
b8c752c8 5017 if (current_function_pops_args >= 65536)
2a2ab3f9 5018 {
e075ae69 5019 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 5020
8362f420
JH
5021 /* There are is no "pascal" calling convention in 64bit ABI. */
5022 if (TARGET_64BIT)
b531087a 5023 abort ();
8362f420 5024
e075ae69
RH
5025 emit_insn (gen_popsi1 (ecx));
5026 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 5027 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 5028 }
79325812 5029 else
e075ae69
RH
5030 emit_jump_insn (gen_return_pop_internal (popc));
5031 }
5032 else
5033 emit_jump_insn (gen_return_internal ());
5034}
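
/* Illustrative note, not from the original source: on a 32bit target
   with a frame pointer, the TARGET_USE_LEAVE path above collapses the
   frame teardown into a single "leave" (equivalent to
   "movl %ebp, %esp; popl %ebp"), while the mov & pop path emits those
   two instructions separately, which the comment above notes is faster
   on non-i386 CPUs.  */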

/* Reset REGNO (pic_offset_table_rtx), which the function's prologue
   may have changed to an alternate register.  */

static void
ix86_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of an lea
   instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
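
/* Illustrative example, not from the original source: for the address
   8(%ebx,%eax,4), i.e. the RTL
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 8)),
   the function above fills in index = %eax, scale = 4, base = %ebx and
   disp = (const_int 8), and returns 1.  */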
\f
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
     00_xxx_100b, since its predecode logic can't detect the length of
     such instructions and decoding degenerates to vector decoding.
     Increase the cost of such addresses here.  The penalty is at least
     2 cycles.  It may be worthwhile to split such addresses or even
     refuse them at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
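
/* Illustrative example, not from the original source: on the K6, an
   address such as (%ebx,%ecx,2) has a base, an index and scale != 1
   but no displacement, so it falls into the [base+scale*index] class
   above and its cost is increased by 10.  */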
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTOFF)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
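
/* Illustrative example, not from the original source: in 32bit PIC
   code, where %ebx holds pic_offset_table_rtx, the address
     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   makes this function return the (symbol_ref "x") as the base term.  */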
\f
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (x)
     rtx x;
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (x)
     rtx x;
{
  switch (GET_CODE (x))
    {
    case LABEL_REF:
    case CONST_INT:
      return true;

    case CONST_DOUBLE:
      return TARGET_64BIT;

    case CONST:
      /* For Mach-O, really believe the CONST.  */
      if (TARGET_MACHO)
	return true;
      /* Otherwise fall through.  */
    case SYMBOL_REF:
      return !flag_pic && legitimate_constant_p (x);

    default:
      return false;
    }
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && (CONSTANT_POOL_ADDRESS_P (disp)
	      || SYMBOL_REF_FLAG (disp)))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
	       && ix86_cmodel == CM_SMALL_PIC
	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; that would break
	 the limited allowed distance of GOT tables.  We should not
	 need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (strstr (sym_name, "$pb") != 0)
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

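/* Illustrative example, not from the original source: in 32bit PIC
   mode, (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted
   above when "x" is a local symbol, whereas a PLUS wrapped around an
   @GOT reference, e.g. (const (plus (unspec [...] UNSPEC_GOT)
   (const_int 4))), is rejected because saw_plus is set.  */
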
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "Success.\n");
      return TRUE;
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; it can lead to spill failures when the base
     is one word out of a two word structure, which is represented
     internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREGs here; it can lead to spill failures when the index
     is one word out of a two word structure, which is represented
     internally as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, these constructs may unfortunately
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
		 return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the GOT
	     table with a pic_offset_table_rtx base.  We can't just refuse
	     it easily, since it gets matched by the "addsi3" pattern,
	     which later gets split to lea in the case the output register
	     differs from the input.  While this could be handled by a
	     separate addsi pattern for this case that never results in
	     lea, disabling this test seems to be the easier and correct
	     fix for the crash.  */
	}
      else if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
      else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
\f
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
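
/* Illustrative example, not from the original source: in 32bit PIC
   mode a global symbol "x" becomes a load from the GOT,
     (mem (plus (reg pic) (const (unspec [(symbol_ref "x")] UNSPEC_GOT)))),
   while a local symbol is addressed directly as
     (plus (reg pic) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))).  */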

static void
ix86_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);
  rtx rtl, symbol;

  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      char *newstr;
      size_t len;
      enum tls_model kind = decl_tls_model (decl);

      if (TARGET_64BIT && ! flag_pic)
	{
	  /* x86-64 doesn't allow non-pic code for shared libraries,
	     so don't generate GD/LD TLS models for non-pic code.  */
	  switch (kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      kind = TLS_MODEL_INITIAL_EXEC; break;
	    case TLS_MODEL_LOCAL_DYNAMIC:
	      kind = TLS_MODEL_LOCAL_EXEC; break;
	    default:
	      break;
	    }
	}

      symbol_str = XSTR (symbol, 0);

      if (symbol_str[0] == '%')
	{
	  if (symbol_str[1] == tls_model_chars[kind])
	    return;
	  symbol_str += 2;
	}
      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      newstr[0] = '%';
      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}

/* Undo the above when printing symbol names.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == '%')
    str += 2;
  if (str[0] == '*')
    str += 1;
  return str;
}
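
/* Illustrative example, not from the original source: a local-exec
   TLS variable "foo" gets its SYMBOL_REF renamed to "%lfoo" by
   ix86_encode_section_info, and ix86_strip_name_encoding recovers
   "foo" by skipping the two-character "%l" prefix.  */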
\f
/* Load the thread pointer into a register.  */

static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}

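/* Illustrative note, and an assumption about the matching md patterns
   rather than something visible here: the UNSPEC_TP memory reference
   built above is what the move patterns ultimately render as a load of
   the thread pointer, e.g. "movl %gs:0, %reg" under the 32bit GNU TLS
   conventions.  */
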
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;
      int type;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns;

	      start_sequence ();
	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	      start_sequence ();
	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	      insns = get_insns ();
	      end_sequence ();

	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	      emit_libcall_block (insns, base, rax, note);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base));

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  if (TARGET_64BIT)
	    {
	      pic = NULL;
	      type = UNSPEC_GOTNTPOFF;
	    }
	  else if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	    }
	  else if (!TARGET_GNU_TLS)
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	      type = UNSPEC_GOTTPOFF;
	    }
	  else
	    {
	      pic = NULL;
	      type = UNSPEC_INDNTPOFF;
	    }

	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
	  off = gen_rtx_CONST (Pmode, off);
	  if (pic)
	    off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());
	  dest = gen_reg_rtx (Pmode);

	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    {
	      emit_move_insn (dest, off);
	      return gen_rtx_PLUS (Pmode, base, dest);
	    }
	  else
	    emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				(TARGET_64BIT || TARGET_GNU_TLS)
				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;

	default:
	  abort ();
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
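
/* Illustrative example, not from the original source: given
   x = (plus (reg X) (ashift (reg Y) (const_int 2))), the code above
   rewrites the ashift as (mult (reg Y) (const_int 4)) and then swaps
   the operands so the multiply comes first, yielding the canonical
   Y*4 + X form that GO_IF_LEGITIMATE_ADDRESS accepts.  */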
\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

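/* Illustrative example, not from the original source, assuming
   ASM_LONG expands to ".long": for a symbol "foo", size 4 emits
   ".long foo@DTPOFF" and size 8 emits ".long foo@DTPOFF, 0", the
   upper half being known to be zero.  */
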
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
\f
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
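
/* Illustrative example, not from the original source: for a GT
   comparison in CCmode with reverse and fp clear, this prints "g", so
   a template like "set%C0 %b0" assembles to "setg %al"; for an fp GTU
   it prints "nbe" to sidestep the fcmov assembler lossage noted
   above.  */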
6682
e075ae69
RH
6683void
6684print_reg (x, code, file)
6685 rtx x;
6686 int code;
6687 FILE *file;
e5cb57e8 6688{
e075ae69 6689 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6690 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
6691 || REGNO (x) == FLAGS_REG
6692 || REGNO (x) == FPSR_REG)
6693 abort ();
e9a25f70 6694
5bf0ebab 6695 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6696 putc ('%', file);
6697
ef6257cd 6698 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6699 code = 2;
6700 else if (code == 'b')
6701 code = 1;
6702 else if (code == 'k')
6703 code = 4;
3f3f2124
JH
6704 else if (code == 'q')
6705 code = 8;
e075ae69
RH
6706 else if (code == 'y')
6707 code = 3;
6708 else if (code == 'h')
6709 code = 0;
6710 else
6711 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6712
3f3f2124
JH
6713 /* Irritatingly, AMD extended registers use different naming convention
6714 from the normal registers. */
6715 if (REX_INT_REG_P (x))
6716 {
885a70fd
JH
6717 if (!TARGET_64BIT)
6718 abort ();
3f3f2124
JH
6719 switch (code)
6720 {
ef6257cd 6721 case 0:
c725bd79 6722 error ("extended registers have no high halves");
3f3f2124
JH
6723 break;
6724 case 1:
6725 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6726 break;
6727 case 2:
6728 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6729 break;
6730 case 4:
6731 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6732 break;
6733 case 8:
6734 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6735 break;
6736 default:
c725bd79 6737 error ("unsupported operand size for extended register");
3f3f2124
JH
6738 break;
6739 }
6740 return;
6741 }
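/* Editorial sketch (assumes regno in [FIRST_REX_INT_REG,
   FIRST_REX_INT_REG + 7]): the r8..r15 names take b/w/d suffixes for the
   1/2/4-byte views and no suffix for the full 8-byte register, unlike
   the al/ax/eax/rax pattern of the legacy registers.  */
#if 0
static void
rex_name_sketch (FILE *file, int regno, int size)
{
  static const char *const suffix[9]
    = { 0, "b", "w", 0, "d", 0, 0, 0, "" };
  fprintf (file, "r%i%s", regno - FIRST_REX_INT_REG + 8, suffix[size]);
}
#endif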
e075ae69
RH
6742 switch (code)
6743 {
6744 case 3:
6745 if (STACK_TOP_P (x))
6746 {
6747 fputs ("st(0)", file);
6748 break;
6749 }
6750 /* FALLTHRU */
e075ae69 6751 case 8:
3f3f2124 6752 case 4:
e075ae69 6753 case 12:
446988df 6754 if (! ANY_FP_REG_P (x))
885a70fd 6755 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6756 /* FALLTHRU */
a7180f70 6757 case 16:
e075ae69
RH
6758 case 2:
6759 fputs (hi_reg_name[REGNO (x)], file);
6760 break;
6761 case 1:
6762 fputs (qi_reg_name[REGNO (x)], file);
6763 break;
6764 case 0:
6765 fputs (qi_high_reg_name[REGNO (x)], file);
6766 break;
6767 default:
6768 abort ();
fe25fea3 6769 }
e5cb57e8
SC
6770}
6771
f996902d
RH
6772/* Locate some local-dynamic symbol still in use by this function
6773 so that we can print its name in some tls_local_dynamic_base
6774 pattern. */
6775
6776static const char *
6777get_some_local_dynamic_name ()
6778{
6779 rtx insn;
6780
6781 if (cfun->machine->some_ld_name)
6782 return cfun->machine->some_ld_name;
6783
6784 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6785 if (INSN_P (insn)
6786 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6787 return cfun->machine->some_ld_name;
6788
6789 abort ();
6790}
6791
6792static int
6793get_some_local_dynamic_name_1 (px, data)
6794 rtx *px;
6795 void *data ATTRIBUTE_UNUSED;
6796{
6797 rtx x = *px;
6798
6799 if (GET_CODE (x) == SYMBOL_REF
6800 && local_dynamic_symbolic_operand (x, Pmode))
6801 {
6802 cfun->machine->some_ld_name = XSTR (x, 0);
6803 return 1;
6804 }
6805
6806 return 0;
6807}
6808
2a2ab3f9 6809/* Meaning of CODE:
fe25fea3 6810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6811 C -- print opcode suffix for set/cmov insn.
fe25fea3 6812 c -- like C, but print reversed condition
ef6257cd 6813 F,f -- likewise, but for floating-point.
048b1c95
JJ
6814 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6815 nothing
2a2ab3f9
JVA
6816 R -- print the prefix for register names.
6817 z -- print the opcode suffix for the size of the current operand.
6818 * -- print a star (in certain assembler syntax)
fb204271 6819 A -- print an absolute memory reference.
2a2ab3f9 6820 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6821	 s -- print a shift double count, followed by the assembler's argument
6822	 delimiter.
fe25fea3
SC
6823 b -- print the QImode name of the register for the indicated operand.
6824 %b0 would print %al if operands[0] is reg 0.
6825 w -- likewise, print the HImode name of the register.
6826 k -- likewise, print the SImode name of the register.
3f3f2124 6827 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6829 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6830 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6831 P -- if PIC, print an @PLT suffix.
6832 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6833 & -- print some in-use local-dynamic symbol name.
a46d1d38 6834 */
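/* Editorial example (hypothetical template, not from i386.md): with
   operands[0] being hard register 0, the size codes above expand as
   %b0 -> %al, %w0 -> %ax, %k0 -> %eax, %q0 -> %rax and %h0 -> %ah
   in the AT&T dialect.  */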
2a2ab3f9
JVA
6835
6836void
6837print_operand (file, x, code)
6838 FILE *file;
6839 rtx x;
6840 int code;
6841{
6842 if (code)
6843 {
6844 switch (code)
6845 {
6846 case '*':
80f33d06 6847 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6848 putc ('*', file);
6849 return;
6850
f996902d
RH
6851 case '&':
6852 assemble_name (file, get_some_local_dynamic_name ());
6853 return;
6854
fb204271 6855 case 'A':
80f33d06 6856 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6857 putc ('*', file);
80f33d06 6858 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6859 {
6860 /* Intel syntax. For absolute addresses, registers should not
6861 be surrounded by braces. */
6862 if (GET_CODE (x) != REG)
6863 {
6864 putc ('[', file);
6865 PRINT_OPERAND (file, x, 0);
6866 putc (']', file);
6867 return;
6868 }
6869 }
80f33d06
GS
6870 else
6871 abort ();
fb204271
DN
6872
6873 PRINT_OPERAND (file, x, 0);
6874 return;
6875
6876
2a2ab3f9 6877 case 'L':
80f33d06 6878 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6879 putc ('l', file);
2a2ab3f9
JVA
6880 return;
6881
6882 case 'W':
80f33d06 6883 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6884 putc ('w', file);
2a2ab3f9
JVA
6885 return;
6886
6887 case 'B':
80f33d06 6888 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6889 putc ('b', file);
2a2ab3f9
JVA
6890 return;
6891
6892 case 'Q':
80f33d06 6893 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6894 putc ('l', file);
2a2ab3f9
JVA
6895 return;
6896
6897 case 'S':
80f33d06 6898 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6899 putc ('s', file);
2a2ab3f9
JVA
6900 return;
6901
5f1ec3e6 6902 case 'T':
80f33d06 6903 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6904 putc ('t', file);
5f1ec3e6
JVA
6905 return;
6906
2a2ab3f9
JVA
6907 case 'z':
6908 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6909 registers. */
2a2ab3f9
JVA
6910 if (STACK_REG_P (x))
6911 return;
6912
831c4e87
KC
6913 /* Likewise if using Intel opcodes. */
6914 if (ASSEMBLER_DIALECT == ASM_INTEL)
6915 return;
6916
6917 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6918 switch (GET_MODE_SIZE (GET_MODE (x)))
6919 {
2a2ab3f9 6920 case 2:
155d8a47
JW
6921#ifdef HAVE_GAS_FILDS_FISTS
6922 putc ('s', file);
6923#endif
2a2ab3f9
JVA
6924 return;
6925
6926 case 4:
6927 if (GET_MODE (x) == SFmode)
6928 {
e075ae69 6929 putc ('s', file);
2a2ab3f9
JVA
6930 return;
6931 }
6932 else
e075ae69 6933 putc ('l', file);
2a2ab3f9
JVA
6934 return;
6935
5f1ec3e6 6936 case 12:
2b589241 6937 case 16:
e075ae69
RH
6938 putc ('t', file);
6939 return;
5f1ec3e6 6940
2a2ab3f9
JVA
6941 case 8:
6942 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6943 {
6944#ifdef GAS_MNEMONICS
e075ae69 6945 putc ('q', file);
56c0e8fa 6946#else
e075ae69
RH
6947 putc ('l', file);
6948 putc ('l', file);
56c0e8fa
JVA
6949#endif
6950 }
e075ae69
RH
6951 else
6952 putc ('l', file);
2a2ab3f9 6953 return;
155d8a47
JW
6954
6955 default:
6956 abort ();
2a2ab3f9 6957 }
4af3895e
JVA
6958
6959 case 'b':
6960 case 'w':
6961 case 'k':
3f3f2124 6962 case 'q':
4af3895e
JVA
6963 case 'h':
6964 case 'y':
5cb6195d 6965 case 'X':
e075ae69 6966 case 'P':
4af3895e
JVA
6967 break;
6968
2d49677f
SC
6969 case 's':
6970 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6971 {
6972 PRINT_OPERAND (file, x, 0);
e075ae69 6973 putc (',', file);
2d49677f 6974 }
a269a03c
JC
6975 return;
6976
a46d1d38
JH
6977 case 'D':
6978	 /* A little bit of braindamage here. The SSE compare instructions
6979	 use completely different names for the comparisons than the
6980	 fp conditional moves do. */
6981 switch (GET_CODE (x))
6982 {
6983 case EQ:
6984 case UNEQ:
6985 fputs ("eq", file);
6986 break;
6987 case LT:
6988 case UNLT:
6989 fputs ("lt", file);
6990 break;
6991 case LE:
6992 case UNLE:
6993 fputs ("le", file);
6994 break;
6995 case UNORDERED:
6996 fputs ("unord", file);
6997 break;
6998 case NE:
6999 case LTGT:
7000 fputs ("neq", file);
7001 break;
7002 case UNGE:
7003 case GE:
7004 fputs ("nlt", file);
7005 break;
7006 case UNGT:
7007 case GT:
7008 fputs ("nle", file);
7009 break;
7010 case ORDERED:
7011 fputs ("ord", file);
7012 break;
7013 default:
7014 abort ();
7015 break;
7016 }
7017 return;
048b1c95
JJ
7018 case 'O':
7019#ifdef CMOV_SUN_AS_SYNTAX
7020 if (ASSEMBLER_DIALECT == ASM_ATT)
7021 {
7022 switch (GET_MODE (x))
7023 {
7024 case HImode: putc ('w', file); break;
7025 case SImode:
7026 case SFmode: putc ('l', file); break;
7027 case DImode:
7028 case DFmode: putc ('q', file); break;
7029 default: abort ();
7030 }
7031 putc ('.', file);
7032 }
7033#endif
7034 return;
1853aadd 7035 case 'C':
e075ae69 7036 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7037 return;
fe25fea3 7038 case 'F':
048b1c95
JJ
7039#ifdef CMOV_SUN_AS_SYNTAX
7040 if (ASSEMBLER_DIALECT == ASM_ATT)
7041 putc ('.', file);
7042#endif
e075ae69 7043 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7044 return;
7045
e9a25f70 7046 /* Like above, but reverse condition */
e075ae69 7047 case 'c':
fce5a9f2 7048 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
7049 and not a condition code which needs to be reversed. */
7050 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7051 {
7052 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7053 return;
7054 }
e075ae69
RH
7055 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7056 return;
fe25fea3 7057 case 'f':
048b1c95
JJ
7058#ifdef CMOV_SUN_AS_SYNTAX
7059 if (ASSEMBLER_DIALECT == ASM_ATT)
7060 putc ('.', file);
7061#endif
e075ae69 7062 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7063 return;
ef6257cd
JH
7064 case '+':
7065 {
7066 rtx x;
e5cb57e8 7067
ef6257cd
JH
7068 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7069 return;
a4f31c00 7070
ef6257cd
JH
7071 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7072 if (x)
7073 {
7074 int pred_val = INTVAL (XEXP (x, 0));
7075
7076 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7077 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7078 {
7079 int taken = pred_val > REG_BR_PROB_BASE / 2;
7080 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7081
7082	 /* Emit hints only in the case the default branch prediction
7083	 heuristics would fail. */
7084 if (taken != cputaken)
7085 {
7086 /* We use 3e (DS) prefix for taken branches and
7087 2e (CS) prefix for not taken branches. */
7088 if (taken)
7089 fputs ("ds ; ", file);
7090 else
7091 fputs ("cs ; ", file);
7092 }
7093 }
7094 }
7095 return;
7096 }
4af3895e 7097 default:
a52453cc 7098 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7099 }
7100 }
e9a25f70 7101
2a2ab3f9
JVA
7102 if (GET_CODE (x) == REG)
7103 {
7104 PRINT_REG (x, code, file);
7105 }
e9a25f70 7106
2a2ab3f9
JVA
7107 else if (GET_CODE (x) == MEM)
7108 {
e075ae69 7109 /* No `byte ptr' prefix for call instructions. */
80f33d06 7110 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7111 {
69ddee61 7112 const char * size;
e075ae69
RH
7113 switch (GET_MODE_SIZE (GET_MODE (x)))
7114 {
7115 case 1: size = "BYTE"; break;
7116 case 2: size = "WORD"; break;
7117 case 4: size = "DWORD"; break;
7118 case 8: size = "QWORD"; break;
7119 case 12: size = "XWORD"; break;
a7180f70 7120 case 16: size = "XMMWORD"; break;
e075ae69 7121 default:
564d80f4 7122 abort ();
e075ae69 7123 }
fb204271
DN
7124
7125 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7126 if (code == 'b')
7127 size = "BYTE";
7128 else if (code == 'w')
7129 size = "WORD";
7130 else if (code == 'k')
7131 size = "DWORD";
7132
e075ae69
RH
7133 fputs (size, file);
7134 fputs (" PTR ", file);
2a2ab3f9 7135 }
e075ae69
RH
7136
7137 x = XEXP (x, 0);
7138 if (flag_pic && CONSTANT_ADDRESS_P (x))
7139 output_pic_addr_const (file, x, code);
0d7d98ee 7140 /* Avoid (%rip) for call operands. */
5bf0ebab 7141 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7142 && GET_CODE (x) != CONST_INT)
7143 output_addr_const (file, x);
c8b94768
RH
7144 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7145 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7146 else
e075ae69 7147 output_address (x);
2a2ab3f9 7148 }
e9a25f70 7149
2a2ab3f9
JVA
7150 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7151 {
e9a25f70
JL
7152 REAL_VALUE_TYPE r;
7153 long l;
7154
5f1ec3e6
JVA
7155 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7156 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7157
80f33d06 7158 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7159 putc ('$', file);
52267fcb 7160 fprintf (file, "0x%lx", l);
5f1ec3e6 7161 }
e9a25f70 7162
0f290768 7163 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
7164 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7165 {
e9a25f70
JL
7166 char dstr[30];
7167
da6eec72 7168 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7169 fprintf (file, "%s", dstr);
2a2ab3f9 7170 }
e9a25f70 7171
2b589241
JH
7172 else if (GET_CODE (x) == CONST_DOUBLE
7173 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 7174 {
e9a25f70
JL
7175 char dstr[30];
7176
da6eec72 7177 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7178 fprintf (file, "%s", dstr);
2a2ab3f9 7179 }
f996902d 7180
79325812 7181 else
2a2ab3f9 7182 {
4af3895e 7183 if (code != 'P')
2a2ab3f9 7184 {
695dac07 7185 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7186 {
80f33d06 7187 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7188 putc ('$', file);
7189 }
2a2ab3f9
JVA
7190 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7191 || GET_CODE (x) == LABEL_REF)
e075ae69 7192 {
80f33d06 7193 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7194 putc ('$', file);
7195 else
7196 fputs ("OFFSET FLAT:", file);
7197 }
2a2ab3f9 7198 }
e075ae69
RH
7199 if (GET_CODE (x) == CONST_INT)
7200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7201 else if (flag_pic)
2a2ab3f9
JVA
7202 output_pic_addr_const (file, x, code);
7203 else
7204 output_addr_const (file, x);
7205 }
7206}
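/* Editorial sketch of the '+' code above: a segment-override hint prefix
   is emitted only when the profile disagrees with the assumed static
   prediction (backward branches taken, forward branches not taken).  */
#if 0
static const char *
branch_hint_sketch (int predicted_taken, int is_forward_branch)
{
  int cpu_predicts_taken = !is_forward_branch;
  if (predicted_taken == cpu_predicts_taken)
    return "";			/* static prediction already agrees */
  return predicted_taken ? "ds ; " : "cs ; ";
}
#endif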
7207\f
7208/* Print a memory operand whose address is ADDR. */
7209
7210void
7211print_operand_address (file, addr)
7212 FILE *file;
7213 register rtx addr;
7214{
e075ae69
RH
7215 struct ix86_address parts;
7216 rtx base, index, disp;
7217 int scale;
e9a25f70 7218
9e20be0c
JJ
7219 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7220 {
7221 if (ASSEMBLER_DIALECT == ASM_INTEL)
7222 fputs ("DWORD PTR ", file);
7223 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7224 putc ('%', file);
75d38379
JJ
7225 if (TARGET_64BIT)
7226 fputs ("fs:0", file);
7227 else
7228 fputs ("gs:0", file);
9e20be0c
JJ
7229 return;
7230 }
7231
e075ae69
RH
7232 if (! ix86_decompose_address (addr, &parts))
7233 abort ();
e9a25f70 7234
e075ae69
RH
7235 base = parts.base;
7236 index = parts.index;
7237 disp = parts.disp;
7238 scale = parts.scale;
e9a25f70 7239
e075ae69
RH
7240 if (!base && !index)
7241 {
7242	 /* A displacement-only address requires special attention. */
e9a25f70 7243
e075ae69 7244 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 7245 {
80f33d06 7246 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7247 {
7248 if (USER_LABEL_PREFIX[0] == 0)
7249 putc ('%', file);
7250 fputs ("ds:", file);
7251 }
e075ae69 7252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 7253 }
e075ae69
RH
7254 else if (flag_pic)
7255 output_pic_addr_const (file, addr, 0);
7256 else
7257 output_addr_const (file, addr);
0d7d98ee
JH
7258
7259	 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
edfe8595 7260 if (TARGET_64BIT
75d38379
JJ
7261 && ((GET_CODE (addr) == SYMBOL_REF
7262 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
edfe8595
RH
7263 || GET_CODE (addr) == LABEL_REF
7264 || (GET_CODE (addr) == CONST
7265 && GET_CODE (XEXP (addr, 0)) == PLUS
200bcf7e
JH
7266 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7267 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
edfe8595 7268 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 7269 fputs ("(%rip)", file);
e075ae69
RH
7270 }
7271 else
7272 {
80f33d06 7273 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 7274 {
e075ae69 7275 if (disp)
2a2ab3f9 7276 {
c399861d 7277 if (flag_pic)
e075ae69
RH
7278 output_pic_addr_const (file, disp, 0);
7279 else if (GET_CODE (disp) == LABEL_REF)
7280 output_asm_label (disp);
2a2ab3f9 7281 else
e075ae69 7282 output_addr_const (file, disp);
2a2ab3f9
JVA
7283 }
7284
e075ae69
RH
7285 putc ('(', file);
7286 if (base)
7287 PRINT_REG (base, 0, file);
7288 if (index)
2a2ab3f9 7289 {
e075ae69
RH
7290 putc (',', file);
7291 PRINT_REG (index, 0, file);
7292 if (scale != 1)
7293 fprintf (file, ",%d", scale);
2a2ab3f9 7294 }
e075ae69 7295 putc (')', file);
2a2ab3f9 7296 }
2a2ab3f9
JVA
7297 else
7298 {
e075ae69 7299 rtx offset = NULL_RTX;
e9a25f70 7300
e075ae69
RH
7301 if (disp)
7302 {
7303 /* Pull out the offset of a symbol; print any symbol itself. */
7304 if (GET_CODE (disp) == CONST
7305 && GET_CODE (XEXP (disp, 0)) == PLUS
7306 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7307 {
7308 offset = XEXP (XEXP (disp, 0), 1);
7309 disp = gen_rtx_CONST (VOIDmode,
7310 XEXP (XEXP (disp, 0), 0));
7311 }
ce193852 7312
e075ae69
RH
7313 if (flag_pic)
7314 output_pic_addr_const (file, disp, 0);
7315 else if (GET_CODE (disp) == LABEL_REF)
7316 output_asm_label (disp);
7317 else if (GET_CODE (disp) == CONST_INT)
7318 offset = disp;
7319 else
7320 output_addr_const (file, disp);
7321 }
e9a25f70 7322
e075ae69
RH
7323 putc ('[', file);
7324 if (base)
a8620236 7325 {
e075ae69
RH
7326 PRINT_REG (base, 0, file);
7327 if (offset)
7328 {
7329 if (INTVAL (offset) >= 0)
7330 putc ('+', file);
7331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7332 }
a8620236 7333 }
e075ae69
RH
7334 else if (offset)
7335 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7336 else
e075ae69 7337 putc ('0', file);
e9a25f70 7338
e075ae69
RH
7339 if (index)
7340 {
7341 putc ('+', file);
7342 PRINT_REG (index, 0, file);
7343 if (scale != 1)
7344 fprintf (file, "*%d", scale);
7345 }
7346 putc (']', file);
7347 }
2a2ab3f9
JVA
7348 }
7349}
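/* Editorial example (values assumed for illustration): the same address
   printed by the two dialect branches above, with base %ebx, index %esi,
   scale 4 and displacement 8, comes out as "8(%ebx,%esi,4)" in AT&T
   syntax and "[ebx+esi*4+8]" in Intel syntax.  */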
f996902d
RH
7350
7351bool
7352output_addr_const_extra (file, x)
7353 FILE *file;
7354 rtx x;
7355{
7356 rtx op;
7357
7358 if (GET_CODE (x) != UNSPEC)
7359 return false;
7360
7361 op = XVECEXP (x, 0, 0);
7362 switch (XINT (x, 1))
7363 {
7364 case UNSPEC_GOTTPOFF:
7365 output_addr_const (file, op);
dea73790 7366 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7367 fputs ("@GOTTPOFF", file);
7368 break;
7369 case UNSPEC_TPOFF:
7370 output_addr_const (file, op);
7371 fputs ("@TPOFF", file);
7372 break;
7373 case UNSPEC_NTPOFF:
7374 output_addr_const (file, op);
75d38379
JJ
7375 if (TARGET_64BIT)
7376 fputs ("@TPOFF", file);
7377 else
7378 fputs ("@NTPOFF", file);
f996902d
RH
7379 break;
7380 case UNSPEC_DTPOFF:
7381 output_addr_const (file, op);
7382 fputs ("@DTPOFF", file);
7383 break;
dea73790
JJ
7384 case UNSPEC_GOTNTPOFF:
7385 output_addr_const (file, op);
75d38379
JJ
7386 if (TARGET_64BIT)
7387 fputs ("@GOTTPOFF(%rip)", file);
7388 else
7389 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7390 break;
7391 case UNSPEC_INDNTPOFF:
7392 output_addr_const (file, op);
7393 fputs ("@INDNTPOFF", file);
7394 break;
f996902d
RH
7395
7396 default:
7397 return false;
7398 }
7399
7400 return true;
7401}
2a2ab3f9
JVA
7402\f
7403/* Split one or more DImode RTL references into pairs of SImode
7404 references. The RTL can be REG, offsettable MEM, integer constant, or
7405 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7406 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7407 that parallel "operands". */
2a2ab3f9
JVA
7408
7409void
7410split_di (operands, num, lo_half, hi_half)
7411 rtx operands[];
7412 int num;
7413 rtx lo_half[], hi_half[];
7414{
7415 while (num--)
7416 {
57dbca5e 7417 rtx op = operands[num];
b932f770
JH
7418
7419	 /* simplify_subreg refuses to split volatile memory addresses,
7420	 but we still have to handle them. */
7421 if (GET_CODE (op) == MEM)
2a2ab3f9 7422 {
f4ef873c 7423 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7424 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7425 }
7426 else
b932f770 7427 {
38ca929b
JH
7428 lo_half[num] = simplify_gen_subreg (SImode, op,
7429 GET_MODE (op) == VOIDmode
7430 ? DImode : GET_MODE (op), 0);
7431 hi_half[num] = simplify_gen_subreg (SImode, op,
7432 GET_MODE (op) == VOIDmode
7433 ? DImode : GET_MODE (op), 4);
b932f770 7434 }
2a2ab3f9
JVA
7435 }
7436}
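/* Editorial sketch: on little-endian x86 a DImode value keeps its low
   SImode half at byte offset 0 and its high half at byte offset 4,
   which is exactly what the adjust_address offsets above rely on.  */
#if 0
struct di_halves_sketch { unsigned int lo, hi; };
static struct di_halves_sketch
split_u64_sketch (unsigned long long v)
{
  struct di_halves_sketch h;
  h.lo = (unsigned int) (v & 0xffffffffu);
  h.hi = (unsigned int) (v >> 32);
  return h;
}
#endif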
44cf5b6a
JH
7437/* Split one or more TImode RTL references into pairs of DImode
7438	 references. The RTL can be REG, offsettable MEM, integer constant, or
7439	 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7440 split and "num" is its length. lo_half and hi_half are output arrays
7441 that parallel "operands". */
7442
7443void
7444split_ti (operands, num, lo_half, hi_half)
7445 rtx operands[];
7446 int num;
7447 rtx lo_half[], hi_half[];
7448{
7449 while (num--)
7450 {
7451 rtx op = operands[num];
b932f770
JH
7452
7453	 /* simplify_subreg refuses to split volatile memory addresses, but we
7454	 still have to handle them. */
7455 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7456 {
7457 lo_half[num] = adjust_address (op, DImode, 0);
7458 hi_half[num] = adjust_address (op, DImode, 8);
7459 }
7460 else
b932f770
JH
7461 {
7462 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7463 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7464 }
44cf5b6a
JH
7465 }
7466}
2a2ab3f9 7467\f
2a2ab3f9
JVA
7468/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7469 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7470 is the expression of the binary operation. The output may either be
7471 emitted here, or returned to the caller, like all output_* functions.
7472
7473 There is no guarantee that the operands are the same mode, as they
0f290768 7474 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7475
e3c2afab
AM
7476#ifndef SYSV386_COMPAT
7477/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7478 wants to fix the assemblers because that causes incompatibility
7479 with gcc. No-one wants to fix gcc because that causes
7480 incompatibility with assemblers... You can use the option of
7481 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7482#define SYSV386_COMPAT 1
7483#endif
7484
69ddee61 7485const char *
2a2ab3f9
JVA
7486output_387_binary_op (insn, operands)
7487 rtx insn;
7488 rtx *operands;
7489{
e3c2afab 7490 static char buf[30];
69ddee61 7491 const char *p;
1deaa899
JH
7492 const char *ssep;
7493 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7494
e3c2afab
AM
7495#ifdef ENABLE_CHECKING
7496 /* Even if we do not want to check the inputs, this documents input
7497 constraints. Which helps in understanding the following code. */
7498 if (STACK_REG_P (operands[0])
7499 && ((REG_P (operands[1])
7500 && REGNO (operands[0]) == REGNO (operands[1])
7501 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7502 || (REG_P (operands[2])
7503 && REGNO (operands[0]) == REGNO (operands[2])
7504 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7505 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7506 ; /* ok */
1deaa899 7507 else if (!is_sse)
e3c2afab
AM
7508 abort ();
7509#endif
7510
2a2ab3f9
JVA
7511 switch (GET_CODE (operands[3]))
7512 {
7513 case PLUS:
e075ae69
RH
7514 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7515 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7516 p = "fiadd";
7517 else
7518 p = "fadd";
1deaa899 7519 ssep = "add";
2a2ab3f9
JVA
7520 break;
7521
7522 case MINUS:
e075ae69
RH
7523 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7524 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7525 p = "fisub";
7526 else
7527 p = "fsub";
1deaa899 7528 ssep = "sub";
2a2ab3f9
JVA
7529 break;
7530
7531 case MULT:
e075ae69
RH
7532 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7533 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7534 p = "fimul";
7535 else
7536 p = "fmul";
1deaa899 7537 ssep = "mul";
2a2ab3f9
JVA
7538 break;
7539
7540 case DIV:
e075ae69
RH
7541 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7542 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7543 p = "fidiv";
7544 else
7545 p = "fdiv";
1deaa899 7546 ssep = "div";
2a2ab3f9
JVA
7547 break;
7548
7549 default:
7550 abort ();
7551 }
7552
1deaa899
JH
7553 if (is_sse)
7554 {
7555 strcpy (buf, ssep);
7556 if (GET_MODE (operands[0]) == SFmode)
7557 strcat (buf, "ss\t{%2, %0|%0, %2}");
7558 else
7559 strcat (buf, "sd\t{%2, %0|%0, %2}");
7560 return buf;
7561 }
e075ae69 7562 strcpy (buf, p);
2a2ab3f9
JVA
7563
7564 switch (GET_CODE (operands[3]))
7565 {
7566 case MULT:
7567 case PLUS:
7568 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7569 {
e3c2afab 7570 rtx temp = operands[2];
2a2ab3f9
JVA
7571 operands[2] = operands[1];
7572 operands[1] = temp;
7573 }
7574
e3c2afab
AM
7575	 /* We now know that operands[0] == operands[1]. */
7576
2a2ab3f9 7577 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7578 {
7579 p = "%z2\t%2";
7580 break;
7581 }
2a2ab3f9
JVA
7582
7583 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7584 {
7585 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7586 /* How is it that we are storing to a dead operand[2]?
7587 Well, presumably operands[1] is dead too. We can't
7588 store the result to st(0) as st(0) gets popped on this
7589 instruction. Instead store to operands[2] (which I
7590 think has to be st(1)). st(1) will be popped later.
7591 gcc <= 2.8.1 didn't have this check and generated
7592 assembly code that the Unixware assembler rejected. */
7593 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7594 else
e3c2afab 7595 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7596 break;
6b28fd63 7597 }
2a2ab3f9
JVA
7598
7599 if (STACK_TOP_P (operands[0]))
e3c2afab 7600 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7601 else
e3c2afab 7602 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7603 break;
2a2ab3f9
JVA
7604
7605 case MINUS:
7606 case DIV:
7607 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7608 {
7609 p = "r%z1\t%1";
7610 break;
7611 }
2a2ab3f9
JVA
7612
7613 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7614 {
7615 p = "%z2\t%2";
7616 break;
7617 }
2a2ab3f9 7618
2a2ab3f9 7619 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7620 {
e3c2afab
AM
7621#if SYSV386_COMPAT
7622 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7623 derived assemblers, confusingly reverse the direction of
7624 the operation for fsub{r} and fdiv{r} when the
7625 destination register is not st(0). The Intel assembler
7626 doesn't have this brain damage. Read !SYSV386_COMPAT to
7627 figure out what the hardware really does. */
7628 if (STACK_TOP_P (operands[0]))
7629 p = "{p\t%0, %2|rp\t%2, %0}";
7630 else
7631 p = "{rp\t%2, %0|p\t%0, %2}";
7632#else
6b28fd63 7633 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7634 /* As above for fmul/fadd, we can't store to st(0). */
7635 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7636 else
e3c2afab
AM
7637 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7638#endif
e075ae69 7639 break;
6b28fd63 7640 }
2a2ab3f9
JVA
7641
7642 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7643 {
e3c2afab 7644#if SYSV386_COMPAT
6b28fd63 7645 if (STACK_TOP_P (operands[0]))
e3c2afab 7646 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7647 else
e3c2afab
AM
7648 p = "{p\t%1, %0|rp\t%0, %1}";
7649#else
7650 if (STACK_TOP_P (operands[0]))
7651 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7652 else
7653 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7654#endif
e075ae69 7655 break;
6b28fd63 7656 }
2a2ab3f9
JVA
7657
7658 if (STACK_TOP_P (operands[0]))
7659 {
7660 if (STACK_TOP_P (operands[1]))
e3c2afab 7661 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7662 else
e3c2afab 7663 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7664 break;
2a2ab3f9
JVA
7665 }
7666 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7667 {
7668#if SYSV386_COMPAT
7669 p = "{\t%1, %0|r\t%0, %1}";
7670#else
7671 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7672#endif
7673 }
2a2ab3f9 7674 else
e3c2afab
AM
7675 {
7676#if SYSV386_COMPAT
7677 p = "{r\t%2, %0|\t%0, %2}";
7678#else
7679 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7680#endif
7681 }
e075ae69 7682 break;
2a2ab3f9
JVA
7683
7684 default:
7685 abort ();
7686 }
e075ae69
RH
7687
7688 strcat (buf, p);
7689 return buf;
2a2ab3f9 7690}
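/* Editorial note on the {att|intel} templates above: the text before '|'
   is the AT&T spelling, the text after it the Intel spelling.  Under
   SYSV386_COMPAT the fsub/fsubr and fdiv/fdivr spellings are deliberately
   swapped when the destination is not st(0), because AT&T-derived
   assemblers reverse the direction of those mnemonics.  */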
e075ae69 7691
a4f31c00 7692/* Output code to initialize control word copies used by
7a2e09f4
JH
7693	 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
7694	 is set to a control word that rounds towards zero (truncation). */
7695void
7696emit_i387_cw_initialization (normal, round_down)
7697 rtx normal, round_down;
7698{
7699 rtx reg = gen_reg_rtx (HImode);
7700
7701 emit_insn (gen_x86_fnstcw_1 (normal));
7702 emit_move_insn (reg, normal);
7703 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7704 && !TARGET_64BIT)
7705 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7706 else
7707 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7708 emit_move_insn (round_down, reg);
7709}
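/* Editorial sketch: bits 10-11 of the 387 control word form the
   rounding-control field, and OR-ing in 0xc00 (as above) sets RC=11,
   i.e. round towards zero, which is what the trunc?f?i patterns need.  */
#if 0
static unsigned short
cw_truncate_sketch (unsigned short cw)
{
  return (unsigned short) (cw | 0x0c00);	/* RC = 11: chop */
}
#endif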
7710
2a2ab3f9 7711/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7712 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7713 operand may be [SDX]Fmode. */
2a2ab3f9 7714
69ddee61 7715const char *
2a2ab3f9
JVA
7716output_fix_trunc (insn, operands)
7717 rtx insn;
7718 rtx *operands;
7719{
7720 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7721 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7722
e075ae69
RH
7723 /* Jump through a hoop or two for DImode, since the hardware has no
7724 non-popping instruction. We used to do this a different way, but
7725 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7726 if (dimode_p && !stack_top_dies)
7727 output_asm_insn ("fld\t%y1", operands);
e075ae69 7728
7a2e09f4 7729 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7730 abort ();
7731
e075ae69 7732 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7733 abort ();
e9a25f70 7734
7a2e09f4 7735 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7736 if (stack_top_dies || dimode_p)
7a2e09f4 7737 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7738 else
7a2e09f4 7739 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7740 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7741
e075ae69 7742 return "";
2a2ab3f9 7743}
cda749b1 7744
e075ae69
RH
7745/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7746 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7747 when fucom should be used. */
7748
69ddee61 7749const char *
e075ae69 7750output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7751 rtx insn;
7752 rtx *operands;
e075ae69 7753 int eflags_p, unordered_p;
cda749b1 7754{
e075ae69
RH
7755 int stack_top_dies;
7756 rtx cmp_op0 = operands[0];
7757 rtx cmp_op1 = operands[1];
0644b628 7758 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7759
7760 if (eflags_p == 2)
7761 {
7762 cmp_op0 = cmp_op1;
7763 cmp_op1 = operands[2];
7764 }
0644b628
JH
7765 if (is_sse)
7766 {
7767 if (GET_MODE (operands[0]) == SFmode)
7768 if (unordered_p)
7769 return "ucomiss\t{%1, %0|%0, %1}";
7770 else
7771	 return "comiss\t{%1, %0|%0, %1}";
7772 else
7773 if (unordered_p)
7774 return "ucomisd\t{%1, %0|%0, %1}";
7775 else
7776	 return "comisd\t{%1, %0|%0, %1}";
7777 }
cda749b1 7778
e075ae69 7779 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7780 abort ();
7781
e075ae69 7782 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7783
e075ae69
RH
7784 if (STACK_REG_P (cmp_op1)
7785 && stack_top_dies
7786 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7787 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7788 {
e075ae69
RH
7789	 /* If the top of the 387 stack dies, and the other operand
7790	 is also a stack register that dies, then this must be a
7791	 `fcompp' float compare. */
7792
7793 if (eflags_p == 1)
7794 {
7795 /* There is no double popping fcomi variant. Fortunately,
7796 eflags is immune from the fstp's cc clobbering. */
7797 if (unordered_p)
7798 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7799 else
7800 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7801 return "fstp\t%y0";
7802 }
7803 else
cda749b1 7804 {
e075ae69
RH
7805 if (eflags_p == 2)
7806 {
7807 if (unordered_p)
7808 return "fucompp\n\tfnstsw\t%0";
7809 else
7810 return "fcompp\n\tfnstsw\t%0";
7811 }
cda749b1
JW
7812 else
7813 {
e075ae69
RH
7814 if (unordered_p)
7815 return "fucompp";
7816 else
7817 return "fcompp";
cda749b1
JW
7818 }
7819 }
cda749b1
JW
7820 }
7821 else
7822 {
e075ae69 7823 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7824
0f290768 7825 static const char * const alt[24] =
e075ae69
RH
7826 {
7827 "fcom%z1\t%y1",
7828 "fcomp%z1\t%y1",
7829 "fucom%z1\t%y1",
7830 "fucomp%z1\t%y1",
0f290768 7831
e075ae69
RH
7832 "ficom%z1\t%y1",
7833 "ficomp%z1\t%y1",
7834 NULL,
7835 NULL,
7836
7837 "fcomi\t{%y1, %0|%0, %y1}",
7838 "fcomip\t{%y1, %0|%0, %y1}",
7839 "fucomi\t{%y1, %0|%0, %y1}",
7840 "fucomip\t{%y1, %0|%0, %y1}",
7841
7842 NULL,
7843 NULL,
7844 NULL,
7845 NULL,
7846
7847 "fcom%z2\t%y2\n\tfnstsw\t%0",
7848 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7849 "fucom%z2\t%y2\n\tfnstsw\t%0",
7850 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7851
e075ae69
RH
7852 "ficom%z2\t%y2\n\tfnstsw\t%0",
7853 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7854 NULL,
7855 NULL
7856 };
7857
7858 int mask;
69ddee61 7859 const char *ret;
e075ae69
RH
7860
7861 mask = eflags_p << 3;
7862 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7863 mask |= unordered_p << 1;
7864 mask |= stack_top_dies;
7865
7866 if (mask >= 24)
7867 abort ();
7868 ret = alt[mask];
7869 if (ret == NULL)
7870 abort ();
cda749b1 7871
e075ae69 7872 return ret;
cda749b1
JW
7873 }
7874}
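/* Editorial sketch: the alt[] index above packs four predicates into a
   small bitmask; this helper spells out the encoding used to build it.  */
#if 0
static int
fp_compare_mask_sketch (int eflags_p, int int_operand_p,
			int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (int_operand_p << 2)
	 | (unordered_p << 1) | stack_top_dies;
}
#endif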
2a2ab3f9 7875
f88c65f7
RH
7876void
7877ix86_output_addr_vec_elt (file, value)
7878 FILE *file;
7879 int value;
7880{
7881 const char *directive = ASM_LONG;
7882
7883 if (TARGET_64BIT)
7884 {
7885#ifdef ASM_QUAD
7886 directive = ASM_QUAD;
7887#else
7888 abort ();
7889#endif
7890 }
7891
7892 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7893}
7894
7895void
7896ix86_output_addr_diff_elt (file, value, rel)
7897 FILE *file;
7898 int value, rel;
7899{
7900 if (TARGET_64BIT)
74411039 7901 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7902 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7903 else if (HAVE_AS_GOTOFF_IN_DATA)
7904 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7905#if TARGET_MACHO
7906 else if (TARGET_MACHO)
7907 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7908 machopic_function_base_name () + 1);
7909#endif
f88c65f7 7910 else
5fc0e5df
KW
7911 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7912 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7913}
32b5b1aa 7914\f
a8bac9ab
RH
7915/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7916 for the target. */
7917
7918void
7919ix86_expand_clear (dest)
7920 rtx dest;
7921{
7922 rtx tmp;
7923
7924 /* We play register width games, which are only valid after reload. */
7925 if (!reload_completed)
7926 abort ();
7927
7928 /* Avoid HImode and its attendant prefix byte. */
7929 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7930 dest = gen_rtx_REG (SImode, REGNO (dest));
7931
7932 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7933
7934 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7935 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7936 {
7937 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7938 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7939 }
7940
7941 emit_insn (tmp);
7942}
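/* Editorial note: "xor reg,reg" is the two-byte form but clobbers EFLAGS,
   hence the CLOBBER wrapped around it above; "mov $0,reg" is longer
   (five bytes for SImode) yet leaves the flags untouched, which is why
   TARGET_USE_MOV0 may prefer it when not optimizing for size.  */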
7943
f996902d
RH
7944/* X is an unchanging MEM. If it is a constant pool reference, return
7945 the constant pool rtx, else NULL. */
7946
7947static rtx
7948maybe_get_pool_constant (x)
7949 rtx x;
7950{
7951 x = XEXP (x, 0);
7952
75d38379 7953 if (flag_pic && ! TARGET_64BIT)
f996902d
RH
7954 {
7955 if (GET_CODE (x) != PLUS)
7956 return NULL_RTX;
7957 if (XEXP (x, 0) != pic_offset_table_rtx)
7958 return NULL_RTX;
7959 x = XEXP (x, 1);
7960 if (GET_CODE (x) != CONST)
7961 return NULL_RTX;
7962 x = XEXP (x, 0);
7963 if (GET_CODE (x) != UNSPEC)
7964 return NULL_RTX;
7965 if (XINT (x, 1) != UNSPEC_GOTOFF)
7966 return NULL_RTX;
7967 x = XVECEXP (x, 0, 0);
7968 }
7969
7970 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7971 return get_pool_constant (x);
7972
7973 return NULL_RTX;
7974}
7975
79325812 7976void
e075ae69
RH
7977ix86_expand_move (mode, operands)
7978 enum machine_mode mode;
7979 rtx operands[];
32b5b1aa 7980{
e075ae69 7981 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7982 rtx insn, op0, op1, tmp;
7983
7984 op0 = operands[0];
7985 op1 = operands[1];
7986
f996902d
RH
7987 if (tls_symbolic_operand (op1, Pmode))
7988 {
7989 op1 = legitimize_address (op1, op1, VOIDmode);
7990 if (GET_CODE (op0) == MEM)
7991 {
7992 tmp = gen_reg_rtx (mode);
7993 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7994 op1 = tmp;
7995 }
7996 }
7997 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7998 {
b069de3b
SS
7999#if TARGET_MACHO
8000 if (MACHOPIC_PURE)
8001 {
8002 rtx temp = ((reload_in_progress
8003 || ((op0 && GET_CODE (op0) == REG)
8004 && mode == Pmode))
8005 ? op0 : gen_reg_rtx (Pmode));
8006 op1 = machopic_indirect_data_reference (op1, temp);
8007 op1 = machopic_legitimize_pic_address (op1, mode,
8008 temp == op1 ? 0 : temp);
8009 }
8010 else
8011 {
8012 if (MACHOPIC_INDIRECT)
8013 op1 = machopic_indirect_data_reference (op1, 0);
8014 }
8015 if (op0 != op1)
8016 {
8017 insn = gen_rtx_SET (VOIDmode, op0, op1);
8018 emit_insn (insn);
8019 }
8020 return;
8021#endif /* TARGET_MACHO */
f996902d
RH
8022 if (GET_CODE (op0) == MEM)
8023 op1 = force_reg (Pmode, op1);
e075ae69 8024 else
32b5b1aa 8025 {
f996902d 8026 rtx temp = op0;
e075ae69
RH
8027 if (GET_CODE (temp) != REG)
8028 temp = gen_reg_rtx (Pmode);
f996902d
RH
8029 temp = legitimize_pic_address (op1, temp);
8030 if (temp == op0)
e075ae69 8031 return;
f996902d 8032 op1 = temp;
32b5b1aa 8033 }
e075ae69
RH
8034 }
8035 else
8036 {
f996902d 8037 if (GET_CODE (op0) == MEM
44cf5b6a 8038 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
8039 || !push_operand (op0, mode))
8040 && GET_CODE (op1) == MEM)
8041 op1 = force_reg (mode, op1);
e9a25f70 8042
f996902d
RH
8043 if (push_operand (op0, mode)
8044 && ! general_no_elim_operand (op1, mode))
8045 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 8046
44cf5b6a
JH
8047	 /* Force large constants in 64-bit compilation into registers
8048	 to get them CSEd. */
8049 if (TARGET_64BIT && mode == DImode
f996902d
RH
8050 && immediate_operand (op1, mode)
8051 && !x86_64_zero_extended_value (op1)
8052 && !register_operand (op0, mode)
44cf5b6a 8053 && optimize && !reload_completed && !reload_in_progress)
f996902d 8054 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 8055
e075ae69 8056 if (FLOAT_MODE_P (mode))
32b5b1aa 8057 {
d7a29404
JH
8058 /* If we are loading a floating point constant to a register,
8059 force the value to memory now, since we'll get better code
8060	 out of the back end. */
e075ae69
RH
8061
8062 if (strict)
8063 ;
f996902d
RH
8064 else if (GET_CODE (op1) == CONST_DOUBLE
8065 && register_operand (op0, mode))
8066 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 8067 }
32b5b1aa 8068 }
e9a25f70 8069
f996902d 8070 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 8071
e075ae69
RH
8072 emit_insn (insn);
8073}
e9a25f70 8074
e37af218
RH
8075void
8076ix86_expand_vector_move (mode, operands)
8077 enum machine_mode mode;
8078 rtx operands[];
8079{
8080	 /* Force constants other than zero into memory. We do not know how
8081	 the instructions used to build constants modify the upper 64 bits
8082	 of the register; once we have that information we may be able
8083	 to handle some of them more efficiently. */
8084 if ((reload_in_progress | reload_completed) == 0
8085 && register_operand (operands[0], mode)
8086 && CONSTANT_P (operands[1]))
f8ca7923 8087 operands[1] = force_const_mem (mode, operands[1]);
e37af218
RH
8088
8089	 /* Make operand1 a register if neither operand is a register already. */
f8ca7923 8090 if (!no_new_pseudos
e37af218 8091 && !register_operand (operands[0], mode)
b105d6da 8092 && !register_operand (operands[1], mode))
e37af218 8093 {
59bef189 8094 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
8095 emit_move_insn (operands[0], temp);
8096 return;
8097 }
8098
8099 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8100}
e37af218 8101
e075ae69
RH
8102/* Attempt to expand a binary operator. Make the expansion closer to the
8103	 actual machine, than just general_operand, which will allow 3 separate
9d81fc27 8104 memory references (one output, two input) in a single insn. */
e9a25f70 8105
e075ae69
RH
8106void
8107ix86_expand_binary_operator (code, mode, operands)
8108 enum rtx_code code;
8109 enum machine_mode mode;
8110 rtx operands[];
8111{
8112 int matching_memory;
8113 rtx src1, src2, dst, op, clob;
8114
8115 dst = operands[0];
8116 src1 = operands[1];
8117 src2 = operands[2];
8118
8119 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8120 if (GET_RTX_CLASS (code) == 'c'
8121 && (rtx_equal_p (dst, src2)
8122 || immediate_operand (src1, mode)))
8123 {
8124 rtx temp = src1;
8125 src1 = src2;
8126 src2 = temp;
32b5b1aa 8127 }
e9a25f70 8128
e075ae69
RH
8129 /* If the destination is memory, and we do not have matching source
8130 operands, do things in registers. */
8131 matching_memory = 0;
8132 if (GET_CODE (dst) == MEM)
32b5b1aa 8133 {
e075ae69
RH
8134 if (rtx_equal_p (dst, src1))
8135 matching_memory = 1;
8136 else if (GET_RTX_CLASS (code) == 'c'
8137 && rtx_equal_p (dst, src2))
8138 matching_memory = 2;
8139 else
8140 dst = gen_reg_rtx (mode);
8141 }
0f290768 8142
e075ae69
RH
8143 /* Both source operands cannot be in memory. */
8144 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8145 {
8146 if (matching_memory != 2)
8147 src2 = force_reg (mode, src2);
8148 else
8149 src1 = force_reg (mode, src1);
32b5b1aa 8150 }
e9a25f70 8151
06a964de
JH
8152	 /* If the operation is not commutative, source 1 cannot be a constant
8153 or non-matching memory. */
0f290768 8154 if ((CONSTANT_P (src1)
06a964de
JH
8155 || (!matching_memory && GET_CODE (src1) == MEM))
8156 && GET_RTX_CLASS (code) != 'c')
e075ae69 8157 src1 = force_reg (mode, src1);
0f290768 8158
e075ae69 8159 /* If optimizing, copy to regs to improve CSE */
fe577e58 8160 if (optimize && ! no_new_pseudos)
32b5b1aa 8161 {
e075ae69
RH
8162 if (GET_CODE (dst) == MEM)
8163 dst = gen_reg_rtx (mode);
8164 if (GET_CODE (src1) == MEM)
8165 src1 = force_reg (mode, src1);
8166 if (GET_CODE (src2) == MEM)
8167 src2 = force_reg (mode, src2);
32b5b1aa 8168 }
e9a25f70 8169
e075ae69
RH
8170 /* Emit the instruction. */
8171
8172 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8173 if (reload_in_progress)
8174 {
8175 /* Reload doesn't know about the flags register, and doesn't know that
8176 it doesn't want to clobber it. We can only do this with PLUS. */
8177 if (code != PLUS)
8178 abort ();
8179 emit_insn (op);
8180 }
8181 else
32b5b1aa 8182 {
e075ae69
RH
8183 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8184 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 8185 }
e9a25f70 8186
e075ae69
RH
8187 /* Fix up the destination if needed. */
8188 if (dst != operands[0])
8189 emit_move_insn (operands[0], dst);
8190}
8191
8192/* Return TRUE or FALSE depending on whether the binary operator meets the
8193 appropriate constraints. */
8194
8195int
8196ix86_binary_operator_ok (code, mode, operands)
8197 enum rtx_code code;
8198 enum machine_mode mode ATTRIBUTE_UNUSED;
8199 rtx operands[3];
8200{
8201 /* Both source operands cannot be in memory. */
8202 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8203 return 0;
8204	 /* If the operation is not commutative, source 1 cannot be a constant. */
8205 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8206 return 0;
8207 /* If the destination is memory, we must have a matching source operand. */
8208 if (GET_CODE (operands[0]) == MEM
8209 && ! (rtx_equal_p (operands[0], operands[1])
8210 || (GET_RTX_CLASS (code) == 'c'
8211 && rtx_equal_p (operands[0], operands[2]))))
8212 return 0;
06a964de 8213	 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 8214 have a matching destination. */
06a964de
JH
8215 if (GET_CODE (operands[1]) == MEM
8216 && GET_RTX_CLASS (code) != 'c'
8217 && ! rtx_equal_p (operands[0], operands[1]))
8218 return 0;
e075ae69
RH
8219 return 1;
8220}
8221
8222/* Attempt to expand a unary operator. Make the expansion closer to the
8223	 actual machine, than just general_operand, which will allow 2 separate
9d81fc27 8224 memory references (one output, one input) in a single insn. */
e075ae69 8225
9d81fc27 8226void
e075ae69
RH
8227ix86_expand_unary_operator (code, mode, operands)
8228 enum rtx_code code;
8229 enum machine_mode mode;
8230 rtx operands[];
8231{
06a964de
JH
8232 int matching_memory;
8233 rtx src, dst, op, clob;
8234
8235 dst = operands[0];
8236 src = operands[1];
e075ae69 8237
06a964de
JH
8238 /* If the destination is memory, and we do not have matching source
8239 operands, do things in registers. */
8240 matching_memory = 0;
8241 if (GET_CODE (dst) == MEM)
32b5b1aa 8242 {
06a964de
JH
8243 if (rtx_equal_p (dst, src))
8244 matching_memory = 1;
e075ae69 8245 else
06a964de 8246 dst = gen_reg_rtx (mode);
32b5b1aa 8247 }
e9a25f70 8248
06a964de
JH
8249 /* When source operand is memory, destination must match. */
8250 if (!matching_memory && GET_CODE (src) == MEM)
8251 src = force_reg (mode, src);
0f290768 8252
06a964de 8253 /* If optimizing, copy to regs to improve CSE */
fe577e58 8254 if (optimize && ! no_new_pseudos)
06a964de
JH
8255 {
8256 if (GET_CODE (dst) == MEM)
8257 dst = gen_reg_rtx (mode);
8258 if (GET_CODE (src) == MEM)
8259 src = force_reg (mode, src);
8260 }
8261
8262 /* Emit the instruction. */
8263
8264 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8265 if (reload_in_progress || code == NOT)
8266 {
8267 /* Reload doesn't know about the flags register, and doesn't know that
8268 it doesn't want to clobber it. */
8269 if (code != NOT)
8270 abort ();
8271 emit_insn (op);
8272 }
8273 else
8274 {
8275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8277 }
8278
8279 /* Fix up the destination if needed. */
8280 if (dst != operands[0])
8281 emit_move_insn (operands[0], dst);
e075ae69
RH
8282}
8283
8284/* Return TRUE or FALSE depending on whether the unary operator meets the
8285 appropriate constraints. */
8286
8287int
8288ix86_unary_operator_ok (code, mode, operands)
8289 enum rtx_code code ATTRIBUTE_UNUSED;
8290 enum machine_mode mode ATTRIBUTE_UNUSED;
8291 rtx operands[2] ATTRIBUTE_UNUSED;
8292{
06a964de
JH
8293 /* If one of operands is memory, source and destination must match. */
8294 if ((GET_CODE (operands[0]) == MEM
8295 || GET_CODE (operands[1]) == MEM)
8296 && ! rtx_equal_p (operands[0], operands[1]))
8297 return FALSE;
e075ae69
RH
8298 return TRUE;
8299}
8300
16189740
RH
8301/* Return TRUE or FALSE depending on whether the first SET in INSN
8302 has source and destination with matching CC modes, and that the
8303 CC mode is at least as constrained as REQ_MODE. */
8304
8305int
8306ix86_match_ccmode (insn, req_mode)
8307 rtx insn;
8308 enum machine_mode req_mode;
8309{
8310 rtx set;
8311 enum machine_mode set_mode;
8312
8313 set = PATTERN (insn);
8314 if (GET_CODE (set) == PARALLEL)
8315 set = XVECEXP (set, 0, 0);
8316 if (GET_CODE (set) != SET)
8317 abort ();
9076b9c1
JH
8318 if (GET_CODE (SET_SRC (set)) != COMPARE)
8319 abort ();
16189740
RH
8320
8321 set_mode = GET_MODE (SET_DEST (set));
8322 switch (set_mode)
8323 {
9076b9c1
JH
8324 case CCNOmode:
8325 if (req_mode != CCNOmode
8326 && (req_mode != CCmode
8327 || XEXP (SET_SRC (set), 1) != const0_rtx))
8328 return 0;
8329 break;
16189740 8330 case CCmode:
9076b9c1 8331 if (req_mode == CCGCmode)
16189740
RH
8332 return 0;
8333 /* FALLTHRU */
9076b9c1
JH
8334 case CCGCmode:
8335 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8336 return 0;
8337 /* FALLTHRU */
8338 case CCGOCmode:
16189740
RH
8339 if (req_mode == CCZmode)
8340 return 0;
8341 /* FALLTHRU */
8342 case CCZmode:
8343 break;
8344
8345 default:
8346 abort ();
8347 }
8348
8349 return (GET_MODE (SET_SRC (set)) == set_mode);
8350}
8351
e075ae69
RH
8352/* Generate insn patterns to do an integer compare of OPERANDS. */
8353
8354static rtx
8355ix86_expand_int_compare (code, op0, op1)
8356 enum rtx_code code;
8357 rtx op0, op1;
8358{
8359 enum machine_mode cmpmode;
8360 rtx tmp, flags;
8361
8362 cmpmode = SELECT_CC_MODE (code, op0, op1);
8363 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8364
8365 /* This is very simple, but making the interface the same as in the
8366 FP case makes the rest of the code easier. */
8367 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8368 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8369
8370 /* Return the test that should be put into the flags user, i.e.
8371 the bcc, scc, or cmov instruction. */
8372 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8373}
8374
3a3677ff
RH
8375/* Figure out whether to use ordered or unordered fp comparisons.
8376 Return the appropriate mode to use. */
e075ae69 8377
b1cdafbb 8378enum machine_mode
3a3677ff 8379ix86_fp_compare_mode (code)
8752c357 8380 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 8381{
9e7adcb3
JH
8382 /* ??? In order to make all comparisons reversible, we do all comparisons
8383 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8384	 all forms of trapping and nontrapping comparisons, we can make inequality
8385 comparisons trapping again, since it results in better code when using
8386 FCOM based compares. */
8387 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8388}
8389
9076b9c1
JH
8390enum machine_mode
8391ix86_cc_mode (code, op0, op1)
8392 enum rtx_code code;
8393 rtx op0, op1;
8394{
8395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8396 return ix86_fp_compare_mode (code);
8397 switch (code)
8398 {
8399 /* Only zero flag is needed. */
8400 case EQ: /* ZF=0 */
8401 case NE: /* ZF!=0 */
8402 return CCZmode;
8403 /* Codes needing carry flag. */
265dab10
JH
8404 case GEU: /* CF=0 */
8405 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8406 case LTU: /* CF=1 */
8407 case LEU: /* CF=1 | ZF=1 */
265dab10 8408 return CCmode;
9076b9c1
JH
8409 /* Codes possibly doable only with sign flag when
8410 comparing against zero. */
8411 case GE: /* SF=OF or SF=0 */
7e08e190 8412 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8413 if (op1 == const0_rtx)
8414 return CCGOCmode;
8415 else
8416 /* For other cases Carry flag is not required. */
8417 return CCGCmode;
8418	 /* Codes doable only with the sign flag when comparing
8419	 against zero, but for which we lack a jump instruction,
8420	 so we need to use relational tests against overflow,
8421	 which thus needs to be zero. */
8422 case GT: /* ZF=0 & SF=OF */
8423 case LE: /* ZF=1 | SF<>OF */
8424 if (op1 == const0_rtx)
8425 return CCNOmode;
8426 else
8427 return CCGCmode;
7fcd7218
JH
8428	 /* The strcmp pattern does (use flags), and combine may ask us for the
8429	 proper mode. */
8430 case USE:
8431 return CCmode;
9076b9c1 8432 default:
0f290768 8433 abort ();
9076b9c1
JH
8434 }
8435}
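/* Editorial summary of the integer cases above:
     EQ/NE               -> CCZmode   (only ZF needed)
     GEU/GTU/LTU/LEU     -> CCmode    (carry flag required)
     GE/LT against zero  -> CCGOCmode (sign flag suffices)
     GT/LE against zero  -> CCNOmode  (overflow must also be zero)
     other GE/LT/GT/LE   -> CCGCmode  (anything but the carry flag)  */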
8436
3a3677ff
RH
8437/* Return true if we should use an FCOMI instruction for this fp comparison. */
8438
a940d8bd 8439int
3a3677ff 8440ix86_use_fcomi_compare (code)
9e7adcb3 8441 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8442{
9e7adcb3
JH
8443 enum rtx_code swapped_code = swap_condition (code);
8444 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8445 || (ix86_fp_comparison_cost (swapped_code)
8446 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8447}
8448
0f290768 8449/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
8450 to a fp comparison. The operands are updated in place; the new
8451	 comparison code is returned. */
8452
8453static enum rtx_code
8454ix86_prepare_fp_compare_args (code, pop0, pop1)
8455 enum rtx_code code;
8456 rtx *pop0, *pop1;
8457{
8458 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8459 rtx op0 = *pop0, op1 = *pop1;
8460 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8461 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8462
e075ae69 8463 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8464 The same is true of the XFmode compare instructions. The same is
8465 true of the fcomi compare instructions. */
8466
0644b628
JH
8467 if (!is_sse
8468 && (fpcmp_mode == CCFPUmode
8469 || op_mode == XFmode
8470 || op_mode == TFmode
8471 || ix86_use_fcomi_compare (code)))
e075ae69 8472 {
3a3677ff
RH
8473 op0 = force_reg (op_mode, op0);
8474 op1 = force_reg (op_mode, op1);
e075ae69
RH
8475 }
8476 else
8477 {
8478 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8479 things around if they appear profitable, otherwise force op0
8480 into a register. */
8481
8482 if (standard_80387_constant_p (op0) == 0
8483 || (GET_CODE (op0) == MEM
8484 && ! (standard_80387_constant_p (op1) == 0
8485 || GET_CODE (op1) == MEM)))
32b5b1aa 8486 {
e075ae69
RH
8487 rtx tmp;
8488 tmp = op0, op0 = op1, op1 = tmp;
8489 code = swap_condition (code);
8490 }
8491
8492 if (GET_CODE (op0) != REG)
3a3677ff 8493 op0 = force_reg (op_mode, op0);
e075ae69
RH
8494
8495 if (CONSTANT_P (op1))
8496 {
8497 if (standard_80387_constant_p (op1))
3a3677ff 8498 op1 = force_reg (op_mode, op1);
e075ae69 8499 else
3a3677ff 8500 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8501 }
8502 }
e9a25f70 8503
9e7adcb3
JH
8504 /* Try to rearrange the comparison to make it cheaper. */
8505 if (ix86_fp_comparison_cost (code)
8506 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8507 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8508 {
8509 rtx tmp;
8510 tmp = op0, op0 = op1, op1 = tmp;
8511 code = swap_condition (code);
8512 if (GET_CODE (op0) != REG)
8513 op0 = force_reg (op_mode, op0);
8514 }
8515
3a3677ff
RH
8516 *pop0 = op0;
8517 *pop1 = op1;
8518 return code;
8519}
8520
c0c102a9
JH
8521/* Convert comparison codes we use to represent FP comparison to integer
8522 code that will result in proper branch. Return UNKNOWN if no such code
8523 is available. */
8524static enum rtx_code
8525ix86_fp_compare_code_to_integer (code)
8526 enum rtx_code code;
8527{
8528 switch (code)
8529 {
8530 case GT:
8531 return GTU;
8532 case GE:
8533 return GEU;
8534 case ORDERED:
8535 case UNORDERED:
8536 return code;
8537 break;
8538 case UNEQ:
8539 return EQ;
8540 break;
8541 case UNLT:
8542 return LTU;
8543 break;
8544 case UNLE:
8545 return LEU;
8546 break;
8547 case LTGT:
8548 return NE;
8549 break;
8550 default:
8551 return UNKNOWN;
8552 }
8553}
8554
8555/* Split comparison code CODE into comparisons we can do using branch
8556	 instructions. BYPASS_CODE is the comparison code for the branch that will
8557	 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8558	 is not required, its value is set to NIL.
8559 We never require more than two branches. */
8560static void
8561ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8562 enum rtx_code code, *bypass_code, *first_code, *second_code;
8563{
8564 *first_code = code;
8565 *bypass_code = NIL;
8566 *second_code = NIL;
8567
8568 /* The fcomi comparison sets flags as follows:
8569
8570 cmp ZF PF CF
8571 > 0 0 0
8572 < 0 0 1
8573 = 1 0 0
8574 un 1 1 1 */
8575
8576 switch (code)
8577 {
8578 case GT: /* GTU - CF=0 & ZF=0 */
8579 case GE: /* GEU - CF=0 */
8580 case ORDERED: /* PF=0 */
8581 case UNORDERED: /* PF=1 */
8582 case UNEQ: /* EQ - ZF=1 */
8583 case UNLT: /* LTU - CF=1 */
8584 case UNLE: /* LEU - CF=1 | ZF=1 */
8585 case LTGT: /* EQ - ZF=0 */
8586 break;
8587 case LT: /* LTU - CF=1 - fails on unordered */
8588 *first_code = UNLT;
8589 *bypass_code = UNORDERED;
8590 break;
8591 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8592 *first_code = UNLE;
8593 *bypass_code = UNORDERED;
8594 break;
8595 case EQ: /* EQ - ZF=1 - fails on unordered */
8596 *first_code = UNEQ;
8597 *bypass_code = UNORDERED;
8598 break;
8599 case NE: /* NE - ZF=0 - fails on unordered */
8600 *first_code = LTGT;
8601 *second_code = UNORDERED;
8602 break;
8603 case UNGE: /* GEU - CF=0 - fails on unordered */
8604 *first_code = GE;
8605 *second_code = UNORDERED;
8606 break;
8607 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8608 *first_code = GT;
8609 *second_code = UNORDERED;
8610 break;
8611 default:
8612 abort ();
8613 }
8614 if (!TARGET_IEEE_FP)
8615 {
8616 *second_code = NIL;
8617 *bypass_code = NIL;
8618 }
8619}
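
/* A standalone sketch, not used by the compiler, of the NE split above:
   under IEEE math NE becomes a LTGT branch plus a second UNORDERED
   branch, because LTGT alone is false when either operand is a NaN.  */
static int
fp_ne_split_example (double a, double b)
{
  int ltgt = a < b || a > b;		/* first_code = LTGT */
  int unordered = a != a || b != b;	/* second_code = UNORDERED */
  return ltgt || unordered;		/* same truth value as a != b */
}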

/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as the
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and to take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison using the cheapest of the three
   methods.  See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
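
/* A standalone sketch, not used by the compiler, of the bit twiddling
   above.  After fnstsw, AH holds C0 in bit 0, C2 in bit 2 and C3 in
   bit 6, so the mask 0x45 selects all three condition bits.  For an
   IEEE '>' the result is true exactly when all of them are clear.  */
static int
fp_gt_from_status_example (unsigned char ah)
{
  /* testb $0x45, %ah; sete ...  - greater iff no condition bit set.  */
  return (ah & 0x45) == 0;
}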

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.
	   If so, we can expand the jump early.  Otherwise delay expansion
	   by creating a compound insn so as not to confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume that the BYPASS and SECOND tests are always
	 tests for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that the probability need not be
	 updated.  Later we may run some experiments and see if
	 unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
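
/* A standalone sketch, not used by the compiler, of how the expansion
   above combines the extra tests: a SECOND test is OR'ed into the
   result, while a BYPASS test is reversed and AND'ed in.  For example,
   an IEEE 'a < b' uses first_code UNLT with bypass_code UNORDERED.  */
static int
setcc_combine_example (double a, double b)
{
  int unlt = !(a >= b);			/* true on '<' and on NaNs */
  int ordered = a == a && b == b;	/* reversed UNORDERED bypass */
  return unlt & ordered;		/* same truth value as a < b */
}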

/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set POP to the comparison operation.  */
bool
ix86_expand_carry_flag_compare (code, op0, op1, pop)
     rtx op0, op1, *pop;
     enum rtx_code code;
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.
     Also we can't deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b+1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into a register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
			  mode);
      code = (code == LT ? GEU : LTU);
      break;
    /* Convert a<=-1 into (unsigned)a>=0x80000000.  */
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
			  mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
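
/* A standalone sketch, not used by the compiler, of the conversions
   above, assuming 32-bit int: each signed test maps onto a pure
   carry-flag (unsigned) test.  */
static int
carry_flag_identities_example (int a, unsigned int b)
{
  unsigned int ua = (unsigned int) a;
  int ok = 1;
  ok &= (a == 0) == (ua < 1u);			/* EQ -> LTU */
  ok &= (a >= 0) == (ua < 0x80000000u);		/* GE -> LTU */
  ok &= (a < 0) == (ua >= 0x80000000u);		/* LT -> GEU */
  if (b != 0xffffffffu)				/* the overflow bail-out */
    ok &= (ua > b) == (ua >= b + 1u);		/* GTU -> GEU */
  return ok;
}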

int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than with sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
					     ix86_compare_op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      compare_code = GET_CODE (compare_op);

	      /* To simplify the rest of the code, restrict to the GEU
		 case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		}
	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return 1; /* DONE */
	}

      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing an unordered compare to a normal
		 compare, which is not valid in general (we may convert a
		 non-trapping condition to a trapping one), however on i386
		 we currently emit all comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1 (if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      rtx out1;
	      out1 = out;
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, out);
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
					copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}

      /*
       * General case:			  Jumpful:
       *   xorl dest,dest		    cmpl op1, op2
       *   cmpl op1, op2		    movl ct, dest
       *   setcc dest			    jcc 1f
       *   decl dest			    movl cf, dest
       *   andl (cf-ct),dest		    1:
       *   addl ct,dest
       *
       * Size 20.			    Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST >= 2)
	{
	  if (cf == 0)
	    {
	      cf = ct;
	      ct = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing an unordered compare to a normal
		   compare, which is not valid in general (we may convert
		   a non-trapping condition to a trapping one), however on
		   i386 we currently emit all comparisons unordered.  */
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != NIL)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != NIL)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while the code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out),
				       GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  copy_rtx (operands[3]),
						  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  copy_rtx (operands[2]),
						  copy_rtx (operands[0]))));

  return 1; /* DONE */
}
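
/* A standalone sketch, not used by the compiler, of the branch-free
   selection emitted above: after a compare, 'sbbl dest,dest' yields -1
   when the carry is set and 0 otherwise, and masking then picks one of
   the two constants.  Overflow of cf - ct is ignored here.  */
static int
sbb_select_example (unsigned int a, unsigned int b, int ct, int cf)
{
  int mask = a < b ? -1 : 0;		/* cmpl b,a; sbbl mask,mask */
  return ((cf - ct) & mask) + ct;	/* andl; addl: a < b ? cf : ct */
}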

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT
	      && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands
	 and the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT || code == UNLE)
	    {
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage the condition to be sse_comparison_operator.  In case we
	 are in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage the result to be the first operand of the
	 conditional move.  We also don't support the NE comparison on SSE,
	 so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
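
/* A standalone sketch, not used by the compiler, of why UNLE and UNGE
   swap the operands above: minss computes 'a < b ? a : b' and keeps the
   second operand when the comparison is unordered, so the operand order
   decides which value survives a NaN.  */
static float
sse_min_example (float a, float b)
{
  return a < b ? a : b;	/* minss semantics; yields b when a or b is NaN */
}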

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool references to immediates.  This is used by
     fp moves, which force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = gen_int_mode (l[2], SImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
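
/* A standalone sketch, not used by the compiler, of what the DFmode
   CONST_DOUBLE case above produces, assuming a little-endian target
   where the low word of the IEEE representation comes first.  */
static void
split_double_example (double d, unsigned int w[2])
{
  union { double d; unsigned int w[2]; } u;
  u.d = d;
  w[0] = u.w[0];	/* low 32 bits */
  w[1] = u.w[1];	/* high 32 bits: sign, exponent, mantissa top */
}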

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For a 64bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool references to immediates.  This is used
	 by fp moves, which force all constants to memory to allow
	 combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting a push, take care of source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address
     register of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* A collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only the first 12 bytes of the TFmode value, but
		 for pushing we are required to adjust the stack as if we
		 were pushing a real 16 byte value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have a 32bit push available.  In case
	     this is a register, it is OK - we will just use the larger
	     counterpart.  We also retype memory - these come from an
	     attempt to avoid a REX prefix on moving the second half of a
	     TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose the correct order so as not to overwrite the source before
     it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
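
/* A standalone sketch, not used by the compiler, of the ordering rule
   above: when the destination overlaps a source still to be read, copy
   the parts highest-first so nothing is clobbered before it is used.  */
static void
ordered_copy_example (unsigned int *dst, const unsigned int *src, int nparts)
{
  int i;
  if (dst > src && dst < src + nparts)	/* forward copy would clobber */
    for (i = nparts - 1; i >= 0; i--)
      dst[i] = src[i];
  else
    for (i = 0; i < nparts; i++)
      dst[i] = src[i];
}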

void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
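
/* A standalone sketch, not used by the compiler, of the constant-count
   case above, assuming 32-bit unsigned int: a 64-bit left shift built
   from the two 32-bit halves.  */
static void
shl64_example (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count == 0)
    return;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* movl lo, hi; sall $count-32, hi */
      *lo = 0;				/* movl $0, lo */
    }
  else
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
      *lo <<= count;					/* sall */
    }
}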
32b5b1aa 10360
e075ae69
RH
10361void
10362ix86_split_ashrdi (operands, scratch)
10363 rtx *operands, scratch;
32b5b1aa 10364{
e075ae69
RH
10365 rtx low[2], high[2];
10366 int count;
32b5b1aa 10367
e075ae69
RH
10368 if (GET_CODE (operands[2]) == CONST_INT)
10369 {
10370 split_di (operands, 2, low, high);
10371 count = INTVAL (operands[2]) & 63;
32b5b1aa 10372
e075ae69
RH
10373 if (count >= 32)
10374 {
10375 emit_move_insn (low[0], high[1]);
32b5b1aa 10376
e075ae69
RH
10377 if (! reload_completed)
10378 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10379 else
10380 {
10381 emit_move_insn (high[0], low[0]);
10382 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10383 }
10384
10385 if (count > 32)
10386 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10387 }
10388 else
10389 {
10390 if (!rtx_equal_p (operands[0], operands[1]))
10391 emit_move_insn (operands[0], operands[1]);
10392 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10393 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10394 }
10395 }
10396 else
32b5b1aa 10397 {
e075ae69
RH
10398 if (!rtx_equal_p (operands[0], operands[1]))
10399 emit_move_insn (operands[0], operands[1]);
10400
10401 split_di (operands, 1, low, high);
10402
10403 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10404 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10405
fe577e58 10406 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10407 {
fe577e58 10408 if (! no_new_pseudos)
e075ae69
RH
10409 scratch = gen_reg_rtx (SImode);
10410 emit_move_insn (scratch, high[0]);
10411 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10412 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10413 scratch));
10414 }
10415 else
10416 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10417 }
e075ae69 10418}
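/* Companion sketch for the arithmetic right-shift split, again purely
   illustrative (it assumes the usual arithmetic behavior of >> on
   signed int).  For count >= 32 the high word supplies the low word
   and is then replaced by copies of its sign bit.  */

static void
ix86_ashrdi_sketch (int *hi, unsigned int *lo, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = *hi >> (count - 32);
      *hi = *hi >> 31;			/* sign fill */
    }
  else if (count > 0)
    {
      /* shrd followed by sar.  */
      *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count));
      *hi >>= count;
    }
}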
32b5b1aa 10419
e075ae69
RH
10420void
10421ix86_split_lshrdi (operands, scratch)
10422 rtx *operands, scratch;
10423{
10424 rtx low[2], high[2];
10425 int count;
32b5b1aa 10426
e075ae69 10427 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10428 {
e075ae69
RH
10429 split_di (operands, 2, low, high);
10430 count = INTVAL (operands[2]) & 63;
10431
10432 if (count >= 32)
c7271385 10433 {
e075ae69
RH
10434 emit_move_insn (low[0], high[1]);
10435 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10436
e075ae69
RH
10437 if (count > 32)
10438 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10439 }
10440 else
10441 {
10442 if (!rtx_equal_p (operands[0], operands[1]))
10443 emit_move_insn (operands[0], operands[1]);
10444 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10445 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10446 }
32b5b1aa 10447 }
e075ae69
RH
10448 else
10449 {
10450 if (!rtx_equal_p (operands[0], operands[1]))
10451 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10452
e075ae69
RH
10453 split_di (operands, 1, low, high);
10454
10455 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10456 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10457
10458 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 10459 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10460 {
fe577e58 10461 if (! no_new_pseudos)
e075ae69
RH
10462 scratch = force_reg (SImode, const0_rtx);
10463 else
10464 emit_move_insn (scratch, const0_rtx);
10465
10466 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10467 scratch));
10468 }
10469 else
10470 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10471 }
32b5b1aa 10472}
3f803cd9 10473
0407c02b 10474/* Helper function for the string operations below.  Test whether VARIABLE
0945b39d
JH
 10475   is aligned to VALUE bytes.  If so, jump to the returned label. */
10476static rtx
10477ix86_expand_aligntest (variable, value)
10478 rtx variable;
10479 int value;
10480{
10481 rtx label = gen_label_rtx ();
10482 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10483 if (GET_MODE (variable) == DImode)
10484 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10485 else
10486 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10487 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10488 1, label);
0945b39d
JH
10489 return label;
10490}
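/* What the emitted RTL boils down to, as a hypothetical stand-alone C
   test: the caller places its unaligned fixup code between this test
   and the returned label, so the fixup is skipped whenever the tested
   bits are clear.  */

static int
aligntest_sketch (unsigned long variable, int value)
{
  return (variable & value) == 0;	/* nonzero: jump past the fixup */
}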
10491
 10492/* Decrease COUNTREG by VALUE. */
10493static void
10494ix86_adjust_counter (countreg, value)
10495 rtx countreg;
10496 HOST_WIDE_INT value;
10497{
10498 if (GET_MODE (countreg) == DImode)
10499 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10500 else
10501 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10502}
10503
 10504/* Zero-extend EXP, which is possibly SImode, to a Pmode register. */
d24b3457 10505rtx
0945b39d
JH
10506ix86_zero_extend_to_Pmode (exp)
10507 rtx exp;
10508{
10509 rtx r;
10510 if (GET_MODE (exp) == VOIDmode)
10511 return force_reg (Pmode, exp);
10512 if (GET_MODE (exp) == Pmode)
10513 return copy_to_mode_reg (Pmode, exp);
10514 r = gen_reg_rtx (Pmode);
10515 emit_insn (gen_zero_extendsidi2 (r, exp));
10516 return r;
10517}
10518
10519/* Expand string move (memcpy) operation. Use i386 string operations when
10520 profitable. expand_clrstr contains similar code. */
10521int
10522ix86_expand_movstr (dst, src, count_exp, align_exp)
10523 rtx dst, src, count_exp, align_exp;
10524{
10525 rtx srcreg, destreg, countreg;
10526 enum machine_mode counter_mode;
10527 HOST_WIDE_INT align = 0;
10528 unsigned HOST_WIDE_INT count = 0;
10529 rtx insns;
10530
10531 start_sequence ();
10532
10533 if (GET_CODE (align_exp) == CONST_INT)
10534 align = INTVAL (align_exp);
10535
5519a4f9 10536 /* This simple hack avoids all of the inlining code and simplifies the code below. */
0945b39d
JH
10537 if (!TARGET_ALIGN_STRINGOPS)
10538 align = 64;
10539
10540 if (GET_CODE (count_exp) == CONST_INT)
10541 count = INTVAL (count_exp);
10542
 10543 /* Figure out the proper mode for the counter.  For 32 bits it is always SImode;
 10544 for 64 bits use SImode when possible, otherwise DImode.
 10545 Set count to the number of bytes copied when known at compile time. */
10546 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10547 || x86_64_zero_extended_value (count_exp))
10548 counter_mode = SImode;
10549 else
10550 counter_mode = DImode;
10551
10552 if (counter_mode != SImode && counter_mode != DImode)
10553 abort ();
10554
10555 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10556 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10557
10558 emit_insn (gen_cld ());
10559
 10560 /* When optimizing for size, emit a simple rep ; movsb instruction for
 10561 counts not divisible by 4. */
10562
10563 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10564 {
10565 countreg = ix86_zero_extend_to_Pmode (count_exp);
10566 if (TARGET_64BIT)
10567 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10568 destreg, srcreg, countreg));
10569 else
10570 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10571 destreg, srcreg, countreg));
10572 }
10573
10574 /* For constant aligned (or small unaligned) copies use rep movsl
10575 followed by code copying the rest. For PentiumPro ensure 8 byte
10576 alignment to allow rep movsl acceleration. */
10577
10578 else if (count != 0
10579 && (align >= 8
10580 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10581 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10582 {
10583 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10584 if (count & ~(size - 1))
10585 {
10586 countreg = copy_to_mode_reg (counter_mode,
10587 GEN_INT ((count >> (size == 4 ? 2 : 3))
10588 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10589 countreg = ix86_zero_extend_to_Pmode (countreg);
10590 if (size == 4)
10591 {
10592 if (TARGET_64BIT)
10593 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10594 destreg, srcreg, countreg));
10595 else
10596 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10597 destreg, srcreg, countreg));
10598 }
10599 else
10600 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10601 destreg, srcreg, countreg));
10602 }
10603 if (size == 8 && (count & 0x04))
10604 emit_insn (gen_strmovsi (destreg, srcreg));
10605 if (count & 0x02)
10606 emit_insn (gen_strmovhi (destreg, srcreg));
10607 if (count & 0x01)
10608 emit_insn (gen_strmovqi (destreg, srcreg));
10609 }
10610 /* The generic code based on the glibc implementation:
10611 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10612 allowing accelerated copying there)
10613 - copy the data using rep movsl
10614 - copy the rest. */
10615 else
10616 {
10617 rtx countreg2;
10618 rtx label = NULL;
37ad04a5
JH
10619 int desired_alignment = (TARGET_PENTIUMPRO
10620 && (count == 0 || count >= (unsigned int) 260)
10621 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10622
 10623 /* In case we don't know anything about the alignment, default to the
 10624 library version, since it is usually equally fast and results in
4977bab6
ZW
 10625 shorter code.
 10626
 10627 Also emit a call when we know that the count is large and the call
 10628 overhead will not be important. */
10629 if (!TARGET_INLINE_ALL_STRINGOPS
10630 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
10631 {
10632 end_sequence ();
10633 return 0;
10634 }
10635
10636 if (TARGET_SINGLE_STRINGOP)
10637 emit_insn (gen_cld ());
10638
10639 countreg2 = gen_reg_rtx (Pmode);
10640 countreg = copy_to_mode_reg (counter_mode, count_exp);
10641
 10642 /* We don't use loops to align the destination or to copy parts smaller
 10643 than 4 bytes, because gcc is able to optimize such code better (in
 10644 the case the destination or the count really is aligned, gcc is often
 10645 able to predict the branches) and also it is friendlier to the
a4f31c00 10646 hardware branch prediction.
0945b39d
JH
 10647
 10648 Using loops is beneficial for the generic case, because we can
 10649 handle small counts using the loops.  Many CPUs (such as Athlon)
 10650 have large REP prefix setup costs.
 10651
 10652 This is quite costly.  Maybe we can revisit this decision later or
 10653 add some customizability to this code. */
10654
37ad04a5 10655 if (count == 0 && align < desired_alignment)
0945b39d
JH
10656 {
10657 label = gen_label_rtx ();
aaae0bb9 10658 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10659 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10660 }
10661 if (align <= 1)
10662 {
10663 rtx label = ix86_expand_aligntest (destreg, 1);
10664 emit_insn (gen_strmovqi (destreg, srcreg));
10665 ix86_adjust_counter (countreg, 1);
10666 emit_label (label);
10667 LABEL_NUSES (label) = 1;
10668 }
10669 if (align <= 2)
10670 {
10671 rtx label = ix86_expand_aligntest (destreg, 2);
10672 emit_insn (gen_strmovhi (destreg, srcreg));
10673 ix86_adjust_counter (countreg, 2);
10674 emit_label (label);
10675 LABEL_NUSES (label) = 1;
10676 }
37ad04a5 10677 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10678 {
10679 rtx label = ix86_expand_aligntest (destreg, 4);
10680 emit_insn (gen_strmovsi (destreg, srcreg));
10681 ix86_adjust_counter (countreg, 4);
10682 emit_label (label);
10683 LABEL_NUSES (label) = 1;
10684 }
10685
37ad04a5
JH
10686 if (label && desired_alignment > 4 && !TARGET_64BIT)
10687 {
10688 emit_label (label);
10689 LABEL_NUSES (label) = 1;
10690 label = NULL_RTX;
10691 }
0945b39d
JH
10692 if (!TARGET_SINGLE_STRINGOP)
10693 emit_insn (gen_cld ());
10694 if (TARGET_64BIT)
10695 {
10696 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10697 GEN_INT (3)));
10698 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10699 destreg, srcreg, countreg2));
10700 }
10701 else
10702 {
10703 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10704 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10705 destreg, srcreg, countreg2));
10706 }
10707
10708 if (label)
10709 {
10710 emit_label (label);
10711 LABEL_NUSES (label) = 1;
10712 }
10713 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10714 emit_insn (gen_strmovsi (destreg, srcreg));
10715 if ((align <= 4 || count == 0) && TARGET_64BIT)
10716 {
10717 rtx label = ix86_expand_aligntest (countreg, 4);
10718 emit_insn (gen_strmovsi (destreg, srcreg));
10719 emit_label (label);
10720 LABEL_NUSES (label) = 1;
10721 }
10722 if (align > 2 && count != 0 && (count & 2))
10723 emit_insn (gen_strmovhi (destreg, srcreg));
10724 if (align <= 2 || count == 0)
10725 {
10726 rtx label = ix86_expand_aligntest (countreg, 2);
10727 emit_insn (gen_strmovhi (destreg, srcreg));
10728 emit_label (label);
10729 LABEL_NUSES (label) = 1;
10730 }
10731 if (align > 1 && count != 0 && (count & 1))
10732 emit_insn (gen_strmovqi (destreg, srcreg));
10733 if (align <= 1 || count == 0)
10734 {
10735 rtx label = ix86_expand_aligntest (countreg, 1);
10736 emit_insn (gen_strmovqi (destreg, srcreg));
10737 emit_label (label);
10738 LABEL_NUSES (label) = 1;
10739 }
10740 }
10741
10742 insns = get_insns ();
10743 end_sequence ();
10744
10745 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10746 emit_insn (insns);
0945b39d
JH
10747 return 1;
10748}
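/* Hypothetical C model of the constant-count path above (32-bit case
   with size == 4): bulk-copy whole words as rep movsl would, then
   finish with at most one 2-byte and one 1-byte move.  __builtin_memcpy
   stands in for the string insns; the function name is illustrative
   only, not compiler source.  */

static void
movstr_sketch (unsigned char *dst, const unsigned char *src,
	       unsigned long count)
{
  unsigned long bulk = count & ~3UL;

  __builtin_memcpy (dst, src, bulk);		/* rep movsl of count >> 2 words */
  dst += bulk, src += bulk;
  if (count & 2)				/* strmovhi */
    __builtin_memcpy (dst, src, 2), dst += 2, src += 2;
  if (count & 1)				/* strmovqi */
    *dst = *src;
}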
10749
10750/* Expand string clear operation (bzero). Use i386 string operations when
10751 profitable. expand_movstr contains similar code. */
10752int
10753ix86_expand_clrstr (src, count_exp, align_exp)
10754 rtx src, count_exp, align_exp;
10755{
10756 rtx destreg, zeroreg, countreg;
10757 enum machine_mode counter_mode;
10758 HOST_WIDE_INT align = 0;
10759 unsigned HOST_WIDE_INT count = 0;
10760
10761 if (GET_CODE (align_exp) == CONST_INT)
10762 align = INTVAL (align_exp);
10763
5519a4f9 10764 /* This simple hack avoids all of the inlining code and simplifies the code below. */
0945b39d
JH
10765 if (!TARGET_ALIGN_STRINGOPS)
10766 align = 32;
10767
10768 if (GET_CODE (count_exp) == CONST_INT)
10769 count = INTVAL (count_exp);
 10770 /* Figure out the proper mode for the counter.  For 32 bits it is always SImode;
 10771 for 64 bits use SImode when possible, otherwise DImode.
 10772 Set count to the number of bytes copied when known at compile time. */
10773 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10774 || x86_64_zero_extended_value (count_exp))
10775 counter_mode = SImode;
10776 else
10777 counter_mode = DImode;
10778
10779 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10780
10781 emit_insn (gen_cld ());
10782
 10783 /* When optimizing for size, emit a simple rep ; stosb instruction for
 10784 counts not divisible by 4. */
10785
10786 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10787 {
10788 countreg = ix86_zero_extend_to_Pmode (count_exp);
10789 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10790 if (TARGET_64BIT)
10791 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10792 destreg, countreg));
10793 else
10794 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10795 destreg, countreg));
10796 }
10797 else if (count != 0
10798 && (align >= 8
10799 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10800 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10801 {
10802 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10803 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10804 if (count & ~(size - 1))
10805 {
10806 countreg = copy_to_mode_reg (counter_mode,
10807 GEN_INT ((count >> (size == 4 ? 2 : 3))
10808 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10809 countreg = ix86_zero_extend_to_Pmode (countreg);
10810 if (size == 4)
10811 {
10812 if (TARGET_64BIT)
10813 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10814 destreg, countreg));
10815 else
10816 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10817 destreg, countreg));
10818 }
10819 else
10820 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10821 destreg, countreg));
10822 }
10823 if (size == 8 && (count & 0x04))
10824 emit_insn (gen_strsetsi (destreg,
10825 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10826 if (count & 0x02)
10827 emit_insn (gen_strsethi (destreg,
10828 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10829 if (count & 0x01)
10830 emit_insn (gen_strsetqi (destreg,
10831 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10832 }
10833 else
10834 {
10835 rtx countreg2;
10836 rtx label = NULL;
37ad04a5
JH
10837 /* Compute desired alignment of the string operation. */
10838 int desired_alignment = (TARGET_PENTIUMPRO
10839 && (count == 0 || count >= (unsigned int) 260)
10840 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10841
 10842 /* In case we don't know anything about the alignment, default to the
 10843 library version, since it is usually equally fast and results in
4977bab6
ZW
 10844 shorter code.
 10845
 10846 Also emit a call when we know that the count is large and the call
 10847 overhead will not be important. */
10848 if (!TARGET_INLINE_ALL_STRINGOPS
10849 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
10850 return 0;
10851
10852 if (TARGET_SINGLE_STRINGOP)
10853 emit_insn (gen_cld ());
10854
10855 countreg2 = gen_reg_rtx (Pmode);
10856 countreg = copy_to_mode_reg (counter_mode, count_exp);
10857 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10858
37ad04a5 10859 if (count == 0 && align < desired_alignment)
0945b39d
JH
10860 {
10861 label = gen_label_rtx ();
37ad04a5 10862 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10863 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10864 }
10865 if (align <= 1)
10866 {
10867 rtx label = ix86_expand_aligntest (destreg, 1);
10868 emit_insn (gen_strsetqi (destreg,
10869 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10870 ix86_adjust_counter (countreg, 1);
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10873 }
10874 if (align <= 2)
10875 {
10876 rtx label = ix86_expand_aligntest (destreg, 2);
10877 emit_insn (gen_strsethi (destreg,
10878 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10879 ix86_adjust_counter (countreg, 2);
10880 emit_label (label);
10881 LABEL_NUSES (label) = 1;
10882 }
37ad04a5 10883 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10884 {
10885 rtx label = ix86_expand_aligntest (destreg, 4);
10886 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10887 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10888 : zeroreg)));
10889 ix86_adjust_counter (countreg, 4);
10890 emit_label (label);
10891 LABEL_NUSES (label) = 1;
10892 }
10893
37ad04a5
JH
10894 if (label && desired_alignment > 4 && !TARGET_64BIT)
10895 {
10896 emit_label (label);
10897 LABEL_NUSES (label) = 1;
10898 label = NULL_RTX;
10899 }
10900
0945b39d
JH
10901 if (!TARGET_SINGLE_STRINGOP)
10902 emit_insn (gen_cld ());
10903 if (TARGET_64BIT)
10904 {
10905 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10906 GEN_INT (3)));
10907 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10908 destreg, countreg2));
10909 }
10910 else
10911 {
10912 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10913 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10914 destreg, countreg2));
10915 }
0945b39d
JH
10916 if (label)
10917 {
10918 emit_label (label);
10919 LABEL_NUSES (label) = 1;
10920 }
37ad04a5 10921
0945b39d
JH
10922 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10923 emit_insn (gen_strsetsi (destreg,
10924 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10925 if (TARGET_64BIT && (align <= 4 || count == 0))
10926 {
79258dce 10927 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
10928 emit_insn (gen_strsetsi (destreg,
10929 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10930 emit_label (label);
10931 LABEL_NUSES (label) = 1;
10932 }
10933 if (align > 2 && count != 0 && (count & 2))
10934 emit_insn (gen_strsethi (destreg,
10935 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10936 if (align <= 2 || count == 0)
10937 {
74411039 10938 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10939 emit_insn (gen_strsethi (destreg,
10940 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10941 emit_label (label);
10942 LABEL_NUSES (label) = 1;
10943 }
10944 if (align > 1 && count != 0 && (count & 1))
10945 emit_insn (gen_strsetqi (destreg,
10946 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10947 if (align <= 1 || count == 0)
10948 {
74411039 10949 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10950 emit_insn (gen_strsetqi (destreg,
10951 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10952 emit_label (label);
10953 LABEL_NUSES (label) = 1;
10954 }
10955 }
10956 return 1;
10957}
10958/* Expand strlen. */
10959int
10960ix86_expand_strlen (out, src, eoschar, align)
10961 rtx out, src, eoschar, align;
10962{
10963 rtx addr, scratch1, scratch2, scratch3, scratch4;
10964
 10965 /* The generic case of the strlen expander is long.  Avoid expanding it
 10966 unless TARGET_INLINE_ALL_STRINGOPS. */
10967
10968 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10969 && !TARGET_INLINE_ALL_STRINGOPS
10970 && !optimize_size
10971 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10972 return 0;
10973
10974 addr = force_reg (Pmode, XEXP (src, 0));
10975 scratch1 = gen_reg_rtx (Pmode);
10976
10977 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10978 && !optimize_size)
10979 {
10980 /* Well it seems that some optimizer does not combine a call like
10981 foo(strlen(bar), strlen(bar));
 10982 when the move and the subtraction are done here.  It does calculate
 10983 the length just once when these instructions are done inside
10984 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10985 often used and I use one fewer register for the lifetime of
10986 output_strlen_unroll() this is better. */
10987
10988 emit_move_insn (out, addr);
10989
10990 ix86_expand_strlensi_unroll_1 (out, align);
10991
10992 /* strlensi_unroll_1 returns the address of the zero at the end of
10993 the string, like memchr(), so compute the length by subtracting
10994 the start address. */
10995 if (TARGET_64BIT)
10996 emit_insn (gen_subdi3 (out, out, addr));
10997 else
10998 emit_insn (gen_subsi3 (out, out, addr));
10999 }
11000 else
11001 {
11002 scratch2 = gen_reg_rtx (Pmode);
11003 scratch3 = gen_reg_rtx (Pmode);
11004 scratch4 = force_reg (Pmode, constm1_rtx);
11005
11006 emit_move_insn (scratch3, addr);
11007 eoschar = force_reg (QImode, eoschar);
11008
11009 emit_insn (gen_cld ());
11010 if (TARGET_64BIT)
11011 {
11012 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11013 align, scratch4, scratch3));
11014 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11015 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11016 }
11017 else
11018 {
11019 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11020 align, scratch4, scratch3));
11021 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11022 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11023 }
11024 }
11025 return 1;
11026}
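/* Arithmetic behind the repnz ; scasb path above: ECX starts at -1 and
   is decremented once per byte scanned, terminator included, so the
   residual count is -len - 2 and the emitted one_cmpl + add -1 compute
   len = ~ECX - 1.  A hypothetical stand-alone rendering:  */

static unsigned long
strlen_from_scas_residual (long ecx)
{
  return (unsigned long) ~ecx - 1;	/* ecx == -len - 2 after the scan */
}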
11027
e075ae69
RH
11028/* Expand the appropriate insns for doing strlen if not just doing
11029 repnz; scasb
11030
11031 out = result, initialized with the start address
11032 align_rtx = alignment of the address.
 11033 scratch = scratch register, initialized with the start address when
77ebd435 11034 not aligned, otherwise undefined
3f803cd9
SC
11035
 11036 This is just the body.  It needs the initializations mentioned above and
11037 some address computing at the end. These things are done in i386.md. */
11038
0945b39d
JH
11039static void
11040ix86_expand_strlensi_unroll_1 (out, align_rtx)
11041 rtx out, align_rtx;
3f803cd9 11042{
e075ae69
RH
11043 int align;
11044 rtx tmp;
11045 rtx align_2_label = NULL_RTX;
11046 rtx align_3_label = NULL_RTX;
11047 rtx align_4_label = gen_label_rtx ();
11048 rtx end_0_label = gen_label_rtx ();
e075ae69 11049 rtx mem;
e2e52e1b 11050 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11051 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
11052
11053 align = 0;
11054 if (GET_CODE (align_rtx) == CONST_INT)
11055 align = INTVAL (align_rtx);
3f803cd9 11056
e9a25f70 11057 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11058
e9a25f70 11059 /* Is there a known alignment and is it less than 4? */
e075ae69 11060 if (align < 4)
3f803cd9 11061 {
0945b39d
JH
11062 rtx scratch1 = gen_reg_rtx (Pmode);
11063 emit_move_insn (scratch1, out);
e9a25f70 11064 /* Is there a known alignment and is it not 2? */
e075ae69 11065 if (align != 2)
3f803cd9 11066 {
e075ae69
RH
11067 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11068 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11069
11070 /* Leave just the 3 lower bits. */
0945b39d 11071 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11072 NULL_RTX, 0, OPTAB_WIDEN);
11073
9076b9c1 11074 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11075 Pmode, 1, align_4_label);
9076b9c1 11076 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 11077 Pmode, 1, align_2_label);
9076b9c1 11078 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 11079 Pmode, 1, align_3_label);
3f803cd9
SC
11080 }
11081 else
11082 {
e9a25f70
JL
11083 /* Since the alignment is 2, we have to check 2 or 0 bytes;
 11084 check whether it is aligned to a 4-byte boundary. */
e9a25f70 11085
0945b39d 11086 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
11087 NULL_RTX, 0, OPTAB_WIDEN);
11088
9076b9c1 11089 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11090 Pmode, 1, align_4_label);
3f803cd9
SC
11091 }
11092
e075ae69 11093 mem = gen_rtx_MEM (QImode, out);
e9a25f70 11094
e075ae69 11095 /* Now compare the bytes. */
e9a25f70 11096
0f290768 11097 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9076b9c1 11098 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11099 QImode, 1, end_0_label);
3f803cd9 11100
0f290768 11101 /* Increment the address. */
0945b39d
JH
11102 if (TARGET_64BIT)
11103 emit_insn (gen_adddi3 (out, out, const1_rtx));
11104 else
11105 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11106
e075ae69
RH
11107 /* Not needed with an alignment of 2 */
11108 if (align != 2)
11109 {
11110 emit_label (align_2_label);
3f803cd9 11111
d43e0b7d
RK
11112 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11113 end_0_label);
e075ae69 11114
0945b39d
JH
11115 if (TARGET_64BIT)
11116 emit_insn (gen_adddi3 (out, out, const1_rtx));
11117 else
11118 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11119
11120 emit_label (align_3_label);
11121 }
11122
d43e0b7d
RK
11123 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11124 end_0_label);
e075ae69 11125
0945b39d
JH
11126 if (TARGET_64BIT)
11127 emit_insn (gen_adddi3 (out, out, const1_rtx));
11128 else
11129 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11130 }
11131
e075ae69
RH
 11132 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
 11133 to align this loop; doing so only enlarges the program and does not
 11134 help speed. */
11135 emit_label (align_4_label);
3f803cd9 11136
e075ae69
RH
11137 mem = gen_rtx_MEM (SImode, out);
11138 emit_move_insn (scratch, mem);
0945b39d
JH
11139 if (TARGET_64BIT)
11140 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11141 else
11142 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11143
e2e52e1b
JH
11144 /* This formula yields a nonzero result iff one of the bytes is zero.
 11145 This saves three branches inside the loop and many cycles. */
11146
11147 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11148 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11149 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11150 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11151 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11152 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11153 align_4_label);
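/* Hypothetical stand-alone form of the zero-byte test built above.
   The expression is nonzero exactly when some byte of W is zero, so
   the loop tests four bytes with three ALU operations and one branch.  */

static int
has_zero_byte (unsigned int w)
{
  return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
}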
e2e52e1b
JH
11154
11155 if (TARGET_CMOVE)
11156 {
11157 rtx reg = gen_reg_rtx (SImode);
0945b39d 11158 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11159 emit_move_insn (reg, tmpreg);
11160 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11161
0f290768 11162 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11163 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11164 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11165 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11166 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11167 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11168 reg,
11169 tmpreg)));
e2e52e1b 11170 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
11171 emit_insn (gen_rtx_SET (SImode, reg2,
11172 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
11173
11174 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11175 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11176 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11177 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11178 reg2,
11179 out)));
e2e52e1b
JH
11180
11181 }
11182 else
11183 {
11184 rtx end_2_label = gen_label_rtx ();
11185 /* Is zero in the first two bytes? */
11186
16189740 11187 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11188 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11189 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11190 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11191 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11192 pc_rtx);
11193 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11194 JUMP_LABEL (tmp) = end_2_label;
11195
0f290768 11196 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11197 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
11198 if (TARGET_64BIT)
11199 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11200 else
11201 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
11202
11203 emit_label (end_2_label);
11204
11205 }
11206
0f290768 11207 /* Avoid a branch in fixing up the byte. */
e2e52e1b 11208 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11209 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
11210 if (TARGET_64BIT)
11211 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11212 else
11213 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
11214
11215 emit_label (end_0_label);
11216}
0e07aff3
RH
11217
11218void
4977bab6 11219ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
0e07aff3 11220 rtx retval, fnaddr, callarg1, callarg2, pop;
4977bab6 11221 int sibcall;
0e07aff3
RH
11222{
11223 rtx use = NULL, call;
11224
11225 if (pop == const0_rtx)
11226 pop = NULL;
11227 if (TARGET_64BIT && pop)
11228 abort ();
11229
b069de3b
SS
11230#if TARGET_MACHO
11231 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11232 fnaddr = machopic_indirect_call_target (fnaddr);
11233#else
0e07aff3
RH
11234 /* Static functions and indirect calls don't need the pic register. */
11235 if (! TARGET_64BIT && flag_pic
11236 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11237 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 11238 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11239
11240 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11241 {
11242 rtx al = gen_rtx_REG (QImode, 0);
11243 emit_move_insn (al, callarg2);
11244 use_reg (&use, al);
11245 }
b069de3b 11246#endif /* TARGET_MACHO */
0e07aff3
RH
11247
11248 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11249 {
11250 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11251 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11252 }
4977bab6
ZW
11253 if (sibcall && TARGET_64BIT
11254 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11255 {
11256 rtx addr;
11257 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11258 fnaddr = gen_rtx_REG (Pmode, 40);
11259 emit_move_insn (fnaddr, addr);
11260 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11261 }
0e07aff3
RH
11262
11263 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11264 if (retval)
11265 call = gen_rtx_SET (VOIDmode, retval, call);
11266 if (pop)
11267 {
11268 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11269 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11270 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11271 }
11272
11273 call = emit_call_insn (call);
11274 if (use)
11275 CALL_INSN_FUNCTION_USAGE (call) = use;
11276}
fce5a9f2 11277
e075ae69 11278\f
e075ae69
RH
11279/* Clear stack slot assignments remembered from previous functions.
11280 This is called from INIT_EXPANDERS once before RTL is emitted for each
11281 function. */
11282
e2500fed
GK
11283static struct machine_function *
11284ix86_init_machine_status ()
37b15744 11285{
e2500fed 11286 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
11287}
11288
e075ae69
RH
11289/* Return a MEM corresponding to a stack slot with mode MODE.
11290 Allocate a new slot if necessary.
11291
11292 The RTL for a function can have several slots available: N is
11293 which slot to use. */
11294
11295rtx
11296assign_386_stack_local (mode, n)
11297 enum machine_mode mode;
11298 int n;
11299{
11300 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11301 abort ();
11302
11303 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11304 ix86_stack_locals[(int) mode][n]
11305 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11306
11307 return ix86_stack_locals[(int) mode][n];
11308}
f996902d
RH
11309
11310/* Construct the SYMBOL_REF for the tls_get_addr function. */
11311
e2500fed 11312static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
11313rtx
11314ix86_tls_get_addr ()
11315{
f996902d 11316
e2500fed 11317 if (!ix86_tls_symbol)
f996902d 11318 {
75d38379
JJ
11319 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11320 (TARGET_GNU_TLS && !TARGET_64BIT)
11321 ? "___tls_get_addr"
11322 : "__tls_get_addr");
f996902d
RH
11323 }
11324
e2500fed 11325 return ix86_tls_symbol;
f996902d 11326}
e075ae69
RH
11327\f
11328/* Calculate the length of the memory address in the instruction
11329 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11330
11331static int
11332memory_address_length (addr)
11333 rtx addr;
11334{
11335 struct ix86_address parts;
11336 rtx base, index, disp;
11337 int len;
11338
11339 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11340 || GET_CODE (addr) == POST_INC
11341 || GET_CODE (addr) == PRE_MODIFY
11342 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11343 return 0;
3f803cd9 11344
e075ae69
RH
11345 if (! ix86_decompose_address (addr, &parts))
11346 abort ();
3f803cd9 11347
e075ae69
RH
11348 base = parts.base;
11349 index = parts.index;
11350 disp = parts.disp;
11351 len = 0;
3f803cd9 11352
e075ae69
RH
11353 /* Register Indirect. */
11354 if (base && !index && !disp)
11355 {
11356 /* Special cases: ebp and esp need the two-byte modrm form. */
11357 if (addr == stack_pointer_rtx
11358 || addr == arg_pointer_rtx
564d80f4
JH
11359 || addr == frame_pointer_rtx
11360 || addr == hard_frame_pointer_rtx)
e075ae69 11361 len = 1;
3f803cd9 11362 }
e9a25f70 11363
e075ae69
RH
11364 /* Direct Addressing. */
11365 else if (disp && !base && !index)
11366 len = 4;
11367
3f803cd9
SC
11368 else
11369 {
e075ae69
RH
11370 /* Find the length of the displacement constant. */
11371 if (disp)
11372 {
11373 if (GET_CODE (disp) == CONST_INT
11374 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11375 len = 1;
11376 else
11377 len = 4;
11378 }
3f803cd9 11379
e075ae69
RH
11380 /* An index requires the two-byte modrm form. */
11381 if (index)
11382 len += 1;
3f803cd9
SC
11383 }
11384
e075ae69
RH
11385 return len;
11386}
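/* Worked examples of the lengths computed above (32-bit encodings,
   excluding the modrm/opcode/prefix bytes; illustrative, not
   exhaustive): (%eax) -> 0; (%esp) -> 1 (needs a SIB byte);
   12(%ebp) -> 1 (disp8); foo -> 4 (disp32); 4(%ebx,%ecx,2) -> 2
   (disp8 plus the SIB byte required by the index).  */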
79325812 11387
5bf0ebab
RH
11388/* Compute default value for "length_immediate" attribute. When SHORTFORM
 11389 is set, expect that the insn has an 8-bit immediate alternative. */
e075ae69 11390int
6ef67412 11391ix86_attr_length_immediate_default (insn, shortform)
e075ae69 11392 rtx insn;
6ef67412 11393 int shortform;
e075ae69 11394{
6ef67412
JH
11395 int len = 0;
11396 int i;
6c698a6d 11397 extract_insn_cached (insn);
6ef67412
JH
11398 for (i = recog_data.n_operands - 1; i >= 0; --i)
11399 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11400 {
6ef67412 11401 if (len)
3071fab5 11402 abort ();
6ef67412
JH
11403 if (shortform
11404 && GET_CODE (recog_data.operand[i]) == CONST_INT
11405 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11406 len = 1;
11407 else
11408 {
11409 switch (get_attr_mode (insn))
11410 {
11411 case MODE_QI:
11412 len+=1;
11413 break;
11414 case MODE_HI:
11415 len+=2;
11416 break;
11417 case MODE_SI:
11418 len+=4;
11419 break;
14f73b5a
JH
 11420 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11421 case MODE_DI:
11422 len+=4;
11423 break;
6ef67412 11424 default:
c725bd79 11425 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11426 }
11427 }
3071fab5 11428 }
6ef67412
JH
11429 return len;
11430}
11431/* Compute default value for "length_address" attribute. */
11432int
11433ix86_attr_length_address_default (insn)
11434 rtx insn;
11435{
11436 int i;
6c698a6d 11437 extract_insn_cached (insn);
1ccbefce
RH
11438 for (i = recog_data.n_operands - 1; i >= 0; --i)
11439 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11440 {
6ef67412 11441 return memory_address_length (XEXP (recog_data.operand[i], 0));
11443 }
6ef67412 11444 return 0;
3f803cd9 11445}
e075ae69
RH
11446\f
11447/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11448
c237e94a 11449static int
e075ae69 11450ix86_issue_rate ()
b657fc39 11451{
e075ae69 11452 switch (ix86_cpu)
b657fc39 11453 {
e075ae69
RH
11454 case PROCESSOR_PENTIUM:
11455 case PROCESSOR_K6:
11456 return 2;
79325812 11457
e075ae69 11458 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11459 case PROCESSOR_PENTIUM4:
11460 case PROCESSOR_ATHLON:
4977bab6 11461 case PROCESSOR_K8:
e075ae69 11462 return 3;
b657fc39 11463
b657fc39 11464 default:
e075ae69 11465 return 1;
b657fc39 11466 }
b657fc39
L
11467}
11468
e075ae69
RH
11469/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11470 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 11471
e075ae69
RH
11472static int
11473ix86_flags_dependant (insn, dep_insn, insn_type)
11474 rtx insn, dep_insn;
11475 enum attr_type insn_type;
11476{
11477 rtx set, set2;
b657fc39 11478
e075ae69
RH
11479 /* Simplify the test for uninteresting insns. */
11480 if (insn_type != TYPE_SETCC
11481 && insn_type != TYPE_ICMOV
11482 && insn_type != TYPE_FCMOV
11483 && insn_type != TYPE_IBR)
11484 return 0;
b657fc39 11485
e075ae69
RH
11486 if ((set = single_set (dep_insn)) != 0)
11487 {
11488 set = SET_DEST (set);
11489 set2 = NULL_RTX;
11490 }
11491 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11492 && XVECLEN (PATTERN (dep_insn), 0) == 2
11493 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11494 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11495 {
11496 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
 11497 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11498 }
78a0d70c
ZW
11499 else
11500 return 0;
b657fc39 11501
78a0d70c
ZW
11502 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11503 return 0;
b657fc39 11504
f5143c46 11505 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11506 not any other potentially set register. */
11507 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11508 return 0;
11509
11510 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11511 return 0;
11512
11513 return 1;
e075ae69 11514}
b657fc39 11515
e075ae69
RH
11516/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11517 address with operands set by DEP_INSN. */
11518
11519static int
11520ix86_agi_dependant (insn, dep_insn, insn_type)
11521 rtx insn, dep_insn;
11522 enum attr_type insn_type;
11523{
11524 rtx addr;
11525
6ad48e84
JH
11526 if (insn_type == TYPE_LEA
11527 && TARGET_PENTIUM)
5fbdde42
RH
11528 {
11529 addr = PATTERN (insn);
11530 if (GET_CODE (addr) == SET)
11531 ;
11532 else if (GET_CODE (addr) == PARALLEL
11533 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11534 addr = XVECEXP (addr, 0, 0);
11535 else
11536 abort ();
11537 addr = SET_SRC (addr);
11538 }
e075ae69
RH
11539 else
11540 {
11541 int i;
6c698a6d 11542 extract_insn_cached (insn);
1ccbefce
RH
11543 for (i = recog_data.n_operands - 1; i >= 0; --i)
11544 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11545 {
1ccbefce 11546 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11547 goto found;
11548 }
11549 return 0;
11550 found:;
b657fc39
L
11551 }
11552
e075ae69 11553 return modified_in_p (addr, dep_insn);
b657fc39 11554}
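/* Illustrative example (not compiler output) of the Pentium AGI stall
   this test looks for:

	addl $4, %eax		# dep_insn writes %eax
	movl (%eax), %ebx	# insn needs %eax for address generation

   Address generation happens early in the Pentium pipeline, so the
   load stalls for a cycle until the add produces %eax.  */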
a269a03c 11555
c237e94a 11556static int
e075ae69 11557ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11558 rtx insn, link, dep_insn;
11559 int cost;
11560{
e075ae69 11561 enum attr_type insn_type, dep_insn_type;
6ad48e84 11562 enum attr_memory memory, dep_memory;
e075ae69 11563 rtx set, set2;
9b00189f 11564 int dep_insn_code_number;
a269a03c 11565
309ada50 11566 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11567 if (REG_NOTE_KIND (link) != 0)
309ada50 11568 return 0;
a269a03c 11569
9b00189f
JH
11570 dep_insn_code_number = recog_memoized (dep_insn);
11571
e075ae69 11572 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11573 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11574 return cost;
a269a03c 11575
1c71e60e
JH
11576 insn_type = get_attr_type (insn);
11577 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11578
a269a03c
JC
11579 switch (ix86_cpu)
11580 {
11581 case PROCESSOR_PENTIUM:
e075ae69
RH
11582 /* Address Generation Interlock adds a cycle of latency. */
11583 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11584 cost += 1;
11585
11586 /* ??? Compares pair with jump/setcc. */
11587 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11588 cost = 0;
11589
 11590 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 11591 if (insn_type == TYPE_FMOV
e075ae69
RH
11592 && get_attr_memory (insn) == MEMORY_STORE
11593 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11594 cost += 1;
11595 break;
a269a03c 11596
e075ae69 11597 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11598 memory = get_attr_memory (insn);
11599 dep_memory = get_attr_memory (dep_insn);
11600
0f290768 11601 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11602 increase the cost here for non-imov insns. */
11603 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11604 && dep_insn_type != TYPE_FMOV
11605 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11606 cost += 1;
11607
11608 /* INT->FP conversion is expensive. */
11609 if (get_attr_fp_int_src (dep_insn))
11610 cost += 5;
11611
11612 /* There is one cycle extra latency between an FP op and a store. */
11613 if (insn_type == TYPE_FMOV
11614 && (set = single_set (dep_insn)) != NULL_RTX
11615 && (set2 = single_set (insn)) != NULL_RTX
11616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11617 && GET_CODE (SET_DEST (set2)) == MEM)
11618 cost += 1;
6ad48e84
JH
11619
 11620 /* Show the ability of the reorder buffer to hide the latency of a load
 11621 by executing it in parallel with the previous instruction when the
 11622 previous instruction is not needed to compute the address. */
11623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11624 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11625 {
 11626 /* Claim moves to take one cycle, as the core can issue one load
 11627 at a time and the next load can start a cycle later. */
11628 if (dep_insn_type == TYPE_IMOV
11629 || dep_insn_type == TYPE_FMOV)
11630 cost = 1;
11631 else if (cost > 1)
11632 cost--;
11633 }
e075ae69 11634 break;
a269a03c 11635
e075ae69 11636 case PROCESSOR_K6:
6ad48e84
JH
11637 memory = get_attr_memory (insn);
11638 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11639 /* The esp dependency is resolved before the instruction is really
11640 finished. */
11641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11643 return 1;
a269a03c 11644
0f290768 11645 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11646 increase the cost here for non-imov insns. */
6ad48e84 11647 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11648 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11649
11650 /* INT->FP conversion is expensive. */
11651 if (get_attr_fp_int_src (dep_insn))
11652 cost += 5;
6ad48e84
JH
11653
 11654 /* Show the ability of the reorder buffer to hide the latency of a load
 11655 by executing it in parallel with the previous instruction when the
 11656 previous instruction is not needed to compute the address. */
11657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11658 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11659 {
 11660 /* Claim moves to take one cycle, as the core can issue one load
 11661 at a time and the next load can start a cycle later. */
11662 if (dep_insn_type == TYPE_IMOV
11663 || dep_insn_type == TYPE_FMOV)
11664 cost = 1;
11665 else if (cost > 2)
11666 cost -= 2;
11667 else
11668 cost = 1;
11669 }
a14003ee 11670 break;
e075ae69 11671
309ada50 11672 case PROCESSOR_ATHLON:
4977bab6 11673 case PROCESSOR_K8:
6ad48e84
JH
11674 memory = get_attr_memory (insn);
11675 dep_memory = get_attr_memory (dep_insn);
11676
6ad48e84
JH
 11677 /* Show the ability of the reorder buffer to hide the latency of a load
 11678 by executing it in parallel with the previous instruction when the
 11679 previous instruction is not needed to compute the address. */
11680 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11681 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11682 {
 11683 /* Claim moves to take one cycle, as the core can issue one load
 11684 at a time and the next load can start a cycle later. */
11685 if (dep_insn_type == TYPE_IMOV
11686 || dep_insn_type == TYPE_FMOV)
11687 cost = 0;
11688 else if (cost >= 3)
11689 cost -= 3;
11690 else
11691 cost = 0;
11692 }
309ada50 11693
a269a03c 11694 default:
a269a03c
JC
11695 break;
11696 }
11697
11698 return cost;
11699}
0a726ef1 11700
e075ae69
RH
11701static union
11702{
11703 struct ppro_sched_data
11704 {
11705 rtx decode[3];
11706 int issued_this_cycle;
11707 } ppro;
11708} ix86_sched_data;
0a726ef1 11709
e075ae69
RH
11710static enum attr_ppro_uops
11711ix86_safe_ppro_uops (insn)
11712 rtx insn;
11713{
11714 if (recog_memoized (insn) >= 0)
11715 return get_attr_ppro_uops (insn);
11716 else
11717 return PPRO_UOPS_MANY;
11718}
0a726ef1 11719
e075ae69
RH
11720static void
11721ix86_dump_ppro_packet (dump)
11722 FILE *dump;
0a726ef1 11723{
e075ae69 11724 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11725 {
e075ae69
RH
11726 fprintf (dump, "PPRO packet: %d",
11727 INSN_UID (ix86_sched_data.ppro.decode[0]));
11728 if (ix86_sched_data.ppro.decode[1])
11729 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11730 if (ix86_sched_data.ppro.decode[2])
11731 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11732 fputc ('\n', dump);
11733 }
11734}
0a726ef1 11735
e075ae69 11736/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11737
c237e94a
ZW
11738static void
11739ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11740 FILE *dump ATTRIBUTE_UNUSED;
11741 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11742 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11743{
11744 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11745}
11746
11747/* Shift INSN to SLOT, and shift everything else down. */
11748
11749static void
11750ix86_reorder_insn (insnp, slot)
11751 rtx *insnp, *slot;
11752{
11753 if (insnp != slot)
11754 {
11755 rtx insn = *insnp;
0f290768 11756 do
e075ae69
RH
11757 insnp[0] = insnp[1];
11758 while (++insnp != slot);
11759 *insnp = insn;
0a726ef1 11760 }
e075ae69
RH
11761}
11762
c6991660 11763static void
78a0d70c
ZW
11764ix86_sched_reorder_ppro (ready, e_ready)
11765 rtx *ready;
11766 rtx *e_ready;
11767{
11768 rtx decode[3];
11769 enum attr_ppro_uops cur_uops;
11770 int issued_this_cycle;
11771 rtx *insnp;
11772 int i;
e075ae69 11773
0f290768 11774 /* At this point .ppro.decode contains the state of the three
78a0d70c 11775 decoders from last "cycle". That is, those insns that were
0f290768 11776 actually independent. But here we're scheduling for the
78a0d70c
ZW
11777 decoder, and we may find things that are decodable in the
11778 same cycle. */
e075ae69 11779
0f290768 11780 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11781 issued_this_cycle = 0;
e075ae69 11782
78a0d70c
ZW
11783 insnp = e_ready;
11784 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11785
78a0d70c
ZW
11786 /* If the decoders are empty, and we've a complex insn at the
11787 head of the priority queue, let it issue without complaint. */
11788 if (decode[0] == NULL)
11789 {
11790 if (cur_uops == PPRO_UOPS_MANY)
11791 {
11792 decode[0] = *insnp;
11793 goto ppro_done;
11794 }
11795
 11796 /* Otherwise, search for a 2-4 uop insn to issue. */
11797 while (cur_uops != PPRO_UOPS_FEW)
11798 {
11799 if (insnp == ready)
11800 break;
11801 cur_uops = ix86_safe_ppro_uops (*--insnp);
11802 }
11803
11804 /* If so, move it to the head of the line. */
11805 if (cur_uops == PPRO_UOPS_FEW)
11806 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11807
78a0d70c
ZW
11808 /* Issue the head of the queue. */
11809 issued_this_cycle = 1;
11810 decode[0] = *e_ready--;
11811 }
fb693d44 11812
78a0d70c
ZW
11813 /* Look for simple insns to fill in the other two slots. */
11814 for (i = 1; i < 3; ++i)
11815 if (decode[i] == NULL)
11816 {
a151daf0 11817 if (ready > e_ready)
78a0d70c 11818 goto ppro_done;
fb693d44 11819
e075ae69
RH
11820 insnp = e_ready;
11821 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11822 while (cur_uops != PPRO_UOPS_ONE)
11823 {
11824 if (insnp == ready)
11825 break;
11826 cur_uops = ix86_safe_ppro_uops (*--insnp);
11827 }
fb693d44 11828
78a0d70c
ZW
11829 /* Found one. Move it to the head of the queue and issue it. */
11830 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11831 {
78a0d70c
ZW
11832 ix86_reorder_insn (insnp, e_ready);
11833 decode[i] = *e_ready--;
11834 issued_this_cycle++;
11835 continue;
11836 }
fb693d44 11837
78a0d70c
ZW
11838 /* ??? Didn't find one. Ideally, here we would do a lazy split
11839 of 2-uop insns, issue one and queue the other. */
11840 }
fb693d44 11841
78a0d70c
ZW
11842 ppro_done:
11843 if (issued_this_cycle == 0)
11844 issued_this_cycle = 1;
11845 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11846}
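/* Background for the reordering above: the PentiumPro decoders follow
   a 4-1-1 template -- decoder 0 accepts a multi-uop insn (up to four
   uops) each cycle, while decoders 1 and 2 accept only single-uop
   insns.  Leading each cycle with one complex insn and back-filling
   the two simple slots therefore matches what the hardware can
   actually decode per cycle.  */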
fb693d44 11847
0f290768 11848/* We are about to begin issuing insns for this clock cycle.
78a0d70c 11849 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11850static int
11851ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11852 FILE *dump ATTRIBUTE_UNUSED;
11853 int sched_verbose ATTRIBUTE_UNUSED;
11854 rtx *ready;
c237e94a 11855 int *n_readyp;
78a0d70c
ZW
11856 int clock_var ATTRIBUTE_UNUSED;
11857{
c237e94a 11858 int n_ready = *n_readyp;
78a0d70c 11859 rtx *e_ready = ready + n_ready - 1;
fb693d44 11860
fce5a9f2 11861 /* Make sure to go ahead and initialize key items in
a151daf0
JL
11862 ix86_sched_data if we are not going to bother trying to
11863 reorder the ready queue. */
78a0d70c 11864 if (n_ready < 2)
a151daf0
JL
11865 {
11866 ix86_sched_data.ppro.issued_this_cycle = 1;
11867 goto out;
11868 }
e075ae69 11869
78a0d70c
ZW
11870 switch (ix86_cpu)
11871 {
11872 default:
11873 break;
e075ae69 11874
78a0d70c
ZW
11875 case PROCESSOR_PENTIUMPRO:
11876 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11877 break;
fb693d44
RH
11878 }
11879
e075ae69
RH
11880out:
11881 return ix86_issue_rate ();
11882}
fb693d44 11883
e075ae69
RH
11884/* We are about to issue INSN. Return the number of insns left on the
11885 ready queue that can be issued this cycle. */
b222082e 11886
c237e94a 11887static int
e075ae69
RH
11888ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11889 FILE *dump;
11890 int sched_verbose;
11891 rtx insn;
11892 int can_issue_more;
11893{
11894 int i;
11895 switch (ix86_cpu)
fb693d44 11896 {
e075ae69
RH
11897 default:
11898 return can_issue_more - 1;
fb693d44 11899
e075ae69
RH
11900 case PROCESSOR_PENTIUMPRO:
11901 {
11902 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11903
e075ae69
RH
11904 if (uops == PPRO_UOPS_MANY)
11905 {
11906 if (sched_verbose)
11907 ix86_dump_ppro_packet (dump);
11908 ix86_sched_data.ppro.decode[0] = insn;
11909 ix86_sched_data.ppro.decode[1] = NULL;
11910 ix86_sched_data.ppro.decode[2] = NULL;
11911 if (sched_verbose)
11912 ix86_dump_ppro_packet (dump);
11913 ix86_sched_data.ppro.decode[0] = NULL;
11914 }
11915 else if (uops == PPRO_UOPS_FEW)
11916 {
11917 if (sched_verbose)
11918 ix86_dump_ppro_packet (dump);
11919 ix86_sched_data.ppro.decode[0] = insn;
11920 ix86_sched_data.ppro.decode[1] = NULL;
11921 ix86_sched_data.ppro.decode[2] = NULL;
11922 }
11923 else
11924 {
11925 for (i = 0; i < 3; ++i)
11926 if (ix86_sched_data.ppro.decode[i] == NULL)
11927 {
11928 ix86_sched_data.ppro.decode[i] = insn;
11929 break;
11930 }
11931 if (i == 3)
11932 abort ();
11933 if (i == 2)
11934 {
11935 if (sched_verbose)
11936 ix86_dump_ppro_packet (dump);
11937 ix86_sched_data.ppro.decode[0] = NULL;
11938 ix86_sched_data.ppro.decode[1] = NULL;
11939 ix86_sched_data.ppro.decode[2] = NULL;
11940 }
11941 }
11942 }
11943 return --ix86_sched_data.ppro.issued_this_cycle;
11944 }
fb693d44 11945}
9b690711
RH
11946
11947static int
11948ia32_use_dfa_pipeline_interface ()
11949{
4977bab6 11950 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
9b690711
RH
11951 return 1;
11952 return 0;
11953}
11954
11955/* How many alternative schedules to try. This should be as wide as the
11956 scheduling freedom in the DFA, but no wider. Making this value too
11957 large results extra work for the scheduler. */
11958
11959static int
11960ia32_multipass_dfa_lookahead ()
11961{
11962 if (ix86_cpu == PROCESSOR_PENTIUM)
11963 return 2;
11964 else
11965 return 0;
11966}
11967
a7180f70 11968\f
0e4970d7
RK
11969/* Walk through INSNS and look for MEM references whose address is DSTREG or
 11970 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11971 appropriate. */
11972
11973void
11974ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11975 rtx insns;
11976 rtx dstref, srcref, dstreg, srcreg;
11977{
11978 rtx insn;
11979
11980 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11981 if (INSN_P (insn))
11982 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11983 dstreg, srcreg);
11984}
11985
11986/* Subroutine of above to actually do the updating by recursively walking
11987 the rtx. */
11988
11989static void
11990ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11991 rtx x;
11992 rtx dstref, srcref, dstreg, srcreg;
11993{
11994 enum rtx_code code = GET_CODE (x);
11995 const char *format_ptr = GET_RTX_FORMAT (code);
11996 int i, j;
11997
11998 if (code == MEM && XEXP (x, 0) == dstreg)
11999 MEM_COPY_ATTRIBUTES (x, dstref);
12000 else if (code == MEM && XEXP (x, 0) == srcreg)
12001 MEM_COPY_ATTRIBUTES (x, srcref);
12002
12003 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12004 {
12005 if (*format_ptr == 'e')
12006 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12007 dstreg, srcreg);
12008 else if (*format_ptr == 'E')
12009 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 12010 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
12011 dstreg, srcreg);
12012 }
12013}
12014\f
a7180f70
BS
12015/* Compute the alignment given to a constant that is being placed in memory.
12016 EXP is the constant and ALIGN is the alignment that the object would
12017 ordinarily have.
12018 The value of this function is used instead of that alignment to align
12019 the object. */
12020
12021int
12022ix86_constant_alignment (exp, align)
12023 tree exp;
12024 int align;
12025{
12026 if (TREE_CODE (exp) == REAL_CST)
12027 {
12028 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12029 return 64;
12030 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12031 return 128;
12032 }
12033 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12034 && align < 256)
12035 return 256;
12036
12037 return align;
12038}
12039
12040/* Compute the alignment for a static variable.
12041 TYPE is the data type, and ALIGN is the alignment that
12042 the object would ordinarily have. The value of this function is used
12043 instead of that alignment to align the object. */
12044
12045int
12046ix86_data_alignment (type, align)
12047 tree type;
12048 int align;
12049{
12050 if (AGGREGATE_TYPE_P (type)
12051 && TYPE_SIZE (type)
12052 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12053 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12054 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12055 return 256;
12056
0d7d98ee
JH
12057 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
 12058 to a 16-byte boundary. */
12059 if (TARGET_64BIT)
12060 {
12061 if (AGGREGATE_TYPE_P (type)
12062 && TYPE_SIZE (type)
12063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12064 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12065 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12066 return 128;
12067 }
12068
a7180f70
BS
12069 if (TREE_CODE (type) == ARRAY_TYPE)
12070 {
12071 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12072 return 64;
12073 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12074 return 128;
12075 }
12076 else if (TREE_CODE (type) == COMPLEX_TYPE)
12077 {
0f290768 12078
a7180f70
BS
12079 if (TYPE_MODE (type) == DCmode && align < 64)
12080 return 64;
12081 if (TYPE_MODE (type) == XCmode && align < 128)
12082 return 128;
12083 }
12084 else if ((TREE_CODE (type) == RECORD_TYPE
12085 || TREE_CODE (type) == UNION_TYPE
12086 || TREE_CODE (type) == QUAL_UNION_TYPE)
12087 && TYPE_FIELDS (type))
12088 {
12089 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12090 return 64;
12091 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12092 return 128;
12093 }
12094 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12095 || TREE_CODE (type) == INTEGER_TYPE)
12096 {
12097 if (TYPE_MODE (type) == DFmode && align < 64)
12098 return 64;
12099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12100 return 128;
12101 }
12102
12103 return align;
12104}
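/* Editorial sketch of the effect above, assuming the usual wiring
   through DATA_ALIGNMENT in i386.h:

	#define DATA_ALIGNMENT(TYPE, ALIGN) \
	  ix86_data_alignment ((TYPE), (ALIGN))

	static char big[40];	/* aggregate of 320 bits >= 256: align 256 */
	static double d;	/* REAL_TYPE in DFmode: align bumped to 64 */

   TREE_INT_CST_HIGH is tested alongside TREE_INT_CST_LOW so that sizes
   overflowing the low HOST_WIDE_INT still take the large-object path.  */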
12105
12106/* Compute the alignment for a local variable.
12107 TYPE is the data type, and ALIGN is the alignment that
12108 the object would ordinarily have. The value of this function is used
12109 instead of that alignment to align the object. */
12110
12111int
12112ix86_local_alignment (type, align)
12113 tree type;
12114 int align;
12115{
12116 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
12117 to a 16-byte boundary.  */
12118 if (TARGET_64BIT)
12119 {
12120 if (AGGREGATE_TYPE_P (type)
12121 && TYPE_SIZE (type)
12122 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12123 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12124 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12125 return 128;
12126 }
12127 if (TREE_CODE (type) == ARRAY_TYPE)
12128 {
12129 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12130 return 64;
12131 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12132 return 128;
12133 }
12134 else if (TREE_CODE (type) == COMPLEX_TYPE)
12135 {
12136 if (TYPE_MODE (type) == DCmode && align < 64)
12137 return 64;
12138 if (TYPE_MODE (type) == XCmode && align < 128)
12139 return 128;
12140 }
12141 else if ((TREE_CODE (type) == RECORD_TYPE
12142 || TREE_CODE (type) == UNION_TYPE
12143 || TREE_CODE (type) == QUAL_UNION_TYPE)
12144 && TYPE_FIELDS (type))
12145 {
12146 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12147 return 64;
12148 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12149 return 128;
12150 }
12151 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12152 || TREE_CODE (type) == INTEGER_TYPE)
12153 {
0f290768 12154
12155 if (TYPE_MODE (type) == DFmode && align < 64)
12156 return 64;
12157 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12158 return 128;
12159 }
12160 return align;
12161}
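/* Editorial note, assuming the usual LOCAL_ALIGNMENT wiring in i386.h:

	#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
	  ix86_local_alignment ((TYPE), (ALIGN))

   Observe that the TARGET_64BIT test above compares TYPE_SIZE, which is
   in bits, against 16, so in practice nearly every local aggregate
   (rather than only aggregates of 16 bytes or more, as the comment
   suggests) is given 128-bit alignment on x86-64:

	void f (void) { char buf[4]; ... }	/* 32 bits >= 16: align 128 */
*/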
12162\f
12163/* Emit RTL insns to initialize the variable parts of a trampoline.
12164 FNADDR is an RTX for the address of the function's pure code.
12165 CXT is an RTX for the static chain value for the function. */
12166void
12167x86_initialize_trampoline (tramp, fnaddr, cxt)
12168 rtx tramp, fnaddr, cxt;
12169{
12170 if (!TARGET_64BIT)
12171 {
12172 /* Compute offset from the end of the jmp to the target function. */
12173 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12174 plus_constant (tramp, 10),
12175 NULL_RTX, 1, OPTAB_DIRECT);
12176 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 12177 gen_int_mode (0xb9, QImode));
12178 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12179 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 12180 gen_int_mode (0xe9, QImode));
12181 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12182 }
12183 else
12184 {
12185 int offset = 0;
12186 /* Try to load the address using the shorter movl instead of movabs.
12187 We may want to support movq for kernel mode, but the kernel does not
12188 use trampolines at the moment.  */
12189 if (x86_64_zero_extended_value (fnaddr))
12190 {
12191 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12192 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12193 gen_int_mode (0xbb41, HImode));
12194 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12195 gen_lowpart (SImode, fnaddr));
12196 offset += 6;
12197 }
12198 else
12199 {
12200 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12201 gen_int_mode (0xbb49, HImode));
12202 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12203 fnaddr);
12204 offset += 10;
12205 }
12206 /* Load static chain using movabs to r10. */
12207 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12208 gen_int_mode (0xba49, HImode));
12209 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12210 cxt);
12211 offset += 10;
12212 /* Jump to r11.  */
12213 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 12214 gen_int_mode (0xff49, HImode));
0ed08620 12215 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 12216 gen_int_mode (0xe3, QImode));
12217 offset += 3;
12218 if (offset > TRAMPOLINE_SIZE)
b531087a 12219 abort ();
0ed08620 12220 }
12221
12222#ifdef TRANSFER_FROM_TRAMPOLINE
12223 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12224 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12225#endif
0ed08620 12226}
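/* Editorial sketch (derived from the constants above, not source text):
   the bytes stored into the trampoline form these instruction stubs.

   !TARGET_64BIT (10 bytes):
	b9 xx xx xx xx		movl   $<cxt>, %ecx
	e9 yy yy yy yy		jmp    <fnaddr - (tramp + 10)>

   TARGET_64BIT, zero-extended fnaddr (6 bytes):
	41 bb xx xx xx xx	movl   $<fnaddr>, %r11d
   otherwise (10 bytes):
	49 bb xx .. xx		movabs $<fnaddr>, %r11
   followed in both cases by:
	49 ba xx .. xx		movabs $<cxt>, %r10
	49 ff e3		jmp    *%r11

   (The HImode stores are little-endian, which is why 0xbb41 comes out
   as the byte sequence 41 bb.)  On targets defining
   TRANSFER_FROM_TRAMPOLINE, __enable_execute_stack is then called to
   make the containing stack page executable.  */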
eeb06b1b 12227\f
12228#define def_builtin(MASK, NAME, TYPE, CODE) \
12229do { \
12230 if ((MASK) & target_flags) \
12231 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12232 NULL, NULL_TREE); \
eeb06b1b 12233} while (0)
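/* Editorial sketch: a later use such as

	def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		     IX86_BUILTIN_EMMS);

   expands to a builtin_function () call guarded by
   (MASK_MMX & target_flags), so the builtin only exists when the
   matching -m switch is in effect.  */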
bd793c65 12234
12235struct builtin_description
12236{
12237 const unsigned int mask;
12238 const enum insn_code icode;
12239 const char *const name;
12240 const enum ix86_builtins code;
12241 const enum rtx_code comparison;
12242 const unsigned int flag;
12243};
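/* Editorial note: reading one row of the tables below against these
   fields, e.g.

	{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
	  IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   pairs the enabling -m mask with the insn pattern, the user-visible
   name, the builtin enum, the rtx comparison code, and a flag; the
   cmpgtps/cmpgeps rows show the flag in use, reusing LT/LE with the
   operands swapped.  */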
12244
12245/* Used for builtins that are enabled both by -msse and -msse2. */
12246#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12247
8b60264b 12248static const struct builtin_description bdesc_comi[] =
bd793c65 12249{
12250 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12251 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12252 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12253 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12254 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12255 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12256 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12257 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12258 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12259 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12260 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12261 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12262 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12263 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12264 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12265 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12266 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12267 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12268 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12269 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12270 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12271 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12272 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12273 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12274};
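/* Editorial sketch of user-level access to one of the rows above; the
   typedef spelling is illustrative (this era's <xmmintrin.h> wraps the
   builtins behind _mm_comieq_ss and friends):

	typedef float __v4sf __attribute__ ((vector_size (16)));

	int eq (__v4sf a, __v4sf b)
	{
	  return __builtin_ia32_comieq (a, b);	/* emits comiss */
	}
*/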
12275
8b60264b 12276static const struct builtin_description bdesc_2arg[] =
12277{
12278 /* SSE */
12279 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12280 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12281 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12282 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12283 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12284 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12285 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12286 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12287
12288 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12289 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12290 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12291 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12292 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12293 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12294 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12295 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12296 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12297 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12298 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12299 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12300 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12301 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12302 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12303 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12304 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12305 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12306 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12307 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12308
12309 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12310 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12311 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12312 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12313
12314 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12315 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12316 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12317 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12318
12319 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12320 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12321 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12322 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12323 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12324
12325 /* MMX */
12326 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12327 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12328 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12329 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12330 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12331 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12332
12333 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12334 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12335 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12336 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12337 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12338 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12339 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12340 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12341
12342 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12343 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 12344 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12345
12346 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12347 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12348 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12349 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12350
12351 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12352 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12353
12354 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12355 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12356 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12357 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12358 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12359 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12360
12361 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12362 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12363 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12364 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12365
12366 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12367 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12368 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12369 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12370 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12371 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12372
12373 /* Special. */
12374 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12375 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12376 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12377
12378 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12379 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12380
12381 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12382 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12383 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12384 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12385 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12386 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12387
12388 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12389 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12390 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12391 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12392 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12393 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12394
12395 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12396 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12397 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12398 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12399
12400 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12401 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12402
12403 /* SSE2 */
12404 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12405 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12406 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12407 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12408 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12409 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12410 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12411 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12412
12413 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12414 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12415 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12416 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12417 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12418 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12419 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12420 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12421 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12422 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12423 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12424 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12425 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12426 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12427 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12428 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12429 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12430 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12431 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12432 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12433
12434 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12435 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12436 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12437 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12438
12439 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12440 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12441 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12442 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12443
12444 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12445 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12446 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12447
12448 /* SSE2 MMX */
12449 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12450 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12451 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12452 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12453 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12454 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12455 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12456 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12457
12458 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12459 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12460 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12461 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12462 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12463 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12464 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12465 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12466
12467 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12468 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12469 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12470 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12471
12472 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12473 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12474 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12475 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12476
12477 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12478 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12479
12480 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12481 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12482 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12483 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12484 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12485 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12486
12487 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12488 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12489 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12490 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12491
12492 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12495 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12496 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12497 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12498 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12499 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12500
12501 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12502 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12503 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12504
12505 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12506 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12507
12508 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12509 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12510 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12511 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12512 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12513 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12514
12515 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12516 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12517 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12518 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12519 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12520 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12521
12522 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12523 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12524 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12525 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12526
12527 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12528
12529 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12530 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12531 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12532};
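/* Editorial notes on the table above (observations, not source text):
   - every row with a non-null name is registered mechanically by the
     loop in ix86_init_mmx_sse_builtins below; the rows with a 0 name
     (the pack, cvt, shift, psadbw and pmaddwd entries) are wired up by
     explicit def_builtin calls instead;
   - the paddq128/psubq128 rows reuse the V4SI add/sub patterns, and
     the 128-bit saturating add/sub rows are gated on MASK_MMX rather
     than MASK_SSE2; both look like stand-ins and were revised in later
     GCC releases.  */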
12533
8b60264b 12534static const struct builtin_description bdesc_1arg[] =
bd793c65 12535{
12536 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12538
12539 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12542
12543 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12545 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12547
12548 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12549 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12550 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12551 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12552
12553 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12554
12555 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12557
12558 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12563
fbe5eb6d 12564 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12565
12566 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12567 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12568
12569 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12570 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12571 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12572
12573 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12574};
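/* Editorial note: the one-operand rows follow the same scheme; e.g. the
   IX86_BUILTIN_SQRTPS row ties CODE_FOR_sqrtv4sf2 to a v4sf -> v4sf
   signature, and because its name field is 0 it is registered by the
   explicit "__builtin_ia32_sqrtps" def_builtin call further down.  */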
12575
12576void
12577ix86_init_builtins ()
12578{
12579 if (TARGET_MMX)
12580 ix86_init_mmx_sse_builtins ();
12581}
12582
12583/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12584 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12585 builtins. */
e37af218 12586static void
f6155fda 12587ix86_init_mmx_sse_builtins ()
bd793c65 12588{
8b60264b 12589 const struct builtin_description * d;
77ebd435 12590 size_t i;
12591
12592 tree pchar_type_node = build_pointer_type (char_type_node);
12593 tree pfloat_type_node = build_pointer_type (float_type_node);
12594 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12595 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12596 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12597
12598 /* Comparisons. */
12599 tree int_ftype_v4sf_v4sf
12600 = build_function_type_list (integer_type_node,
12601 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12602 tree v4si_ftype_v4sf_v4sf
12603 = build_function_type_list (V4SI_type_node,
12604 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12605 /* MMX/SSE/integer conversions. */
bd793c65 12606 tree int_ftype_v4sf
12607 = build_function_type_list (integer_type_node,
12608 V4SF_type_node, NULL_TREE);
bd793c65 12609 tree int_ftype_v8qi
b4de2f7d 12610 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12611 tree v4sf_ftype_v4sf_int
12612 = build_function_type_list (V4SF_type_node,
12613 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12614 tree v4sf_ftype_v4sf_v2si
12615 = build_function_type_list (V4SF_type_node,
12616 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12617 tree int_ftype_v4hi_int
12618 = build_function_type_list (integer_type_node,
12619 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12620 tree v4hi_ftype_v4hi_int_int
e7a60f56 12621 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12622 integer_type_node, integer_type_node,
12623 NULL_TREE);
12624 /* Miscellaneous. */
12625 tree v8qi_ftype_v4hi_v4hi
12626 = build_function_type_list (V8QI_type_node,
12627 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12628 tree v4hi_ftype_v2si_v2si
12629 = build_function_type_list (V4HI_type_node,
12630 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12631 tree v4sf_ftype_v4sf_v4sf_int
12632 = build_function_type_list (V4SF_type_node,
12633 V4SF_type_node, V4SF_type_node,
12634 integer_type_node, NULL_TREE);
bd793c65 12635 tree v2si_ftype_v4hi_v4hi
12636 = build_function_type_list (V2SI_type_node,
12637 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12638 tree v4hi_ftype_v4hi_int
b4de2f7d 12639 = build_function_type_list (V4HI_type_node,
e7a60f56 12640 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12641 tree v4hi_ftype_v4hi_di
12642 = build_function_type_list (V4HI_type_node,
12643 V4HI_type_node, long_long_unsigned_type_node,
12644 NULL_TREE);
bd793c65 12645 tree v2si_ftype_v2si_di
12646 = build_function_type_list (V2SI_type_node,
12647 V2SI_type_node, long_long_unsigned_type_node,
12648 NULL_TREE);
bd793c65 12649 tree void_ftype_void
b4de2f7d 12650 = build_function_type (void_type_node, void_list_node);
bd793c65 12651 tree void_ftype_unsigned
b4de2f7d 12652 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12653 tree unsigned_ftype_void
b4de2f7d 12654 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12655 tree di_ftype_void
b4de2f7d 12656 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12657 tree v4sf_ftype_void
b4de2f7d 12658 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12659 tree v2si_ftype_v4sf
b4de2f7d 12660 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12661 /* Loads/stores. */
bd793c65 12662 tree void_ftype_v8qi_v8qi_pchar
12663 = build_function_type_list (void_type_node,
12664 V8QI_type_node, V8QI_type_node,
12665 pchar_type_node, NULL_TREE);
bd793c65 12666 tree v4sf_ftype_pfloat
b4de2f7d 12667 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12668 /* @@@ the type is bogus */
12669 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12670 = build_function_type_list (V4SF_type_node,
f8ca7923 12671 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12672 tree void_ftype_pv2si_v4sf
b4de2f7d 12673 = build_function_type_list (void_type_node,
f8ca7923 12674 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12675 tree void_ftype_pfloat_v4sf
12676 = build_function_type_list (void_type_node,
12677 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12678 tree void_ftype_pdi_di
12679 = build_function_type_list (void_type_node,
12680 pdi_type_node, long_long_unsigned_type_node,
12681 NULL_TREE);
916b60b7 12682 tree void_ftype_pv2di_v2di
12683 = build_function_type_list (void_type_node,
12684 pv2di_type_node, V2DI_type_node, NULL_TREE);
12685 /* Normal vector unops. */
12686 tree v4sf_ftype_v4sf
b4de2f7d 12687 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12688
12689 /* Normal vector binops. */
12690 tree v4sf_ftype_v4sf_v4sf
12691 = build_function_type_list (V4SF_type_node,
12692 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12693 tree v8qi_ftype_v8qi_v8qi
12694 = build_function_type_list (V8QI_type_node,
12695 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12696 tree v4hi_ftype_v4hi_v4hi
12697 = build_function_type_list (V4HI_type_node,
12698 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12699 tree v2si_ftype_v2si_v2si
12700 = build_function_type_list (V2SI_type_node,
12701 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12702 tree di_ftype_di_di
12703 = build_function_type_list (long_long_unsigned_type_node,
12704 long_long_unsigned_type_node,
12705 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12706
47f339cf 12707 tree v2si_ftype_v2sf
ae3aa00d 12708 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12709 tree v2sf_ftype_v2si
b4de2f7d 12710 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12711 tree v2si_ftype_v2si
b4de2f7d 12712 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12713 tree v2sf_ftype_v2sf
b4de2f7d 12714 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12715 tree v2sf_ftype_v2sf_v2sf
12716 = build_function_type_list (V2SF_type_node,
12717 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12718 tree v2si_ftype_v2sf_v2sf
12719 = build_function_type_list (V2SI_type_node,
12720 V2SF_type_node, V2SF_type_node, NULL_TREE);
12721 tree pint_type_node = build_pointer_type (integer_type_node);
12722 tree pdouble_type_node = build_pointer_type (double_type_node);
12723 tree int_ftype_v2df_v2df
12724 = build_function_type_list (integer_type_node,
12725 V2DF_type_node, V2DF_type_node, NULL_TREE);
12726
12727 tree ti_ftype_void
b4de2f7d 12728 = build_function_type (intTI_type_node, void_list_node);
12729 tree v2di_ftype_void
12730 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12731 tree ti_ftype_ti_ti
12732 = build_function_type_list (intTI_type_node,
12733 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12734 tree void_ftype_pvoid
b4de2f7d 12735 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12736 tree v2di_ftype_di
12737 = build_function_type_list (V2DI_type_node,
12738 long_long_unsigned_type_node, NULL_TREE);
12739 tree di_ftype_v2di
12740 = build_function_type_list (long_long_unsigned_type_node,
12741 V2DI_type_node, NULL_TREE);
fbe5eb6d 12742 tree v4sf_ftype_v4si
b4de2f7d 12743 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12744 tree v4si_ftype_v4sf
b4de2f7d 12745 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12746 tree v2df_ftype_v4si
b4de2f7d 12747 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12748 tree v4si_ftype_v2df
b4de2f7d 12749 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12750 tree v2si_ftype_v2df
b4de2f7d 12751 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12752 tree v4sf_ftype_v2df
b4de2f7d 12753 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12754 tree v2df_ftype_v2si
b4de2f7d 12755 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12756 tree v2df_ftype_v4sf
b4de2f7d 12757 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12758 tree int_ftype_v2df
b4de2f7d 12759 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12760 tree v2df_ftype_v2df_int
12761 = build_function_type_list (V2DF_type_node,
12762 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12763 tree v4sf_ftype_v4sf_v2df
12764 = build_function_type_list (V4SF_type_node,
12765 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12766 tree v2df_ftype_v2df_v4sf
12767 = build_function_type_list (V2DF_type_node,
12768 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12769 tree v2df_ftype_v2df_v2df_int
12770 = build_function_type_list (V2DF_type_node,
12771 V2DF_type_node, V2DF_type_node,
12772 integer_type_node,
12773 NULL_TREE);
fbe5eb6d 12774 tree v2df_ftype_v2df_pv2si
12775 = build_function_type_list (V2DF_type_node,
12776 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12777 tree void_ftype_pv2si_v2df
12778 = build_function_type_list (void_type_node,
12779 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12780 tree void_ftype_pdouble_v2df
12781 = build_function_type_list (void_type_node,
12782 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12783 tree void_ftype_pint_int
b4de2f7d
AH
12784 = build_function_type_list (void_type_node,
12785 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12786 tree void_ftype_v16qi_v16qi_pchar
12787 = build_function_type_list (void_type_node,
12788 V16QI_type_node, V16QI_type_node,
12789 pchar_type_node, NULL_TREE);
fbe5eb6d 12790 tree v2df_ftype_pdouble
b4de2f7d 12791 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12792 tree v2df_ftype_v2df_v2df
12793 = build_function_type_list (V2DF_type_node,
12794 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12795 tree v16qi_ftype_v16qi_v16qi
12796 = build_function_type_list (V16QI_type_node,
12797 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12798 tree v8hi_ftype_v8hi_v8hi
12799 = build_function_type_list (V8HI_type_node,
12800 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12801 tree v4si_ftype_v4si_v4si
12802 = build_function_type_list (V4SI_type_node,
12803 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12804 tree v2di_ftype_v2di_v2di
12805 = build_function_type_list (V2DI_type_node,
12806 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12807 tree v2di_ftype_v2df_v2df
12808 = build_function_type_list (V2DI_type_node,
12809 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12810 tree v2df_ftype_v2df
b4de2f7d 12811 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12812 tree v2df_ftype_double
b4de2f7d 12813 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12814 tree v2df_ftype_double_double
12815 = build_function_type_list (V2DF_type_node,
12816 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12817 tree int_ftype_v8hi_int
12818 = build_function_type_list (integer_type_node,
12819 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12820 tree v8hi_ftype_v8hi_int_int
12821 = build_function_type_list (V8HI_type_node,
12822 V8HI_type_node, integer_type_node,
12823 integer_type_node, NULL_TREE);
916b60b7 12824 tree v2di_ftype_v2di_int
12825 = build_function_type_list (V2DI_type_node,
12826 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12827 tree v4si_ftype_v4si_int
12828 = build_function_type_list (V4SI_type_node,
12829 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12830 tree v8hi_ftype_v8hi_int
12831 = build_function_type_list (V8HI_type_node,
12832 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12833 tree v8hi_ftype_v8hi_v2di
12834 = build_function_type_list (V8HI_type_node,
12835 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12836 tree v4si_ftype_v4si_v2di
12837 = build_function_type_list (V4SI_type_node,
12838 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12839 tree v4si_ftype_v8hi_v8hi
12840 = build_function_type_list (V4SI_type_node,
12841 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12842 tree di_ftype_v8qi_v8qi
12843 = build_function_type_list (long_long_unsigned_type_node,
12844 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12845 tree v2di_ftype_v16qi_v16qi
12846 = build_function_type_list (V2DI_type_node,
12847 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12848 tree int_ftype_v16qi
b4de2f7d 12849 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12850 tree v16qi_ftype_pchar
12851 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12852 tree void_ftype_pchar_v16qi
12853 = build_function_type_list (void_type_node,
12854 pchar_type_node, V16QI_type_node, NULL_TREE);
12855 tree v4si_ftype_pchar
12856 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12857 tree void_ftype_pchar_v4si
12858 = build_function_type_list (void_type_node,
12859 pchar_type_node, V4SI_type_node, NULL_TREE);
12860 tree v2di_ftype_v2di
12861 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 12862
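/* Editorial sketch: each *_ftype_* tree above is simply a function-type
   node; under the naming convention used here,

	tree v4si_ftype_v4sf_v4sf
	  = build_function_type_list (V4SI_type_node,
				      V4SF_type_node, V4SF_type_node,
				      NULL_TREE);

   denotes the prototype "V4SI f (V4SF, V4SF)", the signature handed to
   def_builtin for the mask-generating SSE compares.  */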
12863 /* Add all builtins that are more or less simple operations on two
12864 operands. */
ca7558fc 12865 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12866 {
12867 /* Use one of the operands; the target can have a different mode for
12868 mask-generating compares. */
12869 enum machine_mode mode;
12870 tree type;
12871
12872 if (d->name == 0)
12873 continue;
12874 mode = insn_data[d->icode].operand[1].mode;
12875
12876 switch (mode)
12877 {
12878 case V16QImode:
12879 type = v16qi_ftype_v16qi_v16qi;
12880 break;
12881 case V8HImode:
12882 type = v8hi_ftype_v8hi_v8hi;
12883 break;
12884 case V4SImode:
12885 type = v4si_ftype_v4si_v4si;
12886 break;
12887 case V2DImode:
12888 type = v2di_ftype_v2di_v2di;
12889 break;
12890 case V2DFmode:
12891 type = v2df_ftype_v2df_v2df;
12892 break;
12893 case TImode:
12894 type = ti_ftype_ti_ti;
12895 break;
12896 case V4SFmode:
12897 type = v4sf_ftype_v4sf_v4sf;
12898 break;
12899 case V8QImode:
12900 type = v8qi_ftype_v8qi_v8qi;
12901 break;
12902 case V4HImode:
12903 type = v4hi_ftype_v4hi_v4hi;
12904 break;
12905 case V2SImode:
12906 type = v2si_ftype_v2si_v2si;
12907 break;
12908 case DImode:
12909 type = di_ftype_di_di;
12910 break;
12911
12912 default:
12913 abort ();
12914 }
0f290768 12915
12916 /* Override for comparisons. */
12917 if (d->icode == CODE_FOR_maskcmpv4sf3
12918 || d->icode == CODE_FOR_maskncmpv4sf3
12919 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12920 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12921 type = v4si_ftype_v4sf_v4sf;
12922
12923 if (d->icode == CODE_FOR_maskcmpv2df3
12924 || d->icode == CODE_FOR_maskncmpv2df3
12925 || d->icode == CODE_FOR_vmmaskcmpv2df3
12926 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12927 type = v2di_ftype_v2df_v2df;
12928
eeb06b1b 12929 def_builtin (d->mask, d->name, type, d->code);
12930 }
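/* Editorial sketch: for the "__builtin_ia32_addps" row the loop above
   reads operand 1 of CODE_FOR_addv4sf3 (V4SFmode), selects
   v4sf_ftype_v4sf_v4sf from the switch, and ends in

	def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		     v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   while the comparison overrides retype the compare patterns to return
   a mask vector (v4si or v2di) instead.  */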
12931
12932 /* Add the remaining MMX insns with somewhat more complicated types. */
12933 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12934 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12935 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12936 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12937 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12938 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12939 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12940
12941 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12942 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12943 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12944
12945 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12946 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12947
12948 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12949 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12950
bd793c65 12951 /* comi/ucomi insns. */
ca7558fc 12952 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12953 if (d->mask == MASK_SSE2)
12954 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12955 else
12956 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12957
12958 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12959 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12960 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12961
12962 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12963 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12964 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12965 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12966 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12967 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12968
12969 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12970 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12971
fbe5eb6d 12972 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12973
12974 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12975 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12976 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12977 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12978 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12979 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12980
12981 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12982 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12983 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12984 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12985
12986 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12987 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12988 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12989 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12990
fbe5eb6d 12991 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12992
916b60b7 12993 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12994
12995 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12996 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12997 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12998 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12999 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13000 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 13001
fbe5eb6d 13002 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13003
13004 /* Original 3DNow! */
13005 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13006 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13007 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13008 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13009 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13010 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13011 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13012 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13013 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13014 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13015 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13016 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13017 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13018 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13019 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13020 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13021 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13022 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13023 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13024 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13025
13026 /* 3DNow! extension as used in the Athlon CPU. */
13027 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13028 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13029 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13030 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13031 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13032 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13033
13034 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13035
13036 /* SSE2 */
13037 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13038 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13039
13040 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13041 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13042 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13043
13044 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
13045 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
13046 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
13047 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13048 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13049 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13050
13051 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13052 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13053 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13054 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13055
13056 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13057 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13058 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13059 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13060 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13061
13062 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13063 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13064 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13065 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13066
13067 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13068 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13069
13070 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13071
13072 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13073 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13074
13075 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13076 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13077 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13078 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13079 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13080
13081 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13082
13083 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13084 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13085
13086 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13087 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13088 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13089
13090 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13091 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13092 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13093
13094 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13095 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13096 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13097 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
13098 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
13099 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13100 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13101
13102 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
13103 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13104 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13105
13106 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
13107 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
13108 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
13109 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13110 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13111 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
13112 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13113
13114 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13115
13116 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13117 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13118 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13119
13120 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13121 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13122 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13123
13124 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13125 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13126
ab3146fd 13127 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13128 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13129 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13130 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13131
ab3146fd 13132 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13133 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13134 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13135 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13136
13137 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13138 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13139
13140 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13141}
13142
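/* Illustrative note: def_builtin only registers a builtin when the
   mask bits in its first argument are enabled, so compiling with
   -msse2 makes a declaration roughly equivalent to

       __v2di __builtin_ia32_psllqi128 (__v2di, int);

   visible to the C front end, while without -msse2 the MASK_SSE2
   names above simply stay undefined.  */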
13143/* Errors in the source file can cause expand_expr to return const0_rtx
13144 where we expect a vector. To avoid crashing, use one of the vector
13145 clear instructions. */
13146static rtx
13147safe_vector_operand (x, mode)
13148 rtx x;
13149 enum machine_mode mode;
13150{
13151 if (x != const0_rtx)
13152 return x;
13153 x = gen_reg_rtx (mode);
13154
47f339cf 13155 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13156 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13157 : gen_rtx_SUBREG (DImode, x, 0)));
13158 else
e37af218 13159 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13160 : gen_rtx_SUBREG (V4SFmode, x, 0),
13161 CONST0_RTX (V4SFmode)));
13162 return x;
13163}
13164
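/* So if erroneous source makes expand_expr hand us const0_rtx for,
   say, the vector argument of __builtin_ia32_pfadd, the code above
   substitutes a freshly cleared MMX or SSE register of the expected
   mode instead of letting a scalar zero reach a vector insn.  */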
13165/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13166
13167static rtx
13168ix86_expand_binop_builtin (icode, arglist, target)
13169 enum insn_code icode;
13170 tree arglist;
13171 rtx target;
13172{
13173 rtx pat;
13174 tree arg0 = TREE_VALUE (arglist);
13175 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13176 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13177 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13178 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13179 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13180 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13181
13182 if (VECTOR_MODE_P (mode0))
13183 op0 = safe_vector_operand (op0, mode0);
13184 if (VECTOR_MODE_P (mode1))
13185 op1 = safe_vector_operand (op1, mode1);
13186
13187 if (! target
13188 || GET_MODE (target) != tmode
13189 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13190 target = gen_reg_rtx (tmode);
13191
13192 /* In case the insn wants input operands in modes different from
13193 the result, abort. */
13194 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13195 abort ();
13196
13197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13198 op0 = copy_to_mode_reg (mode0, op0);
13199 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13200 op1 = copy_to_mode_reg (mode1, op1);
13201
13202 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13203 yet one of the two must not be a memory. This is normally enforced
13204 by expanders, but we didn't bother to create one here. */
13205 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13206 op0 = copy_to_mode_reg (mode0, op0);
13207
13208 pat = GEN_FCN (icode) (target, op0, op1);
13209 if (! pat)
13210 return 0;
13211 emit_insn (pat);
13212 return target;
13213}
13214
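/* Sketch of the usual call path, assuming the bdesc_2arg table entry
   for addps: a source-level

       __v4sf z = __builtin_ia32_addps (x, y);

   arrives here with icode == CODE_FOR_addv4sf3 and arglist holding the
   TREE_LIST chain (x, y); the predicate checks above then force both
   operands into registers as that pattern requires.  */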
13215/* Subroutine of ix86_expand_builtin to take care of stores. */
13216
13217static rtx
e37af218 13218ix86_expand_store_builtin (icode, arglist)
13219 enum insn_code icode;
13220 tree arglist;
13221{
13222 rtx pat;
13223 tree arg0 = TREE_VALUE (arglist);
13224 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13225 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13226 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13227 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13228 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13229
13230 if (VECTOR_MODE_P (mode1))
13231 op1 = safe_vector_operand (op1, mode1);
13232
13233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13234
13235 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13236 op1 = copy_to_mode_reg (mode1, op1);
13237
13238 pat = GEN_FCN (icode) (op0, op1);
13239 if (pat)
13240 emit_insn (pat);
13241 return 0;
13242}
13243
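/* E.g. __builtin_ia32_storeups (p, v) comes through here: the pointer
   argument becomes a MEM in the insn's operand 0 mode, the vector is
   copied to a register if the predicate demands it, and no value is
   returned since stores produce none.  */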
13244/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13245
13246static rtx
13247ix86_expand_unop_builtin (icode, arglist, target, do_load)
13248 enum insn_code icode;
13249 tree arglist;
13250 rtx target;
13251 int do_load;
13252{
13253 rtx pat;
13254 tree arg0 = TREE_VALUE (arglist);
13255 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13256 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13257 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13258
13259 if (! target
13260 || GET_MODE (target) != tmode
13261 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13262 target = gen_reg_rtx (tmode);
13263 if (do_load)
13264 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13265 else
13266 {
13267 if (VECTOR_MODE_P (mode0))
13268 op0 = safe_vector_operand (op0, mode0);
13269
13270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13271 op0 = copy_to_mode_reg (mode0, op0);
13272 }
13273
13274 pat = GEN_FCN (icode) (target, op0);
13275 if (! pat)
13276 return 0;
13277 emit_insn (pat);
13278 return target;
13279}
13280
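/* DO_LOAD distinguishes the two users of this helper: true unary ops
   such as __builtin_ia32_sqrtps pass 0, while load builtins such as
   __builtin_ia32_loadaps pass 1 so that their single pointer argument
   is first dereferenced through a MEM in mode0.  */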
13281/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13282 sqrtss, rsqrtss, rcpss. */
13283
13284static rtx
13285ix86_expand_unop1_builtin (icode, arglist, target)
13286 enum insn_code icode;
13287 tree arglist;
13288 rtx target;
13289{
13290 rtx pat;
13291 tree arg0 = TREE_VALUE (arglist);
59bef189 13292 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13293 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13294 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13295
13296 if (! target
13297 || GET_MODE (target) != tmode
13298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13299 target = gen_reg_rtx (tmode);
13300
13301 if (VECTOR_MODE_P (mode0))
13302 op0 = safe_vector_operand (op0, mode0);
13303
13304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13305 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13306
13307 op1 = op0;
13308 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13309 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13310
59bef189 13311 pat = GEN_FCN (icode) (target, op0, op1);
13312 if (! pat)
13313 return 0;
13314 emit_insn (pat);
13315 return target;
13316}
13317
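/* The scalar patterns handled here (vmsqrtv4sf2 and friends) consume
   the source twice: one operand is the value actually operated on and
   the other supplies the vector whose upper elements pass through
   unchanged, which is why op1 is just a copy of op0 above.  */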
13318/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13319
13320static rtx
13321ix86_expand_sse_compare (d, arglist, target)
8b60264b 13322 const struct builtin_description *d;
13323 tree arglist;
13324 rtx target;
13325{
13326 rtx pat;
13327 tree arg0 = TREE_VALUE (arglist);
13328 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13329 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13330 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13331 rtx op2;
13332 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13333 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13334 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13335 enum rtx_code comparison = d->comparison;
13336
13337 if (VECTOR_MODE_P (mode0))
13338 op0 = safe_vector_operand (op0, mode0);
13339 if (VECTOR_MODE_P (mode1))
13340 op1 = safe_vector_operand (op1, mode1);
13341
13342 /* Swap operands if we have a comparison that isn't available in
13343 hardware. */
13344 if (d->flag)
13345 {
13346 rtx tmp = gen_reg_rtx (mode1);
13347 emit_move_insn (tmp, op1);
bd793c65 13348 op1 = op0;
21e1b5f1 13349 op0 = tmp;
bd793c65 13350 }
13351
13352 if (! target
13353 || GET_MODE (target) != tmode
13354 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13355 target = gen_reg_rtx (tmode);
13356
13357 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13358 op0 = copy_to_mode_reg (mode0, op0);
13359 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13360 op1 = copy_to_mode_reg (mode1, op1);
13361
13362 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13363 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13364 if (! pat)
13365 return 0;
13366 emit_insn (pat);
13367 return target;
13368}
13369
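/* SSE comparison insns only encode the EQ/LT/LE/UNORD style
   predicates, so e.g. __builtin_ia32_cmpgtps is implemented as the LT
   comparison with its operands swapped; table entries with d->flag set
   request exactly that swap above.  */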
13370/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13371
13372static rtx
13373ix86_expand_sse_comi (d, arglist, target)
8b60264b 13374 const struct builtin_description *d;
13375 tree arglist;
13376 rtx target;
13377{
13378 rtx pat;
13379 tree arg0 = TREE_VALUE (arglist);
13380 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13381 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13382 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13383 rtx op2;
13384 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13385 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13386 enum rtx_code comparison = d->comparison;
13387
13388 if (VECTOR_MODE_P (mode0))
13389 op0 = safe_vector_operand (op0, mode0);
13390 if (VECTOR_MODE_P (mode1))
13391 op1 = safe_vector_operand (op1, mode1);
13392
13393 /* Swap operands if we have a comparison that isn't available in
13394 hardware. */
13395 if (d->flag)
13396 {
13397 rtx tmp = op1;
13398 op1 = op0;
13399 op0 = tmp;
13400 }
13401
13402 target = gen_reg_rtx (SImode);
13403 emit_move_insn (target, const0_rtx);
13404 target = gen_rtx_SUBREG (QImode, target, 0);
13405
13406 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13407 op0 = copy_to_mode_reg (mode0, op0);
13408 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13409 op1 = copy_to_mode_reg (mode1, op1);
13410
13411 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13412 pat = GEN_FCN (d->icode) (op0, op1);
13413 if (! pat)
13414 return 0;
13415 emit_insn (pat);
13416 emit_insn (gen_rtx_SET (VOIDmode,
13417 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13418 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13419 SET_DEST (pat),
29628f27 13420 const0_rtx)));
bd793c65 13421
6f1a6c5b 13422 return SUBREG_REG (target);
13423}
13424
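/* The comi patterns only set EFLAGS, so the 0/1 result of e.g.
   __builtin_ia32_comilt (a, b) is materialized here by zeroing an
   SImode register and then setting its low byte from the flags through
   the STRICT_LOW_PART store emitted above.  */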
13425/* Expand an expression EXP that calls a built-in function,
13426 with result going to TARGET if that's convenient
13427 (and in mode MODE if that's convenient).
13428 SUBTARGET may be used as the target for computing one of EXP's operands.
13429 IGNORE is nonzero if the value is to be ignored. */
13430
13431rtx
13432ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13433 tree exp;
13434 rtx target;
13435 rtx subtarget ATTRIBUTE_UNUSED;
13436 enum machine_mode mode ATTRIBUTE_UNUSED;
13437 int ignore ATTRIBUTE_UNUSED;
13438{
8b60264b 13439 const struct builtin_description *d;
77ebd435 13440 size_t i;
13441 enum insn_code icode;
13442 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13443 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13444 tree arg0, arg1, arg2;
13445 rtx op0, op1, op2, pat;
13446 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13447 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13448
13449 switch (fcode)
13450 {
13451 case IX86_BUILTIN_EMMS:
13452 emit_insn (gen_emms ());
13453 return 0;
13454
13455 case IX86_BUILTIN_SFENCE:
13456 emit_insn (gen_sfence ());
13457 return 0;
13458
bd793c65 13459 case IX86_BUILTIN_PEXTRW:
13460 case IX86_BUILTIN_PEXTRW128:
13461 icode = (fcode == IX86_BUILTIN_PEXTRW
13462 ? CODE_FOR_mmx_pextrw
13463 : CODE_FOR_sse2_pextrw);
13464 arg0 = TREE_VALUE (arglist);
13465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13466 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13467 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13468 tmode = insn_data[icode].operand[0].mode;
13469 mode0 = insn_data[icode].operand[1].mode;
13470 mode1 = insn_data[icode].operand[2].mode;
13471
13472 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13473 op0 = copy_to_mode_reg (mode0, op0);
13474 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13475 {
13476 /* @@@ better error message */
13477 error ("selector must be an immediate");
6f1a6c5b 13478 return gen_reg_rtx (tmode);
13479 }
13480 if (target == 0
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
13484 pat = GEN_FCN (icode) (target, op0, op1);
13485 if (! pat)
13486 return 0;
13487 emit_insn (pat);
13488 return target;
13489
13490 case IX86_BUILTIN_PINSRW:
13491 case IX86_BUILTIN_PINSRW128:
13492 icode = (fcode == IX86_BUILTIN_PINSRW
13493 ? CODE_FOR_mmx_pinsrw
13494 : CODE_FOR_sse2_pinsrw);
13495 arg0 = TREE_VALUE (arglist);
13496 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13497 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13498 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13499 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13500 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13501 tmode = insn_data[icode].operand[0].mode;
13502 mode0 = insn_data[icode].operand[1].mode;
13503 mode1 = insn_data[icode].operand[2].mode;
13504 mode2 = insn_data[icode].operand[3].mode;
13505
13506 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13507 op0 = copy_to_mode_reg (mode0, op0);
13508 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13509 op1 = copy_to_mode_reg (mode1, op1);
13510 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13511 {
13512 /* @@@ better error message */
13513 error ("selector must be an immediate");
13514 return const0_rtx;
13515 }
13516 if (target == 0
13517 || GET_MODE (target) != tmode
13518 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13519 target = gen_reg_rtx (tmode);
13520 pat = GEN_FCN (icode) (target, op0, op1, op2);
13521 if (! pat)
13522 return 0;
13523 emit_insn (pat);
13524 return target;
13525
13526 case IX86_BUILTIN_MASKMOVQ:
077084dd 13527 case IX86_BUILTIN_MASKMOVDQU:
13528 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13529 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13530 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13531 : CODE_FOR_sse2_maskmovdqu));
13532 /* Note the arg order is different from the operand order. */
13533 arg1 = TREE_VALUE (arglist);
13534 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13535 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13536 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13537 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13538 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13539 mode0 = insn_data[icode].operand[0].mode;
13540 mode1 = insn_data[icode].operand[1].mode;
13541 mode2 = insn_data[icode].operand[2].mode;
13542
5c464583 13543 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13544 op0 = copy_to_mode_reg (mode0, op0);
13545 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13546 op1 = copy_to_mode_reg (mode1, op1);
13547 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13548 op2 = copy_to_mode_reg (mode2, op2);
13549 pat = GEN_FCN (icode) (op0, op1, op2);
13550 if (! pat)
13551 return 0;
13552 emit_insn (pat);
13553 return 0;
13554
13555 case IX86_BUILTIN_SQRTSS:
13556 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13557 case IX86_BUILTIN_RSQRTSS:
13558 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13559 case IX86_BUILTIN_RCPSS:
13560 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13561
13562 case IX86_BUILTIN_LOADAPS:
13563 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13564
13565 case IX86_BUILTIN_LOADUPS:
13566 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13567
13568 case IX86_BUILTIN_STOREAPS:
e37af218 13569 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 13570
bd793c65 13571 case IX86_BUILTIN_STOREUPS:
e37af218 13572 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13573
13574 case IX86_BUILTIN_LOADSS:
13575 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13576
13577 case IX86_BUILTIN_STORESS:
e37af218 13578 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 13579
0f290768 13580 case IX86_BUILTIN_LOADHPS:
bd793c65 13581 case IX86_BUILTIN_LOADLPS:
13582 case IX86_BUILTIN_LOADHPD:
13583 case IX86_BUILTIN_LOADLPD:
13584 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13585 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13586 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13587 : CODE_FOR_sse2_movlpd);
13588 arg0 = TREE_VALUE (arglist);
13589 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13590 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13591 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13592 tmode = insn_data[icode].operand[0].mode;
13593 mode0 = insn_data[icode].operand[1].mode;
13594 mode1 = insn_data[icode].operand[2].mode;
13595
13596 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13597 op0 = copy_to_mode_reg (mode0, op0);
13598 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13599 if (target == 0
13600 || GET_MODE (target) != tmode
13601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13602 target = gen_reg_rtx (tmode);
13603 pat = GEN_FCN (icode) (target, op0, op1);
13604 if (! pat)
13605 return 0;
13606 emit_insn (pat);
13607 return target;
0f290768 13608
13609 case IX86_BUILTIN_STOREHPS:
13610 case IX86_BUILTIN_STORELPS:
13611 case IX86_BUILTIN_STOREHPD:
13612 case IX86_BUILTIN_STORELPD:
13613 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13614 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13615 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13616 : CODE_FOR_sse2_movlpd);
13617 arg0 = TREE_VALUE (arglist);
13618 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13619 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13620 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13621 mode0 = insn_data[icode].operand[1].mode;
13622 mode1 = insn_data[icode].operand[2].mode;
13623
13624 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13625 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13626 op1 = copy_to_mode_reg (mode1, op1);
13627
13628 pat = GEN_FCN (icode) (op0, op0, op1);
13629 if (! pat)
13630 return 0;
13631 emit_insn (pat);
13632 return 0;
13633
13634 case IX86_BUILTIN_MOVNTPS:
e37af218 13635 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13636 case IX86_BUILTIN_MOVNTQ:
e37af218 13637 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13638
13639 case IX86_BUILTIN_LDMXCSR:
13640 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13641 target = assign_386_stack_local (SImode, 0);
13642 emit_move_insn (target, op0);
13643 emit_insn (gen_ldmxcsr (target));
13644 return 0;
13645
13646 case IX86_BUILTIN_STMXCSR:
13647 target = assign_386_stack_local (SImode, 0);
13648 emit_insn (gen_stmxcsr (target));
13649 return copy_to_mode_reg (SImode, target);
13650
bd793c65 13651 case IX86_BUILTIN_SHUFPS:
13652 case IX86_BUILTIN_SHUFPD:
13653 icode = (fcode == IX86_BUILTIN_SHUFPS
13654 ? CODE_FOR_sse_shufps
13655 : CODE_FOR_sse2_shufpd);
13656 arg0 = TREE_VALUE (arglist);
13657 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13658 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13659 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13660 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13661 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13662 tmode = insn_data[icode].operand[0].mode;
13663 mode0 = insn_data[icode].operand[1].mode;
13664 mode1 = insn_data[icode].operand[2].mode;
13665 mode2 = insn_data[icode].operand[3].mode;
13666
13667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13670 op1 = copy_to_mode_reg (mode1, op1);
13671 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13672 {
13673 /* @@@ better error message */
13674 error ("mask must be an immediate");
6f1a6c5b 13675 return gen_reg_rtx (tmode);
13676 }
13677 if (target == 0
13678 || GET_MODE (target) != tmode
13679 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13680 target = gen_reg_rtx (tmode);
13681 pat = GEN_FCN (icode) (target, op0, op1, op2);
13682 if (! pat)
13683 return 0;
13684 emit_insn (pat);
13685 return target;
13686
13687 case IX86_BUILTIN_PSHUFW:
13688 case IX86_BUILTIN_PSHUFD:
13689 case IX86_BUILTIN_PSHUFHW:
13690 case IX86_BUILTIN_PSHUFLW:
13691 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13692 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13693 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13694 : CODE_FOR_mmx_pshufw);
13695 arg0 = TREE_VALUE (arglist);
13696 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13697 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13698 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13699 tmode = insn_data[icode].operand[0].mode;
13700 mode1 = insn_data[icode].operand[1].mode;
13701 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13702
13703 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13704 op0 = copy_to_mode_reg (mode1, op0);
13705 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13706 {
13707 /* @@@ better error message */
13708 error ("mask must be an immediate");
13709 return const0_rtx;
13710 }
13711 if (target == 0
13712 || GET_MODE (target) != tmode
13713 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13714 target = gen_reg_rtx (tmode);
29628f27 13715 pat = GEN_FCN (icode) (target, op0, op1);
13716 if (! pat)
13717 return 0;
13718 emit_insn (pat);
13719 return target;
13720
13721 case IX86_BUILTIN_PSLLDQI128:
13722 case IX86_BUILTIN_PSRLDQI128:
13723 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13724 : CODE_FOR_sse2_lshrti3);
13725 arg0 = TREE_VALUE (arglist);
13726 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13727 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13728 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13729 tmode = insn_data[icode].operand[0].mode;
13730 mode1 = insn_data[icode].operand[1].mode;
13731 mode2 = insn_data[icode].operand[2].mode;
13732
13733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13734 {
13735 op0 = copy_to_reg (op0);
13736 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13737 }
13738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13739 {
13740 error ("shift must be an immediate");
13741 return const0_rtx;
13742 }
13743 target = gen_reg_rtx (V2DImode);
13744 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13745 if (! pat)
13746 return 0;
13747 emit_insn (pat);
13748 return target;
13749
13750 case IX86_BUILTIN_FEMMS:
13751 emit_insn (gen_femms ());
13752 return NULL_RTX;
13753
13754 case IX86_BUILTIN_PAVGUSB:
13755 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13756
13757 case IX86_BUILTIN_PF2ID:
13758 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13759
13760 case IX86_BUILTIN_PFACC:
13761 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13762
13763 case IX86_BUILTIN_PFADD:
13764 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13765
13766 case IX86_BUILTIN_PFCMPEQ:
13767 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13768
13769 case IX86_BUILTIN_PFCMPGE:
13770 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13771
13772 case IX86_BUILTIN_PFCMPGT:
13773 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13774
13775 case IX86_BUILTIN_PFMAX:
13776 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13777
13778 case IX86_BUILTIN_PFMIN:
13779 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13780
13781 case IX86_BUILTIN_PFMUL:
13782 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13783
13784 case IX86_BUILTIN_PFRCP:
13785 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13786
13787 case IX86_BUILTIN_PFRCPIT1:
13788 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13789
13790 case IX86_BUILTIN_PFRCPIT2:
13791 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13792
13793 case IX86_BUILTIN_PFRSQIT1:
13794 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13795
13796 case IX86_BUILTIN_PFRSQRT:
13797 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13798
13799 case IX86_BUILTIN_PFSUB:
13800 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13801
13802 case IX86_BUILTIN_PFSUBR:
13803 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13804
13805 case IX86_BUILTIN_PI2FD:
13806 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13807
13808 case IX86_BUILTIN_PMULHRW:
13809 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13810
13811 case IX86_BUILTIN_PF2IW:
13812 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13813
13814 case IX86_BUILTIN_PFNACC:
13815 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13816
13817 case IX86_BUILTIN_PFPNACC:
13818 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13819
13820 case IX86_BUILTIN_PI2FW:
13821 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13822
13823 case IX86_BUILTIN_PSWAPDSI:
13824 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13825
13826 case IX86_BUILTIN_PSWAPDSF:
13827 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13828
13829 case IX86_BUILTIN_SSE_ZERO:
13830 target = gen_reg_rtx (V4SFmode);
4977bab6 13831 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13832 return target;
13833
13834 case IX86_BUILTIN_MMX_ZERO:
13835 target = gen_reg_rtx (DImode);
13836 emit_insn (gen_mmx_clrdi (target));
13837 return target;
13838
13839 case IX86_BUILTIN_CLRTI:
13840 target = gen_reg_rtx (V2DImode);
13841 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13842 return target;
13843
13844
13845 case IX86_BUILTIN_SQRTSD:
13846 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13847 case IX86_BUILTIN_LOADAPD:
13848 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13849 case IX86_BUILTIN_LOADUPD:
13850 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13851
13852 case IX86_BUILTIN_STOREAPD:
13853 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13854 case IX86_BUILTIN_STOREUPD:
13855 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13856
13857 case IX86_BUILTIN_LOADSD:
13858 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13859
13860 case IX86_BUILTIN_STORESD:
13861 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13862
13863 case IX86_BUILTIN_SETPD1:
13864 target = assign_386_stack_local (DFmode, 0);
13865 arg0 = TREE_VALUE (arglist);
13866 emit_move_insn (adjust_address (target, DFmode, 0),
13867 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13868 op0 = gen_reg_rtx (V2DFmode);
13869 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13870 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13871 return op0;
13872
13873 case IX86_BUILTIN_SETPD:
13874 target = assign_386_stack_local (V2DFmode, 0);
13875 arg0 = TREE_VALUE (arglist);
13876 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13877 emit_move_insn (adjust_address (target, DFmode, 0),
13878 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13879 emit_move_insn (adjust_address (target, DFmode, 8),
13880 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13881 op0 = gen_reg_rtx (V2DFmode);
13882 emit_insn (gen_sse2_movapd (op0, target));
13883 return op0;
13884
13885 case IX86_BUILTIN_LOADRPD:
13886 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13887 gen_reg_rtx (V2DFmode), 1);
13888 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13889 return target;
13890
13891 case IX86_BUILTIN_LOADPD1:
13892 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13893 gen_reg_rtx (V2DFmode), 1);
13894 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13895 return target;
13896
13897 case IX86_BUILTIN_STOREPD1:
13898 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13899 case IX86_BUILTIN_STORERPD:
13900 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13901
13902 case IX86_BUILTIN_CLRPD:
13903 target = gen_reg_rtx (V2DFmode);
13904 emit_insn (gen_sse_clrv2df (target));
13905 return target;
13906
13907 case IX86_BUILTIN_MFENCE:
13908 emit_insn (gen_sse2_mfence ());
13909 return 0;
13910 case IX86_BUILTIN_LFENCE:
13911 emit_insn (gen_sse2_lfence ());
13912 return 0;
13913
13914 case IX86_BUILTIN_CLFLUSH:
13915 arg0 = TREE_VALUE (arglist);
13916 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13917 icode = CODE_FOR_sse2_clflush;
13918 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13919 op0 = copy_to_mode_reg (Pmode, op0);
13920
13921 emit_insn (gen_sse2_clflush (op0));
13922 return 0;
13923
13924 case IX86_BUILTIN_MOVNTPD:
13925 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13926 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13927 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13928 case IX86_BUILTIN_MOVNTI:
13929 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13930
13931 case IX86_BUILTIN_LOADDQA:
13932 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13933 case IX86_BUILTIN_LOADDQU:
13934 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13935 case IX86_BUILTIN_LOADD:
13936 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13937
13938 case IX86_BUILTIN_STOREDQA:
13939 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13940 case IX86_BUILTIN_STOREDQU:
13941 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13942 case IX86_BUILTIN_STORED:
13943 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13944
13945 default:
13946 break;
13947 }
13948
ca7558fc 13949 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13950 if (d->code == fcode)
13951 {
13952 /* Compares are treated specially. */
13953 if (d->icode == CODE_FOR_maskcmpv4sf3
13954 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13955 || d->icode == CODE_FOR_maskncmpv4sf3
13956 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13957 || d->icode == CODE_FOR_maskcmpv2df3
13958 || d->icode == CODE_FOR_vmmaskcmpv2df3
13959 || d->icode == CODE_FOR_maskncmpv2df3
13960 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13961 return ix86_expand_sse_compare (d, arglist, target);
13962
13963 return ix86_expand_binop_builtin (d->icode, arglist, target);
13964 }
13965
ca7558fc 13966 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13967 if (d->code == fcode)
13968 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 13969
ca7558fc 13970 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13971 if (d->code == fcode)
13972 return ix86_expand_sse_comi (d, arglist, target);
0f290768 13973
13974 /* @@@ Should really do something sensible here. */
13975 return 0;
bd793c65 13976}
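/* Any builtin without an explicit case above is found in the
   bdesc_2arg, bdesc_1arg or bdesc_comi tables, so the whole family of
   ordinary two-operand builtins shares the single
   ix86_expand_binop_builtin path.  */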
13977
13978/* Store OPERAND to memory after reload is completed.  This means
f710504c 13979 that we can't easily use assign_stack_local. */
13980rtx
13981ix86_force_to_memory (mode, operand)
13982 enum machine_mode mode;
13983 rtx operand;
13984{
898d374d 13985 rtx result;
13986 if (!reload_completed)
13987 abort ();
13988 if (TARGET_64BIT && TARGET_RED_ZONE)
13989 {
13990 result = gen_rtx_MEM (mode,
13991 gen_rtx_PLUS (Pmode,
13992 stack_pointer_rtx,
13993 GEN_INT (-RED_ZONE_SIZE)));
13994 emit_move_insn (result, operand);
13995 }
13996 else if (TARGET_64BIT && !TARGET_RED_ZONE)
4211a8fb 13997 {
898d374d 13998 switch (mode)
4211a8fb 13999 {
14000 case HImode:
14001 case SImode:
14002 operand = gen_lowpart (DImode, operand);
14003 /* FALLTHRU */
14004 case DImode:
4211a8fb 14005 emit_insn (
14006 gen_rtx_SET (VOIDmode,
14007 gen_rtx_MEM (DImode,
14008 gen_rtx_PRE_DEC (DImode,
14009 stack_pointer_rtx)),
14010 operand));
14011 break;
14012 default:
14013 abort ();
14014 }
14015 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14016 }
14017 else
14018 {
14019 switch (mode)
14020 {
14021 case DImode:
14022 {
14023 rtx operands[2];
14024 split_di (&operand, 1, operands, operands + 1);
14025 emit_insn (
14026 gen_rtx_SET (VOIDmode,
14027 gen_rtx_MEM (SImode,
14028 gen_rtx_PRE_DEC (Pmode,
14029 stack_pointer_rtx)),
14030 operands[1]));
14031 emit_insn (
14032 gen_rtx_SET (VOIDmode,
14033 gen_rtx_MEM (SImode,
14034 gen_rtx_PRE_DEC (Pmode,
14035 stack_pointer_rtx)),
14036 operands[0]));
14037 }
14038 break;
14039 case HImode:
14040 /* It is better to store HImodes as SImodes. */
14041 if (!TARGET_PARTIAL_REG_STALL)
14042 operand = gen_lowpart (SImode, operand);
14043 /* FALLTHRU */
14044 case SImode:
4211a8fb 14045 emit_insn (
14046 gen_rtx_SET (VOIDmode,
14047 gen_rtx_MEM (GET_MODE (operand),
14048 gen_rtx_PRE_DEC (SImode,
14049 stack_pointer_rtx)),
14050 operand));
14051 break;
14052 default:
14053 abort ();
4211a8fb 14054 }
898d374d 14055 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14056 }
898d374d 14057 return result;
14058}
14059
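/* Illustration for a DImode operand: on x86-64 with a red zone the
   value is parked below the stack pointer at -RED_ZONE_SIZE(%rsp)
   without adjusting %rsp, without a red zone it is pushed, and on ia32
   it is split and pushed as two SImode words; in every case the
   returned MEM addresses the stored value.  */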
14060/* Free the operand from memory.  */
14061void
14062ix86_free_from_memory (mode)
14063 enum machine_mode mode;
14064{
14065 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14066 {
14067 int size;
14068
14069 if (mode == DImode || TARGET_64BIT)
14070 size = 8;
14071 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14072 size = 2;
14073 else
14074 size = 4;
14075 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14076 to a pop or add instruction if registers are available.  */
14077 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14078 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14079 GEN_INT (size))));
14080 }
4211a8fb 14081}
a946dd00 14082
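/* The matching release for ix86_force_to_memory: after a DImode push
   on ia32 this emits the equivalent of "leal 8(%esp), %esp", which
   peephole2 may turn into a pop or an add when a register is free;
   with the 64-bit red zone nothing was allocated, so nothing is
   freed.  */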
14083/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14084 QImode must go into class Q_REGS.
14085 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14086 movdf to do mem-to-mem moves through integer regs. */
14087enum reg_class
14088ix86_preferred_reload_class (x, class)
14089 rtx x;
14090 enum reg_class class;
14091{
14092 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14093 return NO_REGS;
14094 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14095 {
14096 /* SSE can't load any constant directly yet. */
14097 if (SSE_CLASS_P (class))
14098 return NO_REGS;
14099 /* Floats can load 0 and 1. */
14100 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14101 {
14102 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14103 if (MAYBE_SSE_CLASS_P (class))
14104 return (reg_class_subset_p (class, GENERAL_REGS)
14105 ? GENERAL_REGS : FLOAT_REGS);
14106 else
14107 return class;
14108 }
14109 /* General regs can load everything. */
14110 if (reg_class_subset_p (class, GENERAL_REGS))
14111 return GENERAL_REGS;
14112 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14113 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14114 return NO_REGS;
14115 }
14116 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14117 return NO_REGS;
14118 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14119 return Q_REGS;
14120 return class;
14121}
14122
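/* Concretely: reloading 0.0 or 1.0 into a float class stays legal
   (fldz/fld1 cover those), an arbitrary CONST_DOUBLE is steered to
   GENERAL_REGS so it goes through the constant pool, and constants
   bound for SSE or MMX classes are refused with NO_REGS.  */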
14123/* If we are copying between general and FP registers, we need a memory
14124 location. The same is true for SSE and MMX registers.
14125
14126 The macro can't work reliably when one of the CLASSES is a class containing
14127 registers from multiple units (SSE, MMX, integer). We avoid this by never
14128 combining those units in a single alternative in the machine description.
14129 Ensure that this constraint holds to avoid unexpected surprises.
14130
14131 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14132 enforce these sanity checks. */
14133int
14134ix86_secondary_memory_needed (class1, class2, mode, strict)
14135 enum reg_class class1, class2;
14136 enum machine_mode mode;
14137 int strict;
14138{
14139 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14140 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14141 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14142 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14143 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14144 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14145 {
14146 if (strict)
14147 abort ();
14148 else
14149 return 1;
14150 }
14151 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14152 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14153 && (mode) != SImode)
14154 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14155 && (mode) != SImode));
14156}
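/* Thus an SFmode copy between %eax and %st(0) must go through memory,
   while SImode moves between the SSE or MMX units and the integer
   registers (movd) are the one direct exception the mode tests above
   carve out.  */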
14157/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14158 one in class CLASS2.
14159
14160 It is not required that the cost always equal 2 when FROM is the same as TO;
14161 on some machines it is expensive to move between registers if they are not
14162 general registers. */
14163int
14164ix86_register_move_cost (mode, class1, class2)
14165 enum machine_mode mode;
14166 enum reg_class class1, class2;
14167{
14168 /* In case we require secondary memory, compute the cost of the store
14169 followed by a load. To avoid bad register allocation choices, we need
14170 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14171
14172 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14173 {
14174 int cost = 1;
14175
14176 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14177 MEMORY_MOVE_COST (mode, class1, 1));
14178 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14179 MEMORY_MOVE_COST (mode, class2, 1));
14180
14181 /* When copying from a general purpose register we may emit multiple
14182 stores followed by a single load, causing a memory size mismatch stall.
14183 Count this as an arbitrarily high cost of 20. */
62415523 14184 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14185 cost += 20;
14186
14187 /* In the case of FP/MMX moves, the registers actually overlap, and we
14188 have to switch modes in order to treat them differently. */
14189 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14190 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14191 cost += 20;
14192
14193 return cost;
f84aa48a 14194 }
d631b80a 14195
92d0fb09 14196 /* Moves between the SSE/MMX units and the integer unit are expensive. */
14197 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14198 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14199 return ix86_cost->mmxsse_to_integer;
14200 if (MAYBE_FLOAT_CLASS_P (class1))
14201 return ix86_cost->fp_move;
14202 if (MAYBE_SSE_CLASS_P (class1))
14203 return ix86_cost->sse_move;
14204 if (MAYBE_MMX_CLASS_P (class1))
14205 return ix86_cost->mmx_move;
14206 return 2;
14207}
14208
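/* E.g. an MMX-to-integer move is charged ix86_cost->mmxsse_to_integer
   rather than the default of 2, which steers the register allocator
   away from bouncing vector values through general registers.  */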
14209/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14210int
14211ix86_hard_regno_mode_ok (regno, mode)
14212 int regno;
14213 enum machine_mode mode;
14214{
14215 /* Flags, and only flags, can hold CCmode values. */
14216 if (CC_REGNO_P (regno))
14217 return GET_MODE_CLASS (mode) == MODE_CC;
14218 if (GET_MODE_CLASS (mode) == MODE_CC
14219 || GET_MODE_CLASS (mode) == MODE_RANDOM
14220 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14221 return 0;
14222 if (FP_REGNO_P (regno))
14223 return VALID_FP_MODE_P (mode);
14224 if (SSE_REGNO_P (regno))
14225 return VALID_SSE_REG_MODE (mode);
14226 if (MMX_REGNO_P (regno))
47f339cf 14227 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14228 /* We handle both integers and floats in the general purpose registers.
14229 In future we should be able to handle vector modes as well. */
14230 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14231 return 0;
14232 /* Take care with QImode values - they can be in non-QI regs, but then
14233 they cause partial register stalls. */
d2836273 14234 if (regno < 4 || mode != QImode || TARGET_64BIT)
14235 return 1;
14236 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14237}
14238
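/* The QImode special case mirrors the ISA: only %al/%bl/%cl/%dl exist
   on ia32 (regno < 4), while on x86-64 the REX prefix gives every GPR
   a byte register; other regs take QImode only when partial register
   stalls are tolerable or reload is in progress.  */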
14239/* Return the cost of moving data of mode M between a
14240 register and memory. A value of 2 is the default; this cost is
14241 relative to those in `REGISTER_MOVE_COST'.
14242
14243 If moving between registers and memory is more expensive than
14244 between two registers, you should define this macro to express the
14245 relative cost.
14246
14247 Also model the increased cost of moving QImode registers in
14248 non-Q_REGS classes.
14249 */
14250int
14251ix86_memory_move_cost (mode, class, in)
14252 enum machine_mode mode;
14253 enum reg_class class;
14254 int in;
14255{
14256 if (FLOAT_CLASS_P (class))
14257 {
14258 int index;
14259 switch (mode)
14260 {
14261 case SFmode:
14262 index = 0;
14263 break;
14264 case DFmode:
14265 index = 1;
14266 break;
14267 case XFmode:
14268 case TFmode:
14269 index = 2;
14270 break;
14271 default:
14272 return 100;
14273 }
14274 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14275 }
14276 if (SSE_CLASS_P (class))
14277 {
14278 int index;
14279 switch (GET_MODE_SIZE (mode))
14280 {
14281 case 4:
14282 index = 0;
14283 break;
14284 case 8:
14285 index = 1;
14286 break;
14287 case 16:
14288 index = 2;
14289 break;
14290 default:
14291 return 100;
14292 }
14293 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14294 }
14295 if (MMX_CLASS_P (class))
14296 {
14297 int index;
14298 switch (GET_MODE_SIZE (mode))
14299 {
14300 case 4:
14301 index = 0;
14302 break;
14303 case 8:
14304 index = 1;
14305 break;
14306 default:
14307 return 100;
14308 }
14309 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14310 }
14311 switch (GET_MODE_SIZE (mode))
14312 {
14313 case 1:
14314 if (in)
14315 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14316 : ix86_cost->movzbl_load);
14317 else
14318 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14319 : ix86_cost->int_store[0] + 4);
14320 break;
14321 case 2:
14322 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14323 default:
14324 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14325 if (mode == TFmode)
14326 mode = XFmode;
3bb7e126 14327 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14328 * ((int) GET_MODE_SIZE (mode)
14329 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14330 }
14331}
0ecf09f9 14332
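/* Worked example for the final case above: a DImode integer load costs
   int_load[2] * (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD, i.e. two
   32-bit moves on ia32 and a single move where the word size is 8.  */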
21c318ba 14333#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14334static void
14335ix86_svr3_asm_out_constructor (symbol, priority)
14336 rtx symbol;
14337 int priority ATTRIBUTE_UNUSED;
14338{
14339 init_section ();
14340 fputs ("\tpushl $", asm_out_file);
14341 assemble_name (asm_out_file, XSTR (symbol, 0));
14342 fputc ('\n', asm_out_file);
14343}
14344#endif
162f023b 14345
14346#if TARGET_MACHO
14347
14348static int current_machopic_label_num;
14349
14350/* Given a symbol name and its associated stub, write out the
14351 definition of the stub. */
14352
14353void
14354machopic_output_stub (file, symb, stub)
14355 FILE *file;
14356 const char *symb, *stub;
14357{
14358 unsigned int length;
14359 char *binder_name, *symbol_name, lazy_ptr_name[32];
14360 int label = ++current_machopic_label_num;
14361
14362 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14363 symb = (*targetm.strip_name_encoding) (symb);
14364
14365 length = strlen (stub);
14366 binder_name = alloca (length + 32);
14367 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14368
14369 length = strlen (symb);
14370 symbol_name = alloca (length + 32);
14371 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14372
14373 sprintf (lazy_ptr_name, "L%d$lz", label);
14374
14375 if (MACHOPIC_PURE)
14376 machopic_picsymbol_stub_section ();
14377 else
14378 machopic_symbol_stub_section ();
14379
14380 fprintf (file, "%s:\n", stub);
14381 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14382
14383 if (MACHOPIC_PURE)
14384 {
14385 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14386 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14387 fprintf (file, "\tjmp %%edx\n");
14388 }
14389 else
14390 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14391
14392 fprintf (file, "%s:\n", binder_name);
14393
14394 if (MACHOPIC_PURE)
14395 {
14396 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14397 fprintf (file, "\tpushl %%eax\n");
14398 }
14399 else
14400 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14401
14402 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14403
14404 machopic_lazy_symbol_ptr_section ();
14405 fprintf (file, "%s:\n", lazy_ptr_name);
14406 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14407 fprintf (file, "\t.long %s\n", binder_name);
14408}
14409#endif /* TARGET_MACHO */
14410
14411/* Order the registers for the register allocator. */
14412
14413void
14414x86_order_regs_for_local_alloc ()
14415{
14416 int pos = 0;
14417 int i;
14418
14419 /* First allocate the local general purpose registers. */
14420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14421 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14422 reg_alloc_order [pos++] = i;
14423
14424 /* Global general purpose registers. */
14425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14426 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14427 reg_alloc_order [pos++] = i;
14428
14429 /* x87 registers come first in case we are doing FP math
14430 using them. */
14431 if (!TARGET_SSE_MATH)
14432 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14433 reg_alloc_order [pos++] = i;
fce5a9f2 14434
14435 /* SSE registers. */
14436 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14437 reg_alloc_order [pos++] = i;
14438 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14439 reg_alloc_order [pos++] = i;
14440
14441 /* x87 registers. */
14442 if (TARGET_SSE_MATH)
14443 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14444 reg_alloc_order [pos++] = i;
14445
14446 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14447 reg_alloc_order [pos++] = i;
14448
14449 /* Initialize the rest of the array, as some registers are never
14450 allocated at all. */
14451 while (pos < FIRST_PSEUDO_REGISTER)
14452 reg_alloc_order [pos++] = 0;
14453}
194734e9 14454
14455#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14456#define TARGET_USE_MS_BITFIELD_LAYOUT 0
14457#endif
14458
14459static bool
14460ix86_ms_bitfield_layout_p (record_type)
14461 tree record_type ATTRIBUTE_UNUSED;
14462{
14463 return TARGET_USE_MS_BITFIELD_LAYOUT;
14464}
14465
14466/* Returns an expression indicating where the this parameter is
14467 located on entry to the FUNCTION. */
14468
14469static rtx
3961e8fe 14470x86_this_parameter (function)
14471 tree function;
14472{
14473 tree type = TREE_TYPE (function);
14474
14475 if (TARGET_64BIT)
14476 {
14477 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14478 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14479 }
14480
14481 if (ix86_fntype_regparm (type) > 0)
14482 {
14483 tree parm;
14484
14485 parm = TYPE_ARG_TYPES (type);
14486 /* Figure out whether or not the function has a variable number of
14487 arguments. */
3961e8fe 14488 for (; parm; parm = TREE_CHAIN (parm))
14489 if (TREE_VALUE (parm) == void_type_node)
14490 break;
14491 /* If not, the this parameter is in %eax. */
14492 if (parm)
14493 return gen_rtx_REG (SImode, 0);
14494 }
14495
14496 if (aggregate_value_p (TREE_TYPE (type)))
14497 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14498 else
14499 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14500}
14501
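/* So for the plain ia32 stack convention `this' is at 4(%esp), or at
   8(%esp) when a hidden aggregate-return pointer is passed first; with
   regparm and a fixed argument list it is already in %eax, and on
   64-bit targets it arrives in the first or second integer argument
   register.  */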
14502/* Determine whether x86_output_mi_thunk can succeed. */
14503
14504static bool
14505x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14506 tree thunk ATTRIBUTE_UNUSED;
14507 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14508 HOST_WIDE_INT vcall_offset;
14509 tree function;
14510{
14511 /* 64-bit can handle anything. */
14512 if (TARGET_64BIT)
14513 return true;
14514
14515 /* For 32-bit, everything's fine if we have one free register. */
14516 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14517 return true;
14518
14519 /* Need a free register for vcall_offset. */
14520 if (vcall_offset)
14521 return false;
14522
14523 /* Need a free register for GOT references. */
14524 if (flag_pic && !(*targetm.binds_local_p) (function))
14525 return false;
14526
14527 /* Otherwise ok. */
14528 return true;
14529}
14530
14531/* Output the assembler code for a thunk function. THUNK_DECL is the
14532 declaration for the thunk function itself, FUNCTION is the decl for
14533 the target function. DELTA is an immediate constant offset to be
272d0bee 14534 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 14535 *(*this + vcall_offset) should be added to THIS. */
483ab821 14536
c590b625 14537static void
3961e8fe
RH
14538x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14539 FILE *file ATTRIBUTE_UNUSED;
483ab821 14540 tree thunk ATTRIBUTE_UNUSED;
eb0424da 14541 HOST_WIDE_INT delta;
3961e8fe 14542 HOST_WIDE_INT vcall_offset;
194734e9
JH
14543 tree function;
14544{
194734e9 14545 rtx xops[3];
3961e8fe
RH
14546 rtx this = x86_this_parameter (function);
14547 rtx this_reg, tmp;
194734e9 14548
3961e8fe
RH
14549 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14550 pull it in now and let DELTA benefit. */
14551 if (REG_P (this))
14552 this_reg = this;
14553 else if (vcall_offset)
14554 {
14555 /* Put the this parameter into %eax. */
14556 xops[0] = this;
14557 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14558 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14559 }
14560 else
14561 this_reg = NULL_RTX;
14562
14563 /* Adjust the this parameter by a fixed constant. */
14564 if (delta)
194734e9 14565 {
483ab821 14566 xops[0] = GEN_INT (delta);
3961e8fe
RH
14567 xops[1] = this_reg ? this_reg : this;
14568 if (TARGET_64BIT)
194734e9 14569 {
3961e8fe
RH
14570 if (!x86_64_general_operand (xops[0], DImode))
14571 {
14572 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14573 xops[1] = tmp;
14574 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14575 xops[0] = tmp;
14576 xops[1] = this;
14577 }
14578 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
14579 }
14580 else
3961e8fe 14581 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 14582 }
3961e8fe
RH
14583
14584 /* Adjust the this parameter by a value stored in the vtable. */
14585 if (vcall_offset)
194734e9 14586 {
3961e8fe
RH
14587 if (TARGET_64BIT)
14588 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14589 else
14590 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
483ab821 14591
3961e8fe
RH
14592 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14593 xops[1] = tmp;
14594 if (TARGET_64BIT)
14595 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14596 else
14597 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 14598
3961e8fe
RH
14599 /* Adjust the this parameter. */
14600 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14601 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14602 {
14603 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14604 xops[0] = GEN_INT (vcall_offset);
14605 xops[1] = tmp2;
14606 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14607 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 14608 }
3961e8fe
RH
14609 xops[1] = this_reg;
14610 if (TARGET_64BIT)
14611 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14612 else
14613 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14614 }
194734e9 14615
3961e8fe
RH
14616 /* If necessary, drop THIS back to its stack slot. */
14617 if (this_reg && this_reg != this)
14618 {
14619 xops[0] = this_reg;
14620 xops[1] = this;
14621 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14622 }
194734e9 14623
3961e8fe
RH
14624 xops[0] = DECL_RTL (function);
14625 if (TARGET_64BIT)
14626 {
14627 if (!flag_pic || (*targetm.binds_local_p) (function))
14628 output_asm_insn ("jmp\t%P0", xops);
14629 else
fcbe3b89
RH
14630 {
14631 tmp = XEXP (xops[0], 0);
14632 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14633 tmp = gen_rtx_CONST (Pmode, tmp);
14634 tmp = gen_rtx_MEM (QImode, tmp);
14635 xops[0] = tmp;
14636 output_asm_insn ("jmp\t%A0", xops);
14637 }
3961e8fe
RH
14638 }
14639 else
14640 {
14641 if (!flag_pic || (*targetm.binds_local_p) (function))
14642 output_asm_insn ("jmp\t%P0", xops);
194734e9
JH
14643 else
14644 {
3961e8fe
RH
14645 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14646 output_set_got (tmp);
14647
14648 xops[1] = tmp;
14649 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14650 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
14651 }
14652 }
14653}
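/* Illustrative output (a sketch under assumed flags, not part of the
   original source): on ia32, non-PIC, with DELTA == 8, VCALL_OFFSET == 0
   and `this' on the stack, the code above reduces the thunk body to

	addl	$8, 4(%esp)
	jmp	target_function

   With a nonzero VCALL_OFFSET, `this' is first pulled into %eax, %ecx is
   used to fetch the vtable slot, and %eax is written back to the stack
   slot before the jump.  */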
e2500fed 14654
e932b21b
JH
14655int
14656x86_field_alignment (field, computed)
14657 tree field;
14658 int computed;
14659{
14660 enum machine_mode mode;
ad9335eb
JJ
14661 tree type = TREE_TYPE (field);
14662
14663 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 14664 return computed;
ad9335eb
JJ
14665 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14666 ? get_inner_array_type (type) : type);
39e3a681
JJ
14667 if (mode == DFmode || mode == DCmode
14668 || GET_MODE_CLASS (mode) == MODE_INT
14669 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
14670 return MIN (32, computed);
14671 return computed;
14672}
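/* Worked example (a sketch, not part of the original source): on ia32
   without -malign-double, a field such as

	struct s { char c; double d; };

   has `d' aligned to 32 bits, so offsetof (struct s, d) is 4; with
   TARGET_64BIT or TARGET_ALIGN_DOUBLE the natural 64-bit alignment is
   kept and the offset is 8.  */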
14673
a5fa1ecd
JH
14674/* Output assembler code to FILE to increment profiler label # LABELNO
14675 for profiling a function entry. */
14676void
14677x86_function_profiler (file, labelno)
14678 FILE *file;
14679 int labelno;
14680{
14681 if (TARGET_64BIT)
14682 if (flag_pic)
14683 {
14684#ifndef NO_PROFILE_COUNTERS
14685 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
14686#endif
14687 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14688 }
14689 else
14690 {
14691#ifndef NO_PROFILE_COUNTERS
14692 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14693#endif
14694 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14695 }
14696 else if (flag_pic)
14697 {
14698#ifndef NO_PROFILE_COUNTERS
14699 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14700 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14701#endif
14702 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14703 }
14704 else
14705 {
14706#ifndef NO_PROFILE_COUNTERS
14707 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14708 PROFILE_COUNT_REGISTER);
14709#endif
14710 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14711 }
14712}
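/* Illustrative output (a sketch, not part of the original source):
   assuming MCOUNT_NAME is "mcount" and PROFILE_COUNT_REGISTER is "edx"
   on the target at hand, -m32 -pg -fpic with labelno == 0 emits roughly

	leal	.LP0@GOTOFF(%ebx),%edx
	call	*mcount@GOT(%ebx)
   */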
14713
2a500b9e
JH
14714/* Implement machine-specific optimizations.
14715 At the moment we implement a single transformation: AMD Athlon works
14716 faster when RET is not the destination of a conditional jump or directly
14717 preceded by another jump instruction. We avoid the penalty by inserting
14718 a NOP just before RET instructions in such cases. */
14719void
14720x86_machine_dependent_reorg (first)
14721 rtx first ATTRIBUTE_UNUSED;
14722{
14723 edge e;
14724
4977bab6 14725 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
2a500b9e
JH
14726 return;
14727 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14728 {
14729 basic_block bb = e->src;
14730 rtx ret = bb->end;
14731 rtx prev;
14732 bool insert = false;
14733
14734 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14735 continue;
4977bab6
ZW
14736 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14737 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14738 break;
2a500b9e
JH
14739 if (prev && GET_CODE (prev) == CODE_LABEL)
14740 {
14741 edge e;
14742 for (e = bb->pred; e; e = e->pred_next)
4977bab6 14743 if (EDGE_FREQUENCY (e) && e->src->index >= 0
2a500b9e
JH
14744 && !(e->flags & EDGE_FALLTHRU))
14745 insert = 1;
14746 }
14747 if (!insert)
14748 {
4977bab6 14749 prev = prev_active_insn (ret);
2a500b9e
JH
14750 if (prev && GET_CODE (prev) == JUMP_INSN
14751 && any_condjump_p (prev))
14752 insert = 1;
4977bab6
ZW
14753 /* Empty functions get a branch mispredict even when the jump destination
14754 is not visible to us. */
14755 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14756 insert = 1;
2a500b9e
JH
14757 }
14758 if (insert)
14759 emit_insn_before (gen_nop (), ret);
14760 }
14761}
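/* Illustrative effect (a sketch, not part of the original source): a hot
   epilogue such as

	jne	.L2
	ret

   is rewritten as

	jne	.L2
	nop
	ret

   so the RET is no longer directly preceded by a jump on Athlon/K8.  */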
14762
4977bab6
ZW
14763/* Return nonzero when a QImode register that must be represented via a
14764 REX prefix is used. */
14765bool
14766x86_extended_QIreg_mentioned_p (insn)
14767 rtx insn;
14768{
14769 int i;
14770 extract_insn_cached (insn);
14771 for (i = 0; i < recog_data.n_operands; i++)
14772 if (REG_P (recog_data.operand[i])
14773 && REGNO (recog_data.operand[i]) >= 4)
14774 return true;
14775 return false;
14776}
14777
14778/* Return nonzero when P points to a register encoded via a REX prefix.
14779 Called via for_each_rtx. */
14780static int
14781extended_reg_mentioned_1 (p, data)
14782 rtx *p;
14783 void *data ATTRIBUTE_UNUSED;
14784{
14785 unsigned int regno;
14786 if (!REG_P (*p))
14787 return 0;
14788 regno = REGNO (*p);
14789 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14790}
14791
14792/* Return true when INSN mentions a register that must be encoded using a
14793 REX prefix. */
14794bool
14795x86_extended_reg_mentioned_p (insn)
14796 rtx insn;
14797{
14798 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14799}
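/* Example (a sketch, not part of the original source): an insn touching
   %r8d or %xmm12 makes x86_extended_reg_mentioned_p return true, while
   one touching %sil or %dil trips x86_extended_QIreg_mentioned_p, since
   QImode access to registers numbered 4 and above requires a REX prefix
   in 64-bit mode.  */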
14800
e2500fed 14801#include "gt-i386.h"