]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
i386.c (machopic_output_stub): Output \t between instructions and operands.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
5bf5a10b 3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
2a2ab3f9 4
188fc5b5 5This file is part of GCC.
2a2ab3f9 6
188fc5b5 7GCC is free software; you can redistribute it and/or modify
2a2ab3f9
JVA
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
188fc5b5 12GCC is distributed in the hope that it will be useful,
2a2ab3f9
JVA
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
188fc5b5 18along with GCC; see the file COPYING. If not, write to
39d14dda
KC
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9 34#include "output.h"
8bc527af 35#include "insn-codes.h"
2a2ab3f9 36#include "insn-attr.h"
2a2ab3f9 37#include "flags.h"
a8ffcc81 38#include "except.h"
ecbc4695 39#include "function.h"
00c79232 40#include "recog.h"
ced8dd8c 41#include "expr.h"
e78d8e51 42#include "optabs.h"
f103890b 43#include "toplev.h"
e075ae69 44#include "basic-block.h"
1526a060 45#include "ggc.h"
672a6f42
NB
46#include "target.h"
47#include "target-def.h"
f1e639b1 48#include "langhooks.h"
dafc5b82 49#include "cgraph.h"
cd3ce9b4 50#include "tree-gimple.h"
72ce3d4a 51#include "dwarf2.h"
279bb624 52#include "tm-constrs.h"
2a2ab3f9 53
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
2ab0437e 66/* Processor costs (relative to an add) */
3dd0df7f
RS
67/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68#define COSTS_N_BYTES(N) ((N) * 2)
69
fce5a9f2 70static const
2ab0437e 71struct processor_costs size_cost = { /* costs for tunning for size */
3dd0df7f
RS
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
2ab0437e 81 0, /* cost of multiply per each bit set */
3dd0df7f
RS
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
2ab0437e
JH
89 0, /* "large" insn */
90 2, /* MOVE_RATIO */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
75bcbcdb
L
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
2ab0437e
JH
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
f4365627
JH
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
3dd0df7f
RS
114 2, /* Branch cost */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
2ab0437e 121};
229b303a 122
32b5b1aa 123/* Processor costs (relative to an add) */
fce5a9f2 124static const
32b5b1aa 125struct processor_costs i386_cost = { /* 386 specific costs */
a9cc9cc6
JH
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 143 15, /* "large" insn */
e2e52e1b 144 3, /* MOVE_RATIO */
7c6b971d 145 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
0f290768 148 Relative to reg-reg move (2). */
96e7ae40
JH
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
75bcbcdb
L
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
fa79946e
JH
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
f4365627
JH
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
4977bab6 168 1, /* Branch cost */
a9cc9cc6
JH
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
32b5b1aa
SC
175};
176
fce5a9f2 177static const
32b5b1aa 178struct processor_costs i486_cost = { /* 486 specific costs */
a9cc9cc6
JH
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
32b5b1aa 188 1, /* cost of multiply per each bit set */
a9cc9cc6
JH
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 196 15, /* "large" insn */
e2e52e1b 197 3, /* MOVE_RATIO */
7c6b971d 198 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
0f290768 201 Relative to reg-reg move (2). */
96e7ae40
JH
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
75bcbcdb
L
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
fa79946e
JH
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
f4365627
JH
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
4977bab6 221 1, /* Branch cost */
a9cc9cc6
JH
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
32b5b1aa
SC
228};
229
fce5a9f2 230static const
e5cb57e8 231struct processor_costs pentium_cost = {
a9cc9cc6
JH
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
856b07a1 241 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 249 8, /* "large" insn */
e2e52e1b 250 6, /* MOVE_RATIO */
7c6b971d 251 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
0f290768 254 Relative to reg-reg move (2). */
96e7ae40
JH
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
75bcbcdb
L
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
fa79946e
JH
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
f4365627
JH
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
4977bab6 274 2, /* Branch cost */
a9cc9cc6
JH
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
32b5b1aa
SC
281};
282
fce5a9f2 283static const
856b07a1 284struct processor_costs pentiumpro_cost = {
a9cc9cc6
JH
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
856b07a1 294 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
96e7ae40 302 8, /* "large" insn */
e2e52e1b 303 6, /* MOVE_RATIO */
7c6b971d 304 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
0f290768 307 Relative to reg-reg move (2). */
96e7ae40
JH
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
75bcbcdb
L
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
fa79946e
JH
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
f4365627
JH
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
4977bab6 327 2, /* Branch cost */
a9cc9cc6
JH
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
856b07a1
SC
334};
335
fce5a9f2 336static const
a269a03c 337struct processor_costs k6_cost = {
a9cc9cc6
JH
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
a269a03c 347 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
96e7ae40 355 8, /* "large" insn */
e2e52e1b 356 4, /* MOVE_RATIO */
7c6b971d 357 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
0f290768 360 Relative to reg-reg move (2). */
96e7ae40
JH
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
75bcbcdb
L
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
fa79946e
JH
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
f4365627
JH
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
4977bab6 380 1, /* Branch cost */
a9cc9cc6
JH
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
a269a03c
JC
387};
388
fce5a9f2 389static const
309ada50 390struct processor_costs athlon_cost = {
a9cc9cc6
JH
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
309ada50 400 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
309ada50 408 8, /* "large" insn */
e2e52e1b 409 9, /* MOVE_RATIO */
309ada50 410 4, /* cost for loading QImode using movzbl */
b72b1c29 411 {3, 4, 3}, /* cost of loading integer registers
309ada50 412 in QImode, HImode and SImode.
0f290768 413 Relative to reg-reg move (2). */
b72b1c29 414 {3, 4, 3}, /* cost of storing integer registers */
309ada50 415 4, /* cost of reg,reg fld/fst */
b72b1c29 416 {4, 4, 12}, /* cost of loading fp registers
309ada50 417 in SFmode, DFmode and XFmode */
75bcbcdb
L
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
fa79946e 420 2, /* cost of moving MMX register */
b72b1c29 421 {4, 4}, /* cost of loading MMX registers
fa79946e 422 in SImode and DImode */
b72b1c29 423 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
b72b1c29 426 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 427 in SImode, DImode and TImode */
b72b1c29 428 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 429 in SImode, DImode and TImode */
b72b1c29 430 5, /* MMX or SSE register to integer */
f4365627
JH
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
8c1e80e9 433 5, /* Branch cost */
a9cc9cc6
JH
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
309ada50
JH
440};
441
4977bab6
ZW
442static const
443struct processor_costs k8_cost = {
a9cc9cc6
JH
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
4977bab6 453 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
4977bab6
ZW
461 8, /* "large" insn */
462 9, /* MOVE_RATIO */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
75bcbcdb
L
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
4977bab6
ZW
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
8c1e80e9 486 5, /* Branch cost */
a9cc9cc6
JH
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
4977bab6
ZW
493};
494
fce5a9f2 495static const
b4e89e2d 496struct processor_costs pentium4_cost = {
a9cc9cc6
JH
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
b4e89e2d 506 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
b4e89e2d
JH
514 16, /* "large" insn */
515 6, /* MOVE_RATIO */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
75bcbcdb
L
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
b4e89e2d
JH
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
f4365627
JH
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
4977bab6 539 2, /* Branch cost */
a9cc9cc6
JH
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
b4e89e2d
JH
546};
547
89c43c0a
VM
548static const
549struct processor_costs nocona_cost = {
a9cc9cc6
JH
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
89c43c0a 559 0, /* cost of multiply per each bit set */
a9cc9cc6
JH
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
89c43c0a 567 16, /* "large" insn */
ea407814 568 17, /* MOVE_RATIO */
89c43c0a
VM
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
75bcbcdb
L
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
89c43c0a
VM
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
592 1, /* Branch cost */
a9cc9cc6
JH
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
89c43c0a
VM
599};
600
d326eaf0
JH
601/* Generic64 should produce code tuned for Nocona and K8. */
602static const
603struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecesary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 17, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
75bcbcdb
L
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
d326eaf0
JH
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
652 3, /* Branch cost */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
659};
660
661/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
662static const
663struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
682 17, /* MOVE_RATIO */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
75bcbcdb
L
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
d326eaf0
JH
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
706 3, /* Branch cost */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
713};
714
8b60264b 715const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 716
a269a03c
JC
717/* Processor feature/optimization bitmasks. */
718#define m_386 (1<<PROCESSOR_I386)
719#define m_486 (1<<PROCESSOR_I486)
720#define m_PENT (1<<PROCESSOR_PENTIUM)
721#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722#define m_K6 (1<<PROCESSOR_K6)
309ada50 723#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 724#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
725#define m_K8 (1<<PROCESSOR_K8)
726#define m_ATHLON_K8 (m_K8 | m_ATHLON)
89c43c0a 727#define m_NOCONA (1<<PROCESSOR_NOCONA)
d326eaf0
JH
728#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
731
732/* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
734
735/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
a269a03c 740const int x86_zero_extend_with_and = m_486 | m_PENT;
d326eaf0 741const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
e075ae69 742const int x86_double_with_add = ~m_386;
a269a03c 743const int x86_use_bit_test = m_386;
d326eaf0
JH
744const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
9199f050 746const int x86_fisttp = m_NOCONA;
4977bab6 747const int x86_3dnow_a = m_ATHLON_K8;
d326eaf0 748const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
d20bf446
L
749/* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753const int x86_branch_hints = 0;
d326eaf0
JH
754const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755/* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
c0220ea4 758 be introduced via subregs synthesized by combine and can happen
d326eaf0
JH
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
e075ae69 763const int x86_partial_reg_stall = m_PPRO;
0e8c2b0d 764const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
d326eaf0 765const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
e075ae69 766const int x86_use_mov0 = m_K6;
d326eaf0 767const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
e075ae69
RH
768const int x86_read_modify_write = ~m_PENT;
769const int x86_read_modify = ~(m_PENT | m_PPRO);
770const int x86_split_long_moves = m_PPRO;
d326eaf0 771const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
285464d0 772const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 773const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
774const int x86_qimode_math = ~(0);
775const int x86_promote_qi_regs = 0;
d326eaf0
JH
776/* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
d9f32422
JH
779const int x86_himode_math = ~(m_PPRO);
780const int x86_promote_hi_regs = m_PPRO;
d326eaf0
JH
781const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
495333a6 791const int x86_shift1 = ~m_486;
d326eaf0 792const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
c0220ea4 793/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
d326eaf0
JH
794 that thread 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results shows that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
801 of moves. */
802const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
41afe4ef
RH
803/* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part
806 undefined. */
807const int x86_sse_split_regs = m_ATHLON_K8;
4977bab6 808const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 809const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
810const int x86_use_ffreep = m_ATHLON_K8;
811const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
d326eaf0 812const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
51df7179
RH
813
814/* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
817
d326eaf0 818const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
be04394b
JH
819/* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
d326eaf0
JH
821const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
7cacf53e 823const int x86_use_bt = m_ATHLON_K8;
1ef45b77
RH
824/* Compare and exchange was added for 80486. */
825const int x86_cmpxchg = ~m_386;
a0274e3e
JJ
826/* Compare and exchange 8 bytes was added for pentium. */
827const int x86_cmpxchg8b = ~(m_386 | m_486);
828/* Compare and exchange 16 bytes was added for nocona. */
829const int x86_cmpxchg16b = m_NOCONA;
1ef45b77
RH
830/* Exchange and add was added for 80486. */
831const int x86_xadd = ~m_386;
d326eaf0 832const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
a269a03c 833
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 838
5bf0ebab
RH
839/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
843
844/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 846
e075ae69 847enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
848{
849 /* ax, dx, cx, bx */
ab408a86 850 AREG, DREG, CREG, BREG,
4c0d89b5 851 /* si, di, bp, sp */
e075ae69 852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
853 /* FP registers */
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 856 /* arg pointer */
83774849 857 NON_Q_REGS,
564d80f4 858 /* flags, fpsr, dirflag, frame */
a7180f70
BS
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
861 SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
863 MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
867 SSE_REGS, SSE_REGS,
4c0d89b5 868};
c572e5ba 869
3d117b30 870/* The "default" register map used in 32bit mode. */
83774849 871
0f290768 872int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
873{
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
881};
882
5bf0ebab
RH
883static int const x86_64_int_parameter_registers[6] =
884{
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
887};
888
/* Integer registers used for returning values in 64-bit mode.
   Note: gcc regno 1 is dx (see the "ax, dx, cx, bx" ordering of
   regclass_map above), so the second entry is RDX — the original
   comment mislabeled it as RDI, which is regno 5 (the third entry).  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
53c17031 893
0f7fa3d0
JH
894/* The "default" register map used in 64bit mode. */
895int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
896{
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
904};
905
83774849
RH
906/* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
950 numbers.
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
959*/
0f290768 960int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
961{
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
969};
970
c572e5ba
JVA
971/* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
973
07933f72
GS
974rtx ix86_compare_op0 = NULL_RTX;
975rtx ix86_compare_op1 = NULL_RTX;
1ef45b77 976rtx ix86_compare_emitted = NULL_RTX;
f5316dfe 977
8362f420
JH
/* Size of the register save area used by va_arg in 64-bit mode
   (integer regparm slots plus 16 bytes per SSE regparm slot).  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
980
981/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
982
983struct stack_local_entry GTY(())
984{
985 unsigned short mode;
986 unsigned short n;
987 rtx rtl;
988 struct stack_local_entry *next;
989};
990
4dd2ac2c
JH
991/* Structure describing stack frame layout.
992 Stack grows downward:
993
994 [arguments]
995 <- ARG_POINTER
996 saved pc
997
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1000 [saved regs]
1001
1002 [padding1] \
1003 )
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1006 [frame] (
1007 )
1008 [padding2] /
1009 */
1010struct ix86_frame
1011{
1012 int nregs;
1013 int padding1;
8362f420 1014 int va_arg_size;
4dd2ac2c
JH
1015 HOST_WIDE_INT frame;
1016 int padding2;
1017 int outgoing_arguments_size;
8362f420 1018 int red_zone_size;
4dd2ac2c
JH
1019
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
1025
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
4dd2ac2c
JH
1029};
1030
55bea00a 1031/* Code model option. */
6189a572 1032enum cmodel ix86_cmodel;
80f33d06 1033/* Asm dialect. */
80f33d06 1034enum asm_dialect ix86_asm_dialect = ASM_ATT;
5bf5a10b 1035/* TLS dialects. */
f996902d 1036enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 1037
5bf0ebab 1038/* Which unit we are generating floating point math for. */
965f5423
JH
1039enum fpmath_unit ix86_fpmath;
1040
5bf0ebab 1041/* Which cpu are we scheduling for. */
9e555526 1042enum processor_type ix86_tune;
5bf0ebab
RH
1043/* Which instruction set architecture to use. */
1044enum processor_type ix86_arch;
c8c5cb99 1045
f4365627
JH
1046/* true if sse prefetch instruction is not NOOP. */
1047int x86_prefetch_sse;
1048
e075ae69 1049/* ix86_regparm_string as a number */
6ac49599 1050static int ix86_regparm;
e9a25f70 1051
3af4bd89 1052/* Preferred alignment for stack boundary in bits. */
95899b34 1053unsigned int ix86_preferred_stack_boundary;
3af4bd89 1054
e9a25f70 1055/* Values 1-5: see jump.c */
e075ae69 1056int ix86_branch_cost;
623fe810 1057
7dcbf659
JH
1058/* Variables which are this size or smaller are put in the data/bss
1059 or ldata/lbss sections. */
1060
1061int ix86_section_threshold = 65536;
1062
623fe810 1063/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
8fe75e43
RH
1064char internal_label_prefix[16];
1065int internal_label_prefix_len;
ee963181
RG
1066
1067/* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1068static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
e075ae69 1069\f
6ac49599 1070static bool ix86_handle_option (size_t, const char *, int);
b96a374d
AJ
1071static void output_pic_addr_const (FILE *, rtx, int);
1072static void put_condition_code (enum rtx_code, enum machine_mode,
1073 int, int, FILE *);
1074static const char *get_some_local_dynamic_name (void);
1075static int get_some_local_dynamic_name_1 (rtx *, void *);
b96a374d
AJ
1076static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1077static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1078 rtx *);
e129d93a
ILT
1079static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1080static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1081 enum machine_mode);
b96a374d
AJ
1082static rtx get_thread_pointer (int);
1083static rtx legitimize_tls_address (rtx, enum tls_model, int);
1084static void get_pc_thunk_name (char [32], unsigned int);
1085static rtx gen_push (rtx);
b96a374d
AJ
1086static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1087static int ix86_agi_dependant (rtx, rtx, enum attr_type);
b96a374d
AJ
1088static struct machine_function * ix86_init_machine_status (void);
1089static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1090static int ix86_nsaved_regs (void);
1091static void ix86_emit_save_regs (void);
1092static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 1093static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 1094static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
1095static HOST_WIDE_INT ix86_GOT_alias_set (void);
1096static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1097static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 1098static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
1099static int ix86_issue_rate (void);
1100static int ix86_adjust_cost (rtx, rtx, rtx, int);
b96a374d
AJ
1101static int ia32_multipass_dfa_lookahead (void);
1102static void ix86_init_mmx_sse_builtins (void);
ee963181 1103static void ix86_init_sse_abi_builtins (void);
b96a374d
AJ
1104static rtx x86_this_parameter (tree);
1105static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108static void x86_file_start (void);
1109static void ix86_reorg (void);
c35d187f
RH
1110static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
1112static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1113 tree, int *, int);
23a60a04 1114static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
a81083b2 1115static bool ix86_scalar_mode_supported_p (enum machine_mode);
f676971a 1116static bool ix86_vector_mode_supported_p (enum machine_mode);
e075ae69 1117
b96a374d
AJ
1118static int ix86_address_cost (rtx);
1119static bool ix86_cannot_force_const_mem (rtx);
1120static rtx ix86_delegitimize_address (rtx);
bd793c65 1121
fdbe66f2
EB
1122static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1123
bd793c65 1124struct builtin_description;
b96a374d
AJ
1125static rtx ix86_expand_sse_comi (const struct builtin_description *,
1126 tree, rtx);
1127static rtx ix86_expand_sse_compare (const struct builtin_description *,
1128 tree, rtx);
1129static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133static rtx safe_vector_operand (rtx, enum machine_mode);
b96a374d
AJ
1134static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138static int ix86_fp_comparison_cost (enum rtx_code code);
1139static unsigned int ix86_select_alt_pic_regnum (void);
1140static int ix86_save_reg (unsigned int, int);
1141static void ix86_compute_frame_layout (struct ix86_frame *);
1142static int ix86_comp_type_attributes (tree, tree);
e767b5be 1143static int ix86_function_regparm (tree, tree);
91d231cb 1144const struct attribute_spec ix86_attribute_table[];
b96a374d 1145static bool ix86_function_ok_for_sibcall (tree, tree);
2f84b963 1146static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
cb1119b7 1147static int ix86_value_regno (enum machine_mode, tree, tree);
b96a374d 1148static bool contains_128bit_aligned_vector_p (tree);
0397ac35 1149static rtx ix86_struct_value_rtx (tree, int);
b96a374d
AJ
1150static bool ix86_ms_bitfield_layout_p (tree);
1151static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152static int extended_reg_mentioned_1 (rtx *, void *);
1153static bool ix86_rtx_costs (rtx, int, int, int *);
1154static int min_insn_size (rtx);
61158923 1155static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
fe984136 1156static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
8cd5a4e0
RH
1157static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1158 tree, bool);
eb701deb
RH
1159static void ix86_init_builtins (void);
1160static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
ee963181 1161static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
cac24f06 1162static const char *ix86_mangle_fundamental_type (tree);
7ce918c5 1163static tree ix86_stack_protect_fail (void);
150cdc9e
RH
1164static rtx ix86_internal_arg_pointer (void);
1165static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
7c262518 1166
7915fbaa
MM
1167/* This function is only used on Solaris. */
1168static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1169 ATTRIBUTE_UNUSED;
e56feed6 1170
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Human-readable names for the classes above, in the same order.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

/* Maximum number of eightbyte classes a single argument can occupy.  */
#define MAX_CLASSES 4
881b2a96 1199
43f3a59d 1200/* Table of constants used by fldpi, fldln2, etc.... */
881b2a96
RS
1201static REAL_VALUE_TYPE ext_80387_constants_table [5];
1202static bool ext_80387_constants_init = 0;
b96a374d 1203static void init_ext_80387_constants (void);
f18faab7
DS
1204static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1205static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
7dcbf659 1206static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
d6b5193b
RS
1207static section *x86_64_elf_select_section (tree decl, int reloc,
1208 unsigned HOST_WIDE_INT align)
1209 ATTRIBUTE_UNUSED;
672a6f42
NB
1210\f
1211/* Initialize the GCC target structure. */
91d231cb
JM
1212#undef TARGET_ATTRIBUTE_TABLE
1213#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
b2ca3702 1214#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
1215# undef TARGET_MERGE_DECL_ATTRIBUTES
1216# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
1217#endif
1218
8d8e52be
JM
1219#undef TARGET_COMP_TYPE_ATTRIBUTES
1220#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221
f6155fda
SS
1222#undef TARGET_INIT_BUILTINS
1223#define TARGET_INIT_BUILTINS ix86_init_builtins
f6155fda
SS
1224#undef TARGET_EXPAND_BUILTIN
1225#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
ee963181
RG
1226#undef TARGET_EXPAND_LIBRARY_BUILTIN
1227#define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
f6155fda 1228
bd09bdeb
RH
1229#undef TARGET_ASM_FUNCTION_EPILOGUE
1230#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 1231
7dcbf659 1232#undef TARGET_ENCODE_SECTION_INFO
f18faab7 1233#ifndef SUBTARGET_ENCODE_SECTION_INFO
7dcbf659 1234#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
f18faab7
DS
1235#else
1236#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1237#endif
7dcbf659 1238
17b53c33
NB
1239#undef TARGET_ASM_OPEN_PAREN
1240#define TARGET_ASM_OPEN_PAREN ""
1241#undef TARGET_ASM_CLOSE_PAREN
1242#define TARGET_ASM_CLOSE_PAREN ""
1243
301d03af
RS
1244#undef TARGET_ASM_ALIGNED_HI_OP
1245#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1246#undef TARGET_ASM_ALIGNED_SI_OP
1247#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1248#ifdef ASM_QUAD
1249#undef TARGET_ASM_ALIGNED_DI_OP
1250#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1251#endif
1252
1253#undef TARGET_ASM_UNALIGNED_HI_OP
1254#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1255#undef TARGET_ASM_UNALIGNED_SI_OP
1256#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1257#undef TARGET_ASM_UNALIGNED_DI_OP
1258#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1259
c237e94a
ZW
1260#undef TARGET_SCHED_ADJUST_COST
1261#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1262#undef TARGET_SCHED_ISSUE_RATE
1263#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
9b690711
RH
1264#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1265#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1266 ia32_multipass_dfa_lookahead
c237e94a 1267
4977bab6
ZW
1268#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1269#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1270
f996902d
RH
1271#ifdef HAVE_AS_TLS
1272#undef TARGET_HAVE_TLS
1273#define TARGET_HAVE_TLS true
1274#endif
3a04ff64
RH
1275#undef TARGET_CANNOT_FORCE_CONST_MEM
1276#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1d3dbd99
RS
1277#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1278#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
f996902d 1279
7daebb7a 1280#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1281#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1282
4977bab6
ZW
1283#undef TARGET_MS_BITFIELD_LAYOUT_P
1284#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1285
31920d83
DJ
1286#if TARGET_MACHO
1287#undef TARGET_BINDS_LOCAL_P
1288#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1289#endif
1290
c590b625
RH
1291#undef TARGET_ASM_OUTPUT_MI_THUNK
1292#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1293#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1294#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1295
1bc7c5b6
ZW
1296#undef TARGET_ASM_FILE_START
1297#define TARGET_ASM_FILE_START x86_file_start
1298
6ac49599
RS
1299#undef TARGET_DEFAULT_TARGET_FLAGS
1300#define TARGET_DEFAULT_TARGET_FLAGS \
1301 (TARGET_DEFAULT \
1302 | TARGET_64BIT_DEFAULT \
1303 | TARGET_SUBTARGET_DEFAULT \
1304 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1305
1306#undef TARGET_HANDLE_OPTION
1307#define TARGET_HANDLE_OPTION ix86_handle_option
1308
3c50106f
RH
1309#undef TARGET_RTX_COSTS
1310#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1311#undef TARGET_ADDRESS_COST
1312#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1313
e129d93a
ILT
1314#undef TARGET_FIXED_CONDITION_CODE_REGS
1315#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1316#undef TARGET_CC_MODES_COMPATIBLE
1317#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1318
18dbd950
RS
1319#undef TARGET_MACHINE_DEPENDENT_REORG
1320#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1321
c35d187f
RH
1322#undef TARGET_BUILD_BUILTIN_VA_LIST
1323#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1324
67dfe110
KH
1325#undef TARGET_MD_ASM_CLOBBERS
1326#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1327
9184f892
KH
1328#undef TARGET_PROMOTE_PROTOTYPES
1329#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
0397ac35
RH
1330#undef TARGET_STRUCT_VALUE_RTX
1331#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
a0524eb3
KH
1332#undef TARGET_SETUP_INCOMING_VARARGS
1333#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
fe984136
RH
1334#undef TARGET_MUST_PASS_IN_STACK
1335#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
8cd5a4e0
RH
1336#undef TARGET_PASS_BY_REFERENCE
1337#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
150cdc9e
RH
1338#undef TARGET_INTERNAL_ARG_POINTER
1339#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1340#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1341#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
a0524eb3 1342
cd3ce9b4
JM
1343#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1344#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1345
a81083b2
BE
1346#undef TARGET_SCALAR_MODE_SUPPORTED_P
1347#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1348
f676971a
EC
1349#undef TARGET_VECTOR_MODE_SUPPORTED_P
1350#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1351
fdbe66f2
EB
1352#ifdef HAVE_AS_TLS
1353#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1354#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1355#endif
1356
07a43492
DJ
1357#ifdef SUBTARGET_INSERT_ATTRIBUTES
1358#undef TARGET_INSERT_ATTRIBUTES
1359#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1360#endif
1361
cac24f06
JM
1362#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1363#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1364
7d69de61 1365#undef TARGET_STACK_PROTECT_FAIL
7ce918c5 1366#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
7d69de61 1367
cb1119b7
RG
1368#undef TARGET_FUNCTION_VALUE
1369#define TARGET_FUNCTION_VALUE ix86_function_value
1370
f6897b10 1371struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1372
e075ae69 1373\f
67c2b45f
JS
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1379
6ac49599
RS
1380/* Implement TARGET_HANDLE_OPTION. */
1381
1382static bool
55bea00a 1383ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
6ac49599
RS
1384{
1385 switch (code)
1386 {
1387 case OPT_m3dnow:
1388 if (!value)
1389 {
1390 target_flags &= ~MASK_3DNOW_A;
1391 target_flags_explicit |= MASK_3DNOW_A;
1392 }
1393 return true;
1394
6ac49599
RS
1395 case OPT_mmmx:
1396 if (!value)
1397 {
1398 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1399 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1400 }
1401 return true;
1402
6ac49599
RS
1403 case OPT_msse:
1404 if (!value)
1405 {
1406 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1407 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1408 }
1409 return true;
1410
1411 case OPT_msse2:
1412 if (!value)
1413 {
1414 target_flags &= ~MASK_SSE3;
1415 target_flags_explicit |= MASK_SSE3;
1416 }
1417 return true;
1418
6ac49599
RS
1419 default:
1420 return true;
1421 }
1422}
1423
f5316dfe
MM
1424/* Sometimes certain combinations of command options do not make
1425 sense on a particular target machine. You can define a macro
1426 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1427 defined, is executed once just after all the command options have
1428 been parsed.
1429
1430 Don't use this macro to turn on various extra optimizations for
1431 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1432
1433void
b96a374d 1434override_options (void)
f5316dfe 1435{
400500c4 1436 int i;
3326f410
DJ
1437 int ix86_tune_defaulted = 0;
1438
e075ae69
RH
1439 /* Comes from final.c -- no real reason to change it. */
1440#define MAX_CODE_ALIGN 16
f5316dfe 1441
c8c5cb99
SC
1442 static struct ptt
1443 {
8b60264b
KG
1444 const struct processor_costs *cost; /* Processor costs */
1445 const int target_enable; /* Target flags to enable. */
1446 const int target_disable; /* Target flags to disable. */
1447 const int align_loop; /* Default alignments. */
2cca7283 1448 const int align_loop_max_skip;
8b60264b 1449 const int align_jump;
2cca7283 1450 const int align_jump_max_skip;
8b60264b 1451 const int align_func;
e075ae69 1452 }
0f290768 1453 const processor_target_table[PROCESSOR_max] =
e075ae69 1454 {
4977bab6
ZW
1455 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1456 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1457 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1459 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1460 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1461 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a 1462 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
d326eaf0
JH
1463 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1464 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1465 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
e075ae69
RH
1466 };
1467
f4365627 1468 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1469 static struct pta
1470 {
8b60264b
KG
1471 const char *const name; /* processor name or nickname. */
1472 const enum processor_type processor;
0dd0e980
JH
1473 const enum pta_flags
1474 {
1475 PTA_SSE = 1,
1476 PTA_SSE2 = 2,
5bbeea44
JH
1477 PTA_SSE3 = 4,
1478 PTA_MMX = 8,
1479 PTA_PREFETCH_SSE = 16,
1480 PTA_3DNOW = 32,
4977bab6
ZW
1481 PTA_3DNOW_A = 64,
1482 PTA_64BIT = 128
0dd0e980 1483 } flags;
e075ae69 1484 }
0f290768 1485 const processor_alias_table[] =
e075ae69 1486 {
0dd0e980
JH
1487 {"i386", PROCESSOR_I386, 0},
1488 {"i486", PROCESSOR_I486, 0},
1489 {"i586", PROCESSOR_PENTIUM, 0},
1490 {"pentium", PROCESSOR_PENTIUM, 0},
1491 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1492 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1493 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1495 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1496 {"i686", PROCESSOR_PENTIUMPRO, 0},
1497 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1498 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1499 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1500 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1501 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1502 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1503 | PTA_MMX | PTA_PREFETCH_SSE},
1504 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1505 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1506 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1507 | PTA_MMX | PTA_PREFETCH_SSE},
1508 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1509 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1510 {"k6", PROCESSOR_K6, PTA_MMX},
1511 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1513 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1514 | PTA_3DNOW_A},
f4365627 1515 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1516 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1517 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1518 | PTA_3DNOW_A | PTA_SSE},
f4365627 1519 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1520 | PTA_3DNOW_A | PTA_SSE},
f4365627 1521 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1522 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1523 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1524 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1525 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1526 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1527 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1528 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1529 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1532 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
d326eaf0
JH
1533 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1534 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
3af4bd89 1535 };
c8c5cb99 1536
ca7558fc 1537 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1538
554707bd
DJ
1539#ifdef SUBTARGET_OVERRIDE_OPTIONS
1540 SUBTARGET_OVERRIDE_OPTIONS;
1541#endif
1542
41ed2237 1543 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1544 in case they weren't overwritten by command line options. */
55ba61f3
JH
1545 if (TARGET_64BIT)
1546 {
1547 if (flag_omit_frame_pointer == 2)
1548 flag_omit_frame_pointer = 1;
1549 if (flag_asynchronous_unwind_tables == 2)
1550 flag_asynchronous_unwind_tables = 1;
1551 if (flag_pcc_struct_return == 2)
1552 flag_pcc_struct_return = 0;
1553 }
1554 else
1555 {
1556 if (flag_omit_frame_pointer == 2)
1557 flag_omit_frame_pointer = 0;
1558 if (flag_asynchronous_unwind_tables == 2)
1559 flag_asynchronous_unwind_tables = 0;
1560 if (flag_pcc_struct_return == 2)
7c712dcc 1561 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1562 }
1563
d326eaf0
JH
1564 /* Need to check -mtune=generic first. */
1565 if (ix86_tune_string)
3326f410 1566 {
d326eaf0
JH
1567 if (!strcmp (ix86_tune_string, "generic")
1568 || !strcmp (ix86_tune_string, "i686"))
1569 {
1570 if (TARGET_64BIT)
1571 ix86_tune_string = "generic64";
1572 else
1573 ix86_tune_string = "generic32";
1574 }
1575 else if (!strncmp (ix86_tune_string, "generic", 7))
1576 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
3326f410 1577 }
d326eaf0
JH
1578 else
1579 {
1580 if (ix86_arch_string)
1581 ix86_tune_string = ix86_arch_string;
1582 if (!ix86_tune_string)
1583 {
1584 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1585 ix86_tune_defaulted = 1;
1586 }
1587
1588 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1589 need to use a sensible tune option. */
1590 if (!strcmp (ix86_tune_string, "generic")
1591 || !strcmp (ix86_tune_string, "x86-64")
1592 || !strcmp (ix86_tune_string, "i686"))
1593 {
1594 if (TARGET_64BIT)
1595 ix86_tune_string = "generic64";
1596 else
1597 ix86_tune_string = "generic32";
1598 }
1599 }
1600 if (!strcmp (ix86_tune_string, "x86-64"))
1601 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1602 "-mtune=generic instead as appropriate.");
1603
f4365627 1604 if (!ix86_arch_string)
3fec9fa9 1605 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
d326eaf0
JH
1606 if (!strcmp (ix86_arch_string, "generic"))
1607 error ("generic CPU can be used only for -mtune= switch");
1608 if (!strncmp (ix86_arch_string, "generic", 7))
1609 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1610
6189a572
JH
1611 if (ix86_cmodel_string != 0)
1612 {
1613 if (!strcmp (ix86_cmodel_string, "small"))
1614 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
7dcbf659
JH
1615 else if (!strcmp (ix86_cmodel_string, "medium"))
1616 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
6189a572 1617 else if (flag_pic)
c725bd79 1618 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1619 else if (!strcmp (ix86_cmodel_string, "32"))
1620 ix86_cmodel = CM_32;
1621 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1622 ix86_cmodel = CM_KERNEL;
6189a572
JH
1623 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1624 ix86_cmodel = CM_LARGE;
1625 else
1626 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1627 }
1628 else
1629 {
1630 ix86_cmodel = CM_32;
1631 if (TARGET_64BIT)
1632 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1633 }
c93e80a5
JH
1634 if (ix86_asm_string != 0)
1635 {
1f4c2c57
MS
1636 if (! TARGET_MACHO
1637 && !strcmp (ix86_asm_string, "intel"))
c93e80a5
JH
1638 ix86_asm_dialect = ASM_INTEL;
1639 else if (!strcmp (ix86_asm_string, "att"))
1640 ix86_asm_dialect = ASM_ATT;
1641 else
1642 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1643 }
6189a572 1644 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
9e637a26 1645 error ("code model %qs not supported in the %s bit mode",
6189a572
JH
1646 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1647 if (ix86_cmodel == CM_LARGE)
9e637a26 1648 sorry ("code model %<large%> not supported yet");
0c2dc519 1649 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1650 sorry ("%i-bit mode not compiled in",
0c2dc519 1651 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1652
f4365627
JH
1653 for (i = 0; i < pta_size; i++)
1654 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1655 {
1656 ix86_arch = processor_alias_table[i].processor;
1657 /* Default cpu tuning to the architecture. */
9e555526 1658 ix86_tune = ix86_arch;
f4365627 1659 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1660 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1661 target_flags |= MASK_MMX;
1662 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1663 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1664 target_flags |= MASK_3DNOW;
1665 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1666 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1667 target_flags |= MASK_3DNOW_A;
1668 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1669 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1670 target_flags |= MASK_SSE;
1671 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1672 && !(target_flags_explicit & MASK_SSE2))
f4365627 1673 target_flags |= MASK_SSE2;
5bbeea44
JH
1674 if (processor_alias_table[i].flags & PTA_SSE3
1675 && !(target_flags_explicit & MASK_SSE3))
1676 target_flags |= MASK_SSE3;
f4365627
JH
1677 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1678 x86_prefetch_sse = true;
6716ecbc
JM
1679 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1680 error ("CPU you selected does not support x86-64 "
1681 "instruction set");
1682 break;
1683 }
1684
1685 if (i == pta_size)
1686 error ("bad value (%s) for -march= switch", ix86_arch_string);
1687
1688 for (i = 0; i < pta_size; i++)
1689 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1690 {
1691 ix86_tune = processor_alias_table[i].processor;
4977bab6 1692 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
1693 {
1694 if (ix86_tune_defaulted)
1695 {
1696 ix86_tune_string = "x86-64";
1697 for (i = 0; i < pta_size; i++)
1698 if (! strcmp (ix86_tune_string,
1699 processor_alias_table[i].name))
1700 break;
1701 ix86_tune = processor_alias_table[i].processor;
1702 }
1703 else
1704 error ("CPU you selected does not support x86-64 "
1705 "instruction set");
1706 }
c618c6ec
JJ
1707 /* Intel CPUs have always interpreted SSE prefetch instructions as
1708 NOPs; so, we can enable SSE prefetch instructions even when
1709 -mtune (rather than -march) points us to a processor that has them.
1710 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1711 higher processors. */
1712 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1713 x86_prefetch_sse = true;
f4365627
JH
1714 break;
1715 }
f4365627 1716 if (i == pta_size)
9e555526 1717 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1718
2ab0437e
JH
1719 if (optimize_size)
1720 ix86_cost = &size_cost;
1721 else
9e555526
RH
1722 ix86_cost = processor_target_table[ix86_tune].cost;
1723 target_flags |= processor_target_table[ix86_tune].target_enable;
1724 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1725
36edd3cc
BS
1726 /* Arrange to set up i386_stack_locals for all functions. */
1727 init_machine_status = ix86_init_machine_status;
fce5a9f2 1728
0f290768 1729 /* Validate -mregparm= value. */
e075ae69 1730 if (ix86_regparm_string)
b08de47e 1731 {
400500c4
RK
1732 i = atoi (ix86_regparm_string);
1733 if (i < 0 || i > REGPARM_MAX)
1734 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1735 else
1736 ix86_regparm = i;
b08de47e 1737 }
0d7d98ee
JH
1738 else
1739 if (TARGET_64BIT)
1740 ix86_regparm = REGPARM_MAX;
b08de47e 1741
3e18fdf6 1742 /* If the user has provided any of the -malign-* options,
a4f31c00 1743 warn and use that value only if -falign-* is not set.
3e18fdf6 1744 Remove this code in GCC 3.2 or later. */
e075ae69 1745 if (ix86_align_loops_string)
b08de47e 1746 {
d4ee4d25 1747 warning (0, "-malign-loops is obsolete, use -falign-loops");
3e18fdf6
GK
1748 if (align_loops == 0)
1749 {
1750 i = atoi (ix86_align_loops_string);
1751 if (i < 0 || i > MAX_CODE_ALIGN)
1752 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1753 else
1754 align_loops = 1 << i;
1755 }
b08de47e 1756 }
3af4bd89 1757
e075ae69 1758 if (ix86_align_jumps_string)
b08de47e 1759 {
d4ee4d25 1760 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
3e18fdf6
GK
1761 if (align_jumps == 0)
1762 {
1763 i = atoi (ix86_align_jumps_string);
1764 if (i < 0 || i > MAX_CODE_ALIGN)
1765 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1766 else
1767 align_jumps = 1 << i;
1768 }
b08de47e 1769 }
b08de47e 1770
e075ae69 1771 if (ix86_align_funcs_string)
b08de47e 1772 {
d4ee4d25 1773 warning (0, "-malign-functions is obsolete, use -falign-functions");
3e18fdf6
GK
1774 if (align_functions == 0)
1775 {
1776 i = atoi (ix86_align_funcs_string);
1777 if (i < 0 || i > MAX_CODE_ALIGN)
1778 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1779 else
1780 align_functions = 1 << i;
1781 }
b08de47e 1782 }
3af4bd89 1783
3e18fdf6 1784 /* Default align_* from the processor table. */
3e18fdf6 1785 if (align_loops == 0)
2cca7283 1786 {
9e555526
RH
1787 align_loops = processor_target_table[ix86_tune].align_loop;
1788 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1789 }
3e18fdf6 1790 if (align_jumps == 0)
2cca7283 1791 {
9e555526
RH
1792 align_jumps = processor_target_table[ix86_tune].align_jump;
1793 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1794 }
3e18fdf6 1795 if (align_functions == 0)
2cca7283 1796 {
9e555526 1797 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1798 }
3e18fdf6 1799
e4c0478d 1800 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1801 The default of 128 bits is for Pentium III's SSE __m128, but we
1802 don't want additional code to keep the stack aligned when
1803 optimizing for code size. */
7d072037
SH
1804 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1805 ? 128 : 32);
e075ae69 1806 if (ix86_preferred_stack_boundary_string)
3af4bd89 1807 {
400500c4 1808 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1809 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1810 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1811 TARGET_64BIT ? 4 : 2);
400500c4
RK
1812 else
1813 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1814 }
77a989d1 1815
0f290768 1816 /* Validate -mbranch-cost= value, or provide default. */
3dd0df7f 1817 ix86_branch_cost = ix86_cost->branch_cost;
e075ae69 1818 if (ix86_branch_cost_string)
804a8ee0 1819 {
400500c4
RK
1820 i = atoi (ix86_branch_cost_string);
1821 if (i < 0 || i > 5)
1822 error ("-mbranch-cost=%d is not between 0 and 5", i);
1823 else
1824 ix86_branch_cost = i;
804a8ee0 1825 }
7dcbf659
JH
1826 if (ix86_section_threshold_string)
1827 {
1828 i = atoi (ix86_section_threshold_string);
1829 if (i < 0)
1830 error ("-mlarge-data-threshold=%d is negative", i);
1831 else
1832 ix86_section_threshold = i;
1833 }
804a8ee0 1834
f996902d
RH
1835 if (ix86_tls_dialect_string)
1836 {
1837 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1838 ix86_tls_dialect = TLS_DIALECT_GNU;
5bf5a10b
AO
1839 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1840 ix86_tls_dialect = TLS_DIALECT_GNU2;
f996902d
RH
1841 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1842 ix86_tls_dialect = TLS_DIALECT_SUN;
1843 else
1844 error ("bad value (%s) for -mtls-dialect= switch",
1845 ix86_tls_dialect_string);
1846 }
1847
e9a25f70 1848 /* Keep nonleaf frame pointers. */
14c473b9
RS
1849 if (flag_omit_frame_pointer)
1850 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1851 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1852 flag_omit_frame_pointer = 1;
e075ae69
RH
1853
1854 /* If we're doing fast math, we don't care about comparison order
1855 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1856 if (flag_unsafe_math_optimizations)
e075ae69
RH
1857 target_flags &= ~MASK_IEEE_FP;
1858
30c99a84
RH
1859 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1860 since the insns won't need emulation. */
9690a821 1861 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
30c99a84
RH
1862 target_flags &= ~MASK_NO_FANCY_MATH_387;
1863
ba2baa55 1864 /* Likewise, if the target doesn't have a 387, or we've specified
0fa2e4df 1865 software floating point, don't use 387 inline intrinsics. */
ba2baa55
RS
1866 if (!TARGET_80387)
1867 target_flags |= MASK_NO_FANCY_MATH_387;
1868
9e200aaf
KC
1869 /* Turn on SSE2 builtins for -msse3. */
1870 if (TARGET_SSE3)
22c7c85e
L
1871 target_flags |= MASK_SSE2;
1872
1873 /* Turn on SSE builtins for -msse2. */
1874 if (TARGET_SSE2)
1875 target_flags |= MASK_SSE;
1876
a5370cf0
RH
1877 /* Turn on MMX builtins for -msse. */
1878 if (TARGET_SSE)
1879 {
1880 target_flags |= MASK_MMX & ~target_flags_explicit;
1881 x86_prefetch_sse = true;
1882 }
1883
1884 /* Turn on MMX builtins for 3Dnow. */
1885 if (TARGET_3DNOW)
1886 target_flags |= MASK_MMX;
1887
14f73b5a
JH
1888 if (TARGET_64BIT)
1889 {
1890 if (TARGET_ALIGN_DOUBLE)
c725bd79 1891 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1892 if (TARGET_RTD)
c725bd79 1893 error ("-mrtd calling convention not supported in the 64bit mode");
a5370cf0
RH
1894
1895 /* Enable by default the SSE and MMX builtins. Do allow the user to
1896 explicitly disable any of these. In particular, disabling SSE and
1897 MMX for kernel code is extremely useful. */
1898 target_flags
1899 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1900 & ~target_flags_explicit);
14f73b5a 1901 }
965f5423 1902 else
a5b378d6 1903 {
a5b378d6
JH
1904 /* i386 ABI does not specify red zone. It still makes sense to use it
1905 when programmer takes care to stack from being destroyed. */
1906 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1907 target_flags |= MASK_NO_RED_ZONE;
1908 }
965f5423 1909
1f97667f
RG
1910 /* Accept -msseregparm only if at least SSE support is enabled. */
1911 if (TARGET_SSEREGPARM
1912 && ! TARGET_SSE)
1913 error ("-msseregparm used without SSE enabled");
1914
ee963181
RG
1915 /* Accept -msselibm only if at least SSE support is enabled. */
1916 if (TARGET_SSELIBM
1917 && ! TARGET_SSE2)
1918 error ("-msselibm used without SSE2 enabled");
1919
1920 /* Ignore -msselibm on 64bit targets. */
1921 if (TARGET_SSELIBM
1922 && TARGET_64BIT)
1923 error ("-msselibm used on a 64bit target");
1924
004d3859
GK
1925 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1926
965f5423
JH
1927 if (ix86_fpmath_string != 0)
1928 {
1929 if (! strcmp (ix86_fpmath_string, "387"))
1930 ix86_fpmath = FPMATH_387;
1931 else if (! strcmp (ix86_fpmath_string, "sse"))
1932 {
1933 if (!TARGET_SSE)
1934 {
d4ee4d25 1935 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
1936 ix86_fpmath = FPMATH_387;
1937 }
1938 else
1939 ix86_fpmath = FPMATH_SSE;
1940 }
1941 else if (! strcmp (ix86_fpmath_string, "387,sse")
1942 || ! strcmp (ix86_fpmath_string, "sse,387"))
1943 {
1944 if (!TARGET_SSE)
1945 {
d4ee4d25 1946 warning (0, "SSE instruction set disabled, using 387 arithmetics");
965f5423
JH
1947 ix86_fpmath = FPMATH_387;
1948 }
1949 else if (!TARGET_80387)
1950 {
d4ee4d25 1951 warning (0, "387 instruction set disabled, using SSE arithmetics");
965f5423
JH
1952 ix86_fpmath = FPMATH_SSE;
1953 }
1954 else
1955 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1956 }
fce5a9f2 1957 else
965f5423
JH
1958 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1959 }
14f73b5a 1960
de004e6d
JS
1961 /* If the i387 is disabled, then do not return values in it. */
1962 if (!TARGET_80387)
1963 target_flags &= ~MASK_FLOAT_RETURNS;
1964
9e555526 1965 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1966 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1967 && !optimize_size)
1968 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810 1969
d3073c70
RH
1970 /* ??? Unwind info is not correct around the CFG unless either a frame
1971 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1972 unwind info generation to be aware of the CFG and propagating states
1973 around edges. */
1974 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1975 || flag_exceptions || flag_non_call_exceptions)
1976 && flag_omit_frame_pointer
1977 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1978 {
1979 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1980 warning (0, "unwind tables currently require either a frame pointer "
1981 "or -maccumulate-outgoing-args for correctness");
1982 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1983 }
1984
623fe810
RH
1985 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1986 {
1987 char *p;
1988 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1989 p = strchr (internal_label_prefix, 'X');
1990 internal_label_prefix_len = p - internal_label_prefix;
1991 *p = '\0';
1992 }
a5370cf0
RH
1993
1994 /* When scheduling description is not available, disable scheduler pass
1995 so it won't slow down the compilation and make x87 code slower. */
ad7b96a9
JH
1996 if (!TARGET_SCHEDULE)
1997 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
f5316dfe
MM
1998}
1999\f
7dcbf659
JH
2000/* switch to the appropriate section for output of DECL.
2001 DECL is either a `VAR_DECL' node or a constant of some sort.
2002 RELOC indicates whether forming the initial value of DECL requires
2003 link-time relocations. */
2004
d6b5193b 2005static section *
7dcbf659 2006x86_64_elf_select_section (tree decl, int reloc,
d6b5193b 2007 unsigned HOST_WIDE_INT align)
7dcbf659
JH
2008{
2009 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2010 && ix86_in_large_data_p (decl))
2011 {
2012 const char *sname = NULL;
3b10d286 2013 unsigned int flags = SECTION_WRITE;
7dcbf659
JH
2014 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2015 {
2016 case SECCAT_DATA:
2017 sname = ".ldata";
2018 break;
2019 case SECCAT_DATA_REL:
2020 sname = ".ldata.rel";
2021 break;
2022 case SECCAT_DATA_REL_LOCAL:
2023 sname = ".ldata.rel.local";
2024 break;
2025 case SECCAT_DATA_REL_RO:
2026 sname = ".ldata.rel.ro";
2027 break;
2028 case SECCAT_DATA_REL_RO_LOCAL:
2029 sname = ".ldata.rel.ro.local";
2030 break;
2031 case SECCAT_BSS:
2032 sname = ".lbss";
3b10d286 2033 flags |= SECTION_BSS;
7dcbf659
JH
2034 break;
2035 case SECCAT_RODATA:
2036 case SECCAT_RODATA_MERGE_STR:
2037 case SECCAT_RODATA_MERGE_STR_INIT:
2038 case SECCAT_RODATA_MERGE_CONST:
2039 sname = ".lrodata";
3b10d286 2040 flags = 0;
7dcbf659
JH
2041 break;
2042 case SECCAT_SRODATA:
2043 case SECCAT_SDATA:
2044 case SECCAT_SBSS:
2045 gcc_unreachable ();
2046 case SECCAT_TEXT:
2047 case SECCAT_TDATA:
2048 case SECCAT_TBSS:
2049 /* We don't split these for medium model. Place them into
2050 default sections and hope for best. */
2051 break;
2052 }
2053 if (sname)
3b10d286
JJ
2054 {
2055 /* We might get called with string constants, but get_named_section
2056 doesn't like them as they are not DECLs. Also, we need to set
2057 flags in that case. */
2058 if (!DECL_P (decl))
2059 return get_section (sname, flags, NULL);
2060 return get_named_section (decl, sname, reloc);
2061 }
7dcbf659 2062 }
d6b5193b 2063 return default_elf_select_section (decl, reloc, align);
7dcbf659
JH
2064}
2065
2066/* Build up a unique section name, expressed as a
2067 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2068 RELOC indicates whether the initial value of EXP requires
2069 link-time relocations. */
2070
2071static void
2072x86_64_elf_unique_section (tree decl, int reloc)
2073{
2074 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2075 && ix86_in_large_data_p (decl))
2076 {
2077 const char *prefix = NULL;
2078 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2079 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2080
2081 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2082 {
2083 case SECCAT_DATA:
2084 case SECCAT_DATA_REL:
2085 case SECCAT_DATA_REL_LOCAL:
2086 case SECCAT_DATA_REL_RO:
2087 case SECCAT_DATA_REL_RO_LOCAL:
2088 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2089 break;
2090 case SECCAT_BSS:
2091 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2092 break;
2093 case SECCAT_RODATA:
2094 case SECCAT_RODATA_MERGE_STR:
2095 case SECCAT_RODATA_MERGE_STR_INIT:
2096 case SECCAT_RODATA_MERGE_CONST:
2097 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2098 break;
2099 case SECCAT_SRODATA:
2100 case SECCAT_SDATA:
2101 case SECCAT_SBSS:
2102 gcc_unreachable ();
2103 case SECCAT_TEXT:
2104 case SECCAT_TDATA:
2105 case SECCAT_TBSS:
2106 /* We don't split these for medium model. Place them into
2107 default sections and hope for best. */
2108 break;
2109 }
2110 if (prefix)
2111 {
2112 const char *name;
2113 size_t nlen, plen;
2114 char *string;
2115 plen = strlen (prefix);
2116
2117 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2118 name = targetm.strip_name_encoding (name);
2119 nlen = strlen (name);
2120
2121 string = alloca (nlen + plen + 1);
2122 memcpy (string, prefix, plen);
2123 memcpy (string + plen, name, nlen + 1);
2124
2125 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2126 return;
2127 }
2128 }
2129 default_unique_section (decl, reloc);
2130}
2131
e81d37df 2132#ifdef COMMON_ASM_OP
7dcbf659
JH
2133/* This says how to output assembler code to declare an
2134 uninitialized external linkage data object.
2135
569b7f6a 2136 For medium model x86-64 we need to use .largecomm opcode for
7dcbf659
JH
2137 large objects. */
2138void
2139x86_elf_aligned_common (FILE *file,
2140 const char *name, unsigned HOST_WIDE_INT size,
2141 int align)
2142{
2143 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2144 && size > (unsigned int)ix86_section_threshold)
2145 fprintf (file, ".largecomm\t");
2146 else
2147 fprintf (file, "%s", COMMON_ASM_OP);
2148 assemble_name (file, name);
2149 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2150 size, align / BITS_PER_UNIT);
2151}
2152
2153/* Utility function for targets to use in implementing
2154 ASM_OUTPUT_ALIGNED_BSS. */
2155
2156void
2157x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2158 const char *name, unsigned HOST_WIDE_INT size,
2159 int align)
2160{
2161 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2162 && size > (unsigned int)ix86_section_threshold)
d6b5193b 2163 switch_to_section (get_named_section (decl, ".lbss", 0));
7dcbf659 2164 else
d6b5193b 2165 switch_to_section (bss_section);
7dcbf659
JH
2166 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2167#ifdef ASM_DECLARE_OBJECT_NAME
2168 last_assemble_variable_decl = decl;
2169 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2170#else
2171 /* Standard thing is just output label for the object. */
2172 ASM_OUTPUT_LABEL (file, name);
2173#endif /* ASM_DECLARE_OBJECT_NAME */
2174 ASM_OUTPUT_SKIP (file, size ? size : 1);
2175}
e81d37df 2176#endif
7dcbf659 2177\f
32b5b1aa 2178void
b96a374d 2179optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 2180{
e9a25f70
JL
2181 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2182 make the problem with not enough registers even worse. */
32b5b1aa
SC
2183#ifdef INSN_SCHEDULING
2184 if (level > 1)
2185 flag_schedule_insns = 0;
2186#endif
55ba61f3 2187
2e3f0db6
DJ
2188 if (TARGET_MACHO)
2189 /* The Darwin libraries never set errno, so we might as well
2190 avoid calling them when that's the only reason we would. */
2191 flag_errno_math = 0;
2192
55ba61f3
JH
2193 /* The default values of these switches depend on the TARGET_64BIT
2194 that is not known at this moment. Mark these values with 2 and
2195 let user the to override these. In case there is no command line option
2196 specifying them, we will set the defaults in override_options. */
2197 if (optimize >= 1)
2198 flag_omit_frame_pointer = 2;
2199 flag_pcc_struct_return = 2;
2200 flag_asynchronous_unwind_tables = 2;
4f514514
JM
2201#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2202 SUBTARGET_OPTIMIZATION_OPTIONS;
2203#endif
32b5b1aa 2204}
b08de47e 2205\f
91d231cb
JM
2206/* Table of valid machine attributes. */
2207const struct attribute_spec ix86_attribute_table[] =
b08de47e 2208{
91d231cb 2209 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
2210 /* Stdcall attribute says callee is responsible for popping arguments
2211 if they are not variable. */
2f84b963 2212 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
e91f04de
CH
2213 /* Fastcall attribute says callee is responsible for popping arguments
2214 if they are not variable. */
2f84b963 2215 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
91d231cb 2216 /* Cdecl attribute says the callee is a normal C declaration */
2f84b963 2217 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
b08de47e 2218 /* Regparm attribute specifies how many integer arguments are to be
0f290768 2219 passed in registers. */
2f84b963
RG
2220 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2221 /* Sseregparm attribute says we are using x86_64 calling conventions
2222 for FP arguments. */
2223 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
b2ca3702
MM
2224#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2225 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2226 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3da1eb0b 2227 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 2228#endif
fe77449a
DR
2229 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2230 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
07a43492
DJ
2231#ifdef SUBTARGET_ATTRIBUTE_TABLE
2232 SUBTARGET_ATTRIBUTE_TABLE,
2233#endif
91d231cb
JM
2234 { NULL, 0, 0, false, false, false, NULL }
2235};
2236
5fbf0217
EB
2237/* Decide whether we can make a sibling call to a function. DECL is the
2238 declaration of the function being targeted by the call and EXP is the
2239 CALL_EXPR representing the call. */
4977bab6
ZW
2240
2241static bool
b96a374d 2242ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6 2243{
f19e3a64 2244 tree func;
cb1119b7 2245 rtx a, b;
f19e3a64 2246
4977bab6
ZW
2247 /* If we are generating position-independent code, we cannot sibcall
2248 optimize any indirect call, or a direct call to a global function,
2249 as the PLT requires %ebx be live. */
010ef110 2250 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4977bab6
ZW
2251 return false;
2252
f19e3a64
JJ
2253 if (decl)
2254 func = decl;
2255 else
cb1119b7
RG
2256 {
2257 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2258 if (POINTER_TYPE_P (func))
2259 func = TREE_TYPE (func);
2260 }
f19e3a64 2261
cb1119b7
RG
2262 /* Check that the return value locations are the same. Like
2263 if we are returning floats on the 80387 register stack, we cannot
4977bab6 2264 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
2265 function that does or, conversely, from a function that does return
2266 a float to a function that doesn't; the necessary stack adjustment
cb1119b7 2267 would not be executed. This is also the place we notice
cac32996
RG
2268 differences in the return value ABI. Note that it is ok for one
2269 of the functions to have void return type as long as the return
2270 value of the other is passed in a register. */
cb1119b7
RG
2271 a = ix86_function_value (TREE_TYPE (exp), func, false);
2272 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2273 cfun->decl, false);
5d3018ce
RH
2274 if (STACK_REG_P (a) || STACK_REG_P (b))
2275 {
2276 if (!rtx_equal_p (a, b))
2277 return false;
2278 }
2279 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2280 ;
2281 else if (!rtx_equal_p (a, b))
4977bab6
ZW
2282 return false;
2283
2284 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 2285 register for the address of the target function. Make sure that all
4977bab6
ZW
2286 such registers are not used for passing parameters. */
2287 if (!decl && !TARGET_64BIT)
2288 {
e767b5be 2289 tree type;
4977bab6
ZW
2290
2291 /* We're looking at the CALL_EXPR, we need the type of the function. */
2292 type = TREE_OPERAND (exp, 0); /* pointer expression */
2293 type = TREE_TYPE (type); /* pointer type */
2294 type = TREE_TYPE (type); /* function type */
2295
e767b5be 2296 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
2297 {
2298 /* ??? Need to count the actual number of registers to be used,
2299 not the possible number of registers. Fix later. */
2300 return false;
2301 }
2302 }
2303
6cc37e7e
DS
2304#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2305 /* Dllimport'd functions are also called indirectly. */
43d9ad1d 2306 if (decl && DECL_DLLIMPORT_P (decl)
6cc37e7e
DS
2307 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2308 return false;
2309#endif
2310
150cdc9e
RH
2311 /* If we forced aligned the stack, then sibcalling would unalign the
2312 stack, which may break the called function. */
2313 if (cfun->machine->force_align_arg_pointer)
2314 return false;
2315
4977bab6
ZW
2316 /* Otherwise okay. That also includes certain types of indirect calls. */
2317 return true;
2318}
2319
2f84b963
RG
2320/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2321 calling convention attributes;
91d231cb 2322 arguments as in struct attribute_spec.handler. */
b08de47e 2323
91d231cb 2324static tree
2f84b963
RG
2325ix86_handle_cconv_attribute (tree *node, tree name,
2326 tree args,
2327 int flags ATTRIBUTE_UNUSED,
2328 bool *no_add_attrs)
91d231cb
JM
2329{
2330 if (TREE_CODE (*node) != FUNCTION_TYPE
2331 && TREE_CODE (*node) != METHOD_TYPE
2332 && TREE_CODE (*node) != FIELD_DECL
2333 && TREE_CODE (*node) != TYPE_DECL)
2334 {
5c498b10 2335 warning (OPT_Wattributes, "%qs attribute only applies to functions",
91d231cb
JM
2336 IDENTIFIER_POINTER (name));
2337 *no_add_attrs = true;
2f84b963 2338 return NULL_TREE;
91d231cb 2339 }
2f84b963
RG
2340
2341 /* Can combine regparm with all attributes but fastcall. */
2342 if (is_attribute_p ("regparm", name))
91d231cb
JM
2343 {
2344 tree cst;
b08de47e 2345
2f84b963
RG
2346 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2347 {
2348 error ("fastcall and regparm attributes are not compatible");
2349 }
2350
91d231cb
JM
2351 cst = TREE_VALUE (args);
2352 if (TREE_CODE (cst) != INTEGER_CST)
2353 {
5c498b10
DD
2354 warning (OPT_Wattributes,
2355 "%qs attribute requires an integer constant argument",
91d231cb
JM
2356 IDENTIFIER_POINTER (name));
2357 *no_add_attrs = true;
2358 }
2359 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2360 {
5c498b10 2361 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
91d231cb
JM
2362 IDENTIFIER_POINTER (name), REGPARM_MAX);
2363 *no_add_attrs = true;
2364 }
e91f04de 2365
2f84b963
RG
2366 return NULL_TREE;
2367 }
2368
2369 if (TARGET_64BIT)
2370 {
2371 warning (OPT_Wattributes, "%qs attribute ignored",
2372 IDENTIFIER_POINTER (name));
2373 *no_add_attrs = true;
2374 return NULL_TREE;
2375 }
2376
2377 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2378 if (is_attribute_p ("fastcall", name))
2379 {
2380 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2381 {
2382 error ("fastcall and cdecl attributes are not compatible");
2383 }
2384 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2385 {
2386 error ("fastcall and stdcall attributes are not compatible");
2387 }
2388 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2389 {
e767b5be
JH
2390 error ("fastcall and regparm attributes are not compatible");
2391 }
b08de47e
MM
2392 }
2393
2f84b963
RG
2394 /* Can combine stdcall with fastcall (redundant), regparm and
2395 sseregparm. */
2396 else if (is_attribute_p ("stdcall", name))
2397 {
2398 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2399 {
2400 error ("stdcall and cdecl attributes are not compatible");
2401 }
2402 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2403 {
2404 error ("stdcall and fastcall attributes are not compatible");
2405 }
2406 }
2407
2408 /* Can combine cdecl with regparm and sseregparm. */
2409 else if (is_attribute_p ("cdecl", name))
2410 {
2411 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2412 {
2413 error ("stdcall and cdecl attributes are not compatible");
2414 }
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2416 {
2417 error ("fastcall and cdecl attributes are not compatible");
2418 }
2419 }
2420
2421 /* Can combine sseregparm with all attributes. */
2422
91d231cb 2423 return NULL_TREE;
b08de47e
MM
2424}
2425
2426/* Return 0 if the attributes for two types are incompatible, 1 if they
2427 are compatible, and 2 if they are nearly compatible (which causes a
2428 warning to be generated). */
2429
8d8e52be 2430static int
b96a374d 2431ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 2432{
0f290768 2433 /* Check for mismatch of non-default calling convention. */
27c38fbe 2434 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
2435
2436 if (TREE_CODE (type1) != FUNCTION_TYPE)
2437 return 1;
2438
2f84b963
RG
2439 /* Check for mismatched fastcall/regparm types. */
2440 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2441 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2442 || (ix86_function_regparm (type1, NULL)
2443 != ix86_function_regparm (type2, NULL)))
2444 return 0;
2445
2446 /* Check for mismatched sseregparm types. */
2447 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2448 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
b96a374d 2449 return 0;
e91f04de 2450
afcfe58c 2451 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
2452 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2453 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac 2454 return 0;
2f84b963 2455
b08de47e
MM
2456 return 1;
2457}
b08de47e 2458\f
0fa2e4df 2459/* Return the regparm value for a function with the indicated TYPE and DECL.
e767b5be 2460 DECL may be NULL when calling function indirectly
839a4992 2461 or considering a libcall. */
483ab821
MM
2462
2463static int
e767b5be 2464ix86_function_regparm (tree type, tree decl)
483ab821
MM
2465{
2466 tree attr;
e767b5be
JH
2467 int regparm = ix86_regparm;
2468 bool user_convention = false;
483ab821 2469
e767b5be
JH
2470 if (!TARGET_64BIT)
2471 {
2472 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2473 if (attr)
2474 {
2475 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2476 user_convention = true;
2477 }
2478
2479 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2480 {
2481 regparm = 2;
2482 user_convention = true;
2483 }
2484
2485 /* Use register calling convention for local functions when possible. */
2486 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 2487 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
2488 {
2489 struct cgraph_local_info *i = cgraph_local_info (decl);
2490 if (i && i->local)
2491 {
73109af7
JJ
2492 int local_regparm, globals = 0, regno;
2493
2494 /* Make sure no regparm register is taken by a global register
2495 variable. */
2496 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2497 if (global_regs[local_regparm])
2498 break;
e767b5be
JH
2499 /* We can't use regparm(3) for nested functions as these use
2500 static chain pointer in third argument. */
73109af7 2501 if (local_regparm == 3
ad90e28f
DN
2502 && decl_function_context (decl)
2503 && !DECL_NO_STATIC_CHAIN (decl))
73109af7
JJ
2504 local_regparm = 2;
2505 /* Each global register variable increases register preassure,
2506 so the more global reg vars there are, the smaller regparm
2507 optimization use, unless requested by the user explicitly. */
2508 for (regno = 0; regno < 6; regno++)
2509 if (global_regs[regno])
2510 globals++;
2511 local_regparm
2512 = globals < local_regparm ? local_regparm - globals : 0;
2513
2514 if (local_regparm > regparm)
2515 regparm = local_regparm;
e767b5be
JH
2516 }
2517 }
2518 }
2519 return regparm;
483ab821
MM
2520}
2521
2f84b963
RG
2522/* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2523 in SSE registers for a function with the indicated TYPE and DECL.
2524 DECL may be NULL when calling function indirectly
2525 or considering a libcall. Otherwise return 0. */
2526
2527static int
2528ix86_function_sseregparm (tree type, tree decl)
2529{
2530 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2531 by the sseregparm attribute. */
1f97667f
RG
2532 if (TARGET_SSEREGPARM
2533 || (type
2534 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2f84b963
RG
2535 {
2536 if (!TARGET_SSE)
2537 {
2538 if (decl)
2539 error ("Calling %qD with attribute sseregparm without "
2540 "SSE/SSE2 enabled", decl);
2541 else
2542 error ("Calling %qT with attribute sseregparm without "
2543 "SSE/SSE2 enabled", type);
2544 return 0;
2545 }
2546
2547 return 2;
2548 }
2549
2550 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2551 in SSE registers even for 32-bit mode and not just 3, but up to
2552 8 SSE arguments in registers. */
2553 if (!TARGET_64BIT && decl
2554 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2555 {
2556 struct cgraph_local_info *i = cgraph_local_info (decl);
2557 if (i && i->local)
2558 return TARGET_SSE2 ? 2 : 1;
2559 }
2560
2561 return 0;
2562}
2563
f676971a 2564/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
2565 ix86_expand_prologue to determine if we need special help before
2566 calling allocate_stack_worker. */
2567
2568static bool
2569ix86_eax_live_at_start_p (void)
2570{
2571 /* Cheat. Don't bother working forward from ix86_function_regparm
2572 to the function type to whether an actual argument is located in
2573 eax. Instead just look at cfg info, which is still close enough
2574 to correct at this point. This gives false positives for broken
2575 functions that might use uninitialized data that happens to be
2576 allocated in eax, but who cares? */
5e2d947c 2577 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
fe9f516f
RH
2578}
2579
b08de47e
MM
2580/* Value is the number of bytes of arguments automatically
2581 popped when returning from a subroutine call.
2582 FUNDECL is the declaration node of the function (as a tree),
2583 FUNTYPE is the data type of the function (as a tree),
2584 or for a library call it is an identifier node for the subroutine name.
2585 SIZE is the number of bytes of arguments passed on the stack.
2586
2587 On the 80386, the RTD insn may be used to pop them if the number
2588 of args is fixed, but if the number is variable then the caller
2589 must pop them all. RTD can't be used for library calls now
2590 because the library is compiled with the Unix compiler.
2591 Use of RTD is a selectable option, since it is incompatible with
2592 standard Unix calling sequences. If the option is not selected,
2593 the caller must always pop the args.
2594
2595 The attribute stdcall is equivalent to RTD on a per module basis. */
2596
2597int
b96a374d 2598ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 2599{
3345ee7d 2600 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 2601
43f3a59d 2602 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 2603 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 2604
43f3a59d
KH
2605 /* Stdcall and fastcall functions will pop the stack if not
2606 variable args. */
e91f04de
CH
2607 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2608 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 2609 rtd = 1;
79325812 2610
698cdd84
SC
2611 if (rtd
2612 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
2613 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2614 == void_type_node)))
698cdd84
SC
2615 return size;
2616 }
79325812 2617
232b8f52 2618 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 2619 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff
JB
2620 && !TARGET_64BIT
2621 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 2622 {
e767b5be 2623 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
2624
2625 if (!nregs)
2626 return GET_MODE_SIZE (Pmode);
2627 }
2628
2629 return 0;
b08de47e 2630}
b08de47e
MM
2631\f
2632/* Argument support functions. */
2633
53c17031
JH
2634/* Return true when register may be used to pass function parameters. */
2635bool
b96a374d 2636ix86_function_arg_regno_p (int regno)
53c17031
JH
2637{
2638 int i;
2639 if (!TARGET_64BIT)
0333394e 2640 return (regno < REGPARM_MAX
aa941a60
UB
2641 || (TARGET_MMX && MMX_REGNO_P (regno)
2642 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2643 || (TARGET_SSE && SSE_REGNO_P (regno)
2644 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2645
2646 if (TARGET_SSE && SSE_REGNO_P (regno)
2647 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
53c17031
JH
2648 return true;
2649 /* RAX is used as hidden argument to va_arg functions. */
2650 if (!regno)
2651 return true;
2652 for (i = 0; i < REGPARM_MAX; i++)
2653 if (regno == x86_64_int_parameter_registers[i])
2654 return true;
2655 return false;
2656}
2657
fe984136
RH
2658/* Return if we do not know how to pass TYPE solely in registers. */
2659
2660static bool
2661ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2662{
2663 if (must_pass_in_stack_var_size_or_pad (mode, type))
2664 return true;
dcbca208
RH
2665
2666 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2667 The layout_type routine is crafty and tries to trick us into passing
2668 currently unsupported vector types on the stack by using TImode. */
2669 return (!TARGET_64BIT && mode == TImode
2670 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
2671}
2672
b08de47e
MM
2673/* Initialize a variable CUM of type CUMULATIVE_ARGS
2674 for a call to a function whose data type is FNTYPE.
2675 For a library call, FNTYPE is 0. */
2676
2677void
b96a374d
AJ
2678init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2679 tree fntype, /* tree ptr for function decl */
2680 rtx libname, /* SYMBOL_REF of library name or 0 */
2681 tree fndecl)
b08de47e
MM
2682{
2683 static CUMULATIVE_ARGS zero_cum;
2684 tree param, next_param;
2685
2686 if (TARGET_DEBUG_ARG)
2687 {
2688 fprintf (stderr, "\ninit_cumulative_args (");
2689 if (fntype)
e9a25f70
JL
2690 fprintf (stderr, "fntype code = %s, ret code = %s",
2691 tree_code_name[(int) TREE_CODE (fntype)],
2692 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
2693 else
2694 fprintf (stderr, "no fntype");
2695
2696 if (libname)
2697 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2698 }
2699
2700 *cum = zero_cum;
2701
2702 /* Set up the number of registers to use for passing arguments. */
2f84b963 2703 cum->nregs = ix86_regparm;
78fbfc4b
JB
2704 if (TARGET_SSE)
2705 cum->sse_nregs = SSE_REGPARM_MAX;
2706 if (TARGET_MMX)
2707 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
2708 cum->warn_sse = true;
2709 cum->warn_mmx = true;
53c17031 2710 cum->maybe_vaarg = false;
b08de47e 2711
2f84b963
RG
2712 /* Use ecx and edx registers if function has fastcall attribute,
2713 else look for regparm information. */
e91f04de
CH
2714 if (fntype && !TARGET_64BIT)
2715 {
2716 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2717 {
2718 cum->nregs = 2;
2719 cum->fastcall = 1;
2720 }
2f84b963
RG
2721 else
2722 cum->nregs = ix86_function_regparm (fntype, fndecl);
e91f04de
CH
2723 }
2724
2f84b963
RG
2725 /* Set up the number of SSE registers used for passing SFmode
2726 and DFmode arguments. Warn for mismatching ABI. */
2727 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2728
b08de47e
MM
2729 /* Determine if this function has variable arguments. This is
2730 indicated by the last argument being 'void_type_mode' if there
2731 are no variable arguments. If there are variable arguments, then
78fbfc4b 2732 we won't pass anything in registers in 32-bit mode. */
b08de47e 2733
78fbfc4b 2734 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
b08de47e
MM
2735 {
2736 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 2737 param != 0; param = next_param)
b08de47e
MM
2738 {
2739 next_param = TREE_CHAIN (param);
e9a25f70 2740 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
2741 {
2742 if (!TARGET_64BIT)
e91f04de
CH
2743 {
2744 cum->nregs = 0;
e1be55d0
JH
2745 cum->sse_nregs = 0;
2746 cum->mmx_nregs = 0;
2747 cum->warn_sse = 0;
2748 cum->warn_mmx = 0;
e91f04de 2749 cum->fastcall = 0;
2f84b963 2750 cum->float_in_sse = 0;
e91f04de 2751 }
53c17031
JH
2752 cum->maybe_vaarg = true;
2753 }
b08de47e
MM
2754 }
2755 }
53c17031
JH
2756 if ((!fntype && !libname)
2757 || (fntype && !TYPE_ARG_TYPES (fntype)))
f19e3a64
JJ
2758 cum->maybe_vaarg = true;
2759
b08de47e
MM
2760 if (TARGET_DEBUG_ARG)
2761 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2762
2763 return;
2764}
2765
6c4ccfd8
RH
2766/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2767 But in the case of vector types, it is some vector mode.
2768
2769 When we have only some of our vector isa extensions enabled, then there
2770 are some modes for which vector_mode_supported_p is false. For these
2771 modes, the generic vector support in gcc will choose some non-vector mode
2772 in order to implement the type. By computing the natural mode, we'll
2773 select the proper ABI location for the operand and not depend on whatever
2774 the middle-end decides to do with these vector types. */
2775
2776static enum machine_mode
2777type_natural_mode (tree type)
2778{
2779 enum machine_mode mode = TYPE_MODE (type);
2780
2781 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2782 {
2783 HOST_WIDE_INT size = int_size_in_bytes (type);
2784 if ((size == 8 || size == 16)
2785 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2786 && TYPE_VECTOR_SUBPARTS (type) > 1)
2787 {
2788 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2789
2790 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2791 mode = MIN_MODE_VECTOR_FLOAT;
2792 else
2793 mode = MIN_MODE_VECTOR_INT;
2794
2795 /* Get the mode which has this inner mode and number of units. */
2796 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2797 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2798 && GET_MODE_INNER (mode) == innermode)
2799 return mode;
2800
d0396b79 2801 gcc_unreachable ();
6c4ccfd8
RH
2802 }
2803 }
2804
2805 return mode;
2806}
2807
2808/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2809 this may not agree with the mode that the type system has chosen for the
2810 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2811 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2812
2813static rtx
2814gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2815 unsigned int regno)
2816{
2817 rtx tmp;
2818
2819 if (orig_mode != BLKmode)
2820 tmp = gen_rtx_REG (orig_mode, regno);
2821 else
2822 {
2823 tmp = gen_rtx_REG (mode, regno);
2824 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2825 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2826 }
2827
2828 return tmp;
2829}
2830
d1f87653 2831/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 2832 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
2833 class and assign registers accordingly. */
2834
2835/* Return the union class of CLASS1 and CLASS2.
2836 See the x86-64 PS ABI for details. */
2837
2838static enum x86_64_reg_class
b96a374d 2839merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
2840{
2841 /* Rule #1: If both classes are equal, this is the resulting class. */
2842 if (class1 == class2)
2843 return class1;
2844
2845 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2846 the other class. */
2847 if (class1 == X86_64_NO_CLASS)
2848 return class2;
2849 if (class2 == X86_64_NO_CLASS)
2850 return class1;
2851
2852 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2853 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2854 return X86_64_MEMORY_CLASS;
2855
2856 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2857 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2858 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2859 return X86_64_INTEGERSI_CLASS;
2860 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2861 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2862 return X86_64_INTEGER_CLASS;
2863
499accd7
JB
2864 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2865 MEMORY is used. */
2866 if (class1 == X86_64_X87_CLASS
2867 || class1 == X86_64_X87UP_CLASS
2868 || class1 == X86_64_COMPLEX_X87_CLASS
2869 || class2 == X86_64_X87_CLASS
2870 || class2 == X86_64_X87UP_CLASS
2871 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
2872 return X86_64_MEMORY_CLASS;
2873
2874 /* Rule #6: Otherwise class SSE is used. */
2875 return X86_64_SSE_CLASS;
2876}
2877
2878/* Classify the argument of type TYPE and mode MODE.
2879 CLASSES will be filled by the register class used to pass each word
2880 of the operand. The number of words is returned. In case the parameter
2881 should be passed in memory, 0 is returned. As a special case for zero
2882 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2883
2884 BIT_OFFSET is used internally for handling records and specifies offset
2885 of the offset in bits modulo 256 to avoid overflow cases.
2886
2887 See the x86-64 PS ABI for details.
2888*/
2889
2890static int
b96a374d
AJ
2891classify_argument (enum machine_mode mode, tree type,
2892 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2893{
296e4ae8 2894 HOST_WIDE_INT bytes =
53c17031 2895 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2896 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2897
c60ee6f5
JH
2898 /* Variable sized entities are always passed/returned in memory. */
2899 if (bytes < 0)
2900 return 0;
2901
dafc5b82 2902 if (mode != VOIDmode
fe984136 2903 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
2904 return 0;
2905
53c17031
JH
2906 if (type && AGGREGATE_TYPE_P (type))
2907 {
2908 int i;
2909 tree field;
2910 enum x86_64_reg_class subclasses[MAX_CLASSES];
2911
2912 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2913 if (bytes > 16)
2914 return 0;
2915
2916 for (i = 0; i < words; i++)
2917 classes[i] = X86_64_NO_CLASS;
2918
2919 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2920 signalize memory class, so handle it as special case. */
2921 if (!words)
2922 {
2923 classes[0] = X86_64_NO_CLASS;
2924 return 1;
2925 }
2926
2927 /* Classify each field of record and merge classes. */
d0396b79 2928 switch (TREE_CODE (type))
53c17031 2929 {
d0396b79 2930 case RECORD_TYPE:
91ea38f9 2931 /* For classes first merge in the field of the subclasses. */
fa743e8c 2932 if (TYPE_BINFO (type))
91ea38f9 2933 {
fa743e8c 2934 tree binfo, base_binfo;
e8112eac 2935 int basenum;
91ea38f9 2936
e8112eac
ZK
2937 for (binfo = TYPE_BINFO (type), basenum = 0;
2938 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2939 {
91ea38f9 2940 int num;
fa743e8c
NS
2941 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2942 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2943
2944 num = classify_argument (TYPE_MODE (type),
2945 type, subclasses,
2946 (offset + bit_offset) % 256);
2947 if (!num)
2948 return 0;
2949 for (i = 0; i < num; i++)
2950 {
db01f480 2951 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2952 classes[i + pos] =
2953 merge_classes (subclasses[i], classes[i + pos]);
2954 }
2955 }
2956 }
43f3a59d 2957 /* And now merge the fields of structure. */
53c17031
JH
2958 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2959 {
2960 if (TREE_CODE (field) == FIELD_DECL)
2961 {
2962 int num;
2963
2964 /* Bitfields are always classified as integer. Handle them
2965 early, since later code would consider them to be
2966 misaligned integers. */
2967 if (DECL_BIT_FIELD (field))
2968 {
9286af97
JH
2969 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2970 i < ((int_bit_position (field) + (bit_offset % 64))
53c17031 2971 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2972 + 63) / 8 / 8; i++)
53c17031
JH
2973 classes[i] =
2974 merge_classes (X86_64_INTEGER_CLASS,
2975 classes[i]);
2976 }
2977 else
2978 {
2979 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2980 TREE_TYPE (field), subclasses,
2981 (int_bit_position (field)
2982 + bit_offset) % 256);
2983 if (!num)
2984 return 0;
2985 for (i = 0; i < num; i++)
2986 {
2987 int pos =
db01f480 2988 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2989 classes[i + pos] =
2990 merge_classes (subclasses[i], classes[i + pos]);
2991 }
2992 }
2993 }
2994 }
d0396b79 2995 break;
91ea38f9 2996
d0396b79
NS
2997 case ARRAY_TYPE:
2998 /* Arrays are handled as small records. */
2999 {
3000 int num;
3001 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3002 TREE_TYPE (type), subclasses, bit_offset);
3003 if (!num)
3004 return 0;
91ea38f9 3005
d0396b79
NS
3006 /* The partial classes are now full classes. */
3007 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3008 subclasses[0] = X86_64_SSE_CLASS;
3009 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3010 subclasses[0] = X86_64_INTEGER_CLASS;
3011
3012 for (i = 0; i < words; i++)
3013 classes[i] = subclasses[i % num];
3014
3015 break;
3016 }
3017 case UNION_TYPE:
3018 case QUAL_UNION_TYPE:
3019 /* Unions are similar to RECORD_TYPE but offset is always 0.
3020 */
3021
3022 /* Unions are not derived. */
3023 gcc_assert (!TYPE_BINFO (type)
3024 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
53c17031
JH
3025 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3026 {
3027 if (TREE_CODE (field) == FIELD_DECL)
3028 {
3029 int num;
3030 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3031 TREE_TYPE (field), subclasses,
3032 bit_offset);
3033 if (!num)
3034 return 0;
3035 for (i = 0; i < num; i++)
3036 classes[i] = merge_classes (subclasses[i], classes[i]);
3037 }
3038 }
d0396b79
NS
3039 break;
3040
3041 default:
3042 gcc_unreachable ();
53c17031 3043 }
53c17031
JH
3044
3045 /* Final merger cleanup. */
3046 for (i = 0; i < words; i++)
3047 {
3048 /* If one class is MEMORY, everything should be passed in
3049 memory. */
3050 if (classes[i] == X86_64_MEMORY_CLASS)
3051 return 0;
3052
d6a7951f 3053 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
3054 X86_64_SSE_CLASS. */
3055 if (classes[i] == X86_64_SSEUP_CLASS
3056 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3057 classes[i] = X86_64_SSE_CLASS;
3058
d6a7951f 3059 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
3060 if (classes[i] == X86_64_X87UP_CLASS
3061 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3062 classes[i] = X86_64_SSE_CLASS;
3063 }
3064 return words;
3065 }
3066
3067 /* Compute alignment needed. We align all types to natural boundaries with
3068 exception of XFmode that is aligned to 64bits. */
3069 if (mode != VOIDmode && mode != BLKmode)
3070 {
3071 int mode_alignment = GET_MODE_BITSIZE (mode);
3072
3073 if (mode == XFmode)
3074 mode_alignment = 128;
3075 else if (mode == XCmode)
3076 mode_alignment = 256;
2c6b27c3
JH
3077 if (COMPLEX_MODE_P (mode))
3078 mode_alignment /= 2;
f5143c46 3079 /* Misaligned fields are always returned in memory. */
53c17031
JH
3080 if (bit_offset % mode_alignment)
3081 return 0;
3082 }
3083
9e9fb0ce
JB
3084 /* for V1xx modes, just use the base mode */
3085 if (VECTOR_MODE_P (mode)
3086 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3087 mode = GET_MODE_INNER (mode);
3088
53c17031
JH
3089 /* Classification of atomic types. */
3090 switch (mode)
3091 {
a81083b2
BE
3092 case SDmode:
3093 case DDmode:
3094 classes[0] = X86_64_SSE_CLASS;
3095 return 1;
3096 case TDmode:
3097 classes[0] = X86_64_SSE_CLASS;
3098 classes[1] = X86_64_SSEUP_CLASS;
3099 return 2;
53c17031
JH
3100 case DImode:
3101 case SImode:
3102 case HImode:
3103 case QImode:
3104 case CSImode:
3105 case CHImode:
3106 case CQImode:
3107 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3108 classes[0] = X86_64_INTEGERSI_CLASS;
3109 else
3110 classes[0] = X86_64_INTEGER_CLASS;
3111 return 1;
3112 case CDImode:
3113 case TImode:
3114 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3115 return 2;
3116 case CTImode:
9e9fb0ce 3117 return 0;
53c17031
JH
3118 case SFmode:
3119 if (!(bit_offset % 64))
3120 classes[0] = X86_64_SSESF_CLASS;
3121 else
3122 classes[0] = X86_64_SSE_CLASS;
3123 return 1;
3124 case DFmode:
3125 classes[0] = X86_64_SSEDF_CLASS;
3126 return 1;
f8a1ebc6 3127 case XFmode:
53c17031
JH
3128 classes[0] = X86_64_X87_CLASS;
3129 classes[1] = X86_64_X87UP_CLASS;
3130 return 2;
f8a1ebc6 3131 case TFmode:
9e9fb0ce
JB
3132 classes[0] = X86_64_SSE_CLASS;
3133 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
3134 return 2;
3135 case SCmode:
3136 classes[0] = X86_64_SSE_CLASS;
3137 return 1;
9e9fb0ce
JB
3138 case DCmode:
3139 classes[0] = X86_64_SSEDF_CLASS;
3140 classes[1] = X86_64_SSEDF_CLASS;
3141 return 2;
3142 case XCmode:
499accd7
JB
3143 classes[0] = X86_64_COMPLEX_X87_CLASS;
3144 return 1;
9e9fb0ce 3145 case TCmode:
499accd7 3146 /* This modes is larger than 16 bytes. */
9e9fb0ce 3147 return 0;
e95d6b23
JH
3148 case V4SFmode:
3149 case V4SImode:
495333a6
JH
3150 case V16QImode:
3151 case V8HImode:
3152 case V2DFmode:
3153 case V2DImode:
e95d6b23
JH
3154 classes[0] = X86_64_SSE_CLASS;
3155 classes[1] = X86_64_SSEUP_CLASS;
3156 return 2;
3157 case V2SFmode:
3158 case V2SImode:
3159 case V4HImode:
3160 case V8QImode:
9e9fb0ce
JB
3161 classes[0] = X86_64_SSE_CLASS;
3162 return 1;
53c17031 3163 case BLKmode:
e95d6b23 3164 case VOIDmode:
53c17031
JH
3165 return 0;
3166 default:
d0396b79
NS
3167 gcc_assert (VECTOR_MODE_P (mode));
3168
3169 if (bytes > 16)
3170 return 0;
3171
3172 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3173
3174 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3175 classes[0] = X86_64_INTEGERSI_CLASS;
3176 else
3177 classes[0] = X86_64_INTEGER_CLASS;
3178 classes[1] = X86_64_INTEGER_CLASS;
3179 return 1 + (bytes > 8);
53c17031
JH
3180 }
3181}
3182
3183/* Examine the argument and return set number of register required in each
f5143c46 3184 class. Return 0 iff parameter should be passed in memory. */
53c17031 3185static int
b96a374d
AJ
3186examine_argument (enum machine_mode mode, tree type, int in_return,
3187 int *int_nregs, int *sse_nregs)
53c17031
JH
3188{
3189 enum x86_64_reg_class class[MAX_CLASSES];
3190 int n = classify_argument (mode, type, class, 0);
3191
3192 *int_nregs = 0;
3193 *sse_nregs = 0;
3194 if (!n)
3195 return 0;
3196 for (n--; n >= 0; n--)
3197 switch (class[n])
3198 {
3199 case X86_64_INTEGER_CLASS:
3200 case X86_64_INTEGERSI_CLASS:
3201 (*int_nregs)++;
3202 break;
3203 case X86_64_SSE_CLASS:
3204 case X86_64_SSESF_CLASS:
3205 case X86_64_SSEDF_CLASS:
3206 (*sse_nregs)++;
3207 break;
3208 case X86_64_NO_CLASS:
3209 case X86_64_SSEUP_CLASS:
3210 break;
3211 case X86_64_X87_CLASS:
3212 case X86_64_X87UP_CLASS:
3213 if (!in_return)
3214 return 0;
3215 break;
499accd7
JB
3216 case X86_64_COMPLEX_X87_CLASS:
3217 return in_return ? 2 : 0;
53c17031 3218 case X86_64_MEMORY_CLASS:
d0396b79 3219 gcc_unreachable ();
53c17031
JH
3220 }
3221 return 1;
3222}
6c4ccfd8 3223
53c17031
JH
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/ORIG_MODE describe the argument, TYPE is its tree type (may be
   NULL), IN_RETURN is nonzero when building a return-value container,
   NINTREGS/NSSEREGS are the numbers of free integer and SSE registers,
   INTREG points at the next free integer register numbers, and SSE_REGNO
   is the first free SSE register.  Returns NULL when the value cannot be
   passed in registers (memory class, not enough free registers, SSE
   needed but disabled, or a zero-sized aggregate).  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* Classify the value into per-eightbyte x86-64 ABI register classes.  */
  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* Memory class, or not enough free registers of the required kinds:
     pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      static bool issued_error;
      if (!issued_error)
	{
	  issued_error = true;
	  if (in_return)
	    error ("SSE register return with SSE disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	}
      return NULL;
    }

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }

  /* Two-eightbyte values that fit a single hard register pair are also
     returned as a plain REG rather than a PARALLEL.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSEUP class following means the two eightbytes occupy one
	     full XMM register; emit it as TImode and skip the next slot.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
3380
b08de47e
MM
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      /* x86-64: an argument goes either entirely in registers (consuming
	 both integer and SSE registers as classified) or entirely on the
	 stack.  */
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  /* Integer-like arguments consume regparm registers; once they
	     run out, no further integer arguments go in registers.  */
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	  /* FALLTHRU */
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  /* SSE-sized vectors (and scalar floats when float_in_sse allows)
	     consume one SSE register, unless passed as an aggregate.  */
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  /* MMX-sized vectors consume one MMX register.  */
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}
3491
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  /* Each ABI-change warning is issued at most once per compilation.  */
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    /* x86-64: delegate to the psABI classification machinery.  */
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	/* Integer-like arguments go in regparm registers only if the whole
	   argument fits in the remaining ones.  */
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
	/* FALLTHRU */
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	/* SSE-sized vectors; warn once if SSE is disabled since register
	   vs. stack placement changes the ABI.  */
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	/* MMX-sized vectors; analogous once-only ABI warning.  */
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
53c17031 3635
09b2e78d
ZD
3636/* A C expression that indicates when an argument must be passed by
3637 reference. If nonzero for an argument, a copy of that argument is
3638 made in memory and a pointer to the argument is passed instead of
3639 the argument itself. The pointer is passed in whatever way is
3640 appropriate for passing a pointer to that type. */
3641
8cd5a4e0
RH
3642static bool
3643ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3644 enum machine_mode mode ATTRIBUTE_UNUSED,
3645 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
3646{
3647 if (!TARGET_64BIT)
3648 return 0;
3649
3650 if (type && int_size_in_bytes (type) == -1)
3651 {
3652 if (TARGET_DEBUG_ARG)
3653 fprintf (stderr, "function_arg_pass_by_reference\n");
3654 return 1;
3655 }
3656
3657 return 0;
3658}
3659
8b978a57 3660/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 3661 ABI. Only called if TARGET_SSE. */
8b978a57 3662static bool
b96a374d 3663contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
3664{
3665 enum machine_mode mode = TYPE_MODE (type);
3666 if (SSE_REG_MODE_P (mode)
3667 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3668 return true;
3669 if (TYPE_ALIGN (type) < 128)
3670 return false;
3671
3672 if (AGGREGATE_TYPE_P (type))
3673 {
2a43945f 3674 /* Walk the aggregates recursively. */
d0396b79 3675 switch (TREE_CODE (type))
8b978a57 3676 {
d0396b79
NS
3677 case RECORD_TYPE:
3678 case UNION_TYPE:
3679 case QUAL_UNION_TYPE:
3680 {
3681 tree field;
3682
3683 if (TYPE_BINFO (type))
3684 {
3685 tree binfo, base_binfo;
3686 int i;
3687
3688 for (binfo = TYPE_BINFO (type), i = 0;
3689 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3690 if (contains_128bit_aligned_vector_p
3691 (BINFO_TYPE (base_binfo)))
3692 return true;
3693 }
3694 /* And now merge the fields of structure. */
3695 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3696 {
3697 if (TREE_CODE (field) == FIELD_DECL
3698 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
fa743e8c 3699 return true;
d0396b79
NS
3700 }
3701 break;
3702 }
3703
3704 case ARRAY_TYPE:
3705 /* Just for use if some languages passes arrays by value. */
8b978a57
JH
3706 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3707 return true;
5139c66b 3708 break;
d0396b79
NS
3709
3710 default:
3711 gcc_unreachable ();
8b978a57 3712 }
8b978a57
JH
3713 }
3714 return false;
3715}
3716
bb498ea3
AH
3717/* Gives the alignment boundary, in bits, of an argument with the
3718 specified mode and type. */
53c17031
JH
3719
3720int
b96a374d 3721ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
3722{
3723 int align;
53c17031
JH
3724 if (type)
3725 align = TYPE_ALIGN (type);
3726 else
3727 align = GET_MODE_ALIGNMENT (mode);
3728 if (align < PARM_BOUNDARY)
3729 align = PARM_BOUNDARY;
8b978a57
JH
3730 if (!TARGET_64BIT)
3731 {
3732 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3733 make an exception for SSE modes since these require 128bit
b96a374d 3734 alignment.
8b978a57
JH
3735
3736 The handling here differs from field_alignment. ICC aligns MMX
3737 arguments to 4 byte boundaries, while structure fields are aligned
3738 to 8 byte boundaries. */
78fbfc4b
JB
3739 if (!TARGET_SSE)
3740 align = PARM_BOUNDARY;
3741 else if (!type)
8b978a57
JH
3742 {
3743 if (!SSE_REG_MODE_P (mode))
3744 align = PARM_BOUNDARY;
3745 }
3746 else
3747 {
3748 if (!contains_128bit_aligned_vector_p (type))
3749 align = PARM_BOUNDARY;
3750 }
8b978a57 3751 }
53c17031
JH
3752 if (align > 128)
3753 align = 128;
3754 return align;
3755}
3756
3757/* Return true if N is a possible register number of function value. */
3758bool
b96a374d 3759ix86_function_value_regno_p (int regno)
53c17031 3760{
aa941a60
UB
3761 if (regno == 0
3762 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3763 || (regno == FIRST_SSE_REG && TARGET_SSE))
3764 return true;
3765
3766 if (!TARGET_64BIT
3767 && (regno == FIRST_MMX_REG && TARGET_MMX))
3768 return true;
3769
3770 return false;
53c17031
JH
3771}
3772
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   FNTYPE_OR_DECL is the called function's FUNCTION_DECL if known, else its
   function type, else NULL.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
				     1, REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      /* 32-bit ABI: a single return register, chosen by ix86_value_regno.
	 Split FNTYPE_OR_DECL into its decl (if any) and type parts.  */
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
	  && DECL_P (fntype_or_decl))
	fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
			  ix86_value_regno (natmode, fn, fntype));
    }
}
3805
0966949b 3806/* Return true iff type is returned in memory. */
53c17031 3807int
b96a374d 3808ix86_return_in_memory (tree type)
53c17031 3809{
a30b6839 3810 int needed_intregs, needed_sseregs, size;
b3a1ca49 3811 enum machine_mode mode = type_natural_mode (type);
a30b6839 3812
53c17031 3813 if (TARGET_64BIT)
a30b6839
RH
3814 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3815
3816 if (mode == BLKmode)
3817 return 1;
3818
3819 size = int_size_in_bytes (type);
3820
3821 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3822 return 0;
3823
3824 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 3825 {
a30b6839
RH
3826 /* User-created vectors small enough to fit in EAX. */
3827 if (size < 8)
5e062767 3828 return 0;
a30b6839 3829
74c4a88a
UB
3830 /* MMX/3dNow values are returned in MM0,
3831 except when it doesn't exits. */
a30b6839 3832 if (size == 8)
74c4a88a 3833 return (TARGET_MMX ? 0 : 1);
a30b6839 3834
0397ac35 3835 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 3836 if (size == 16)
0397ac35 3837 return (TARGET_SSE ? 0 : 1);
53c17031 3838 }
a30b6839 3839
cf2348cb 3840 if (mode == XFmode)
a30b6839 3841 return 0;
f8a1ebc6 3842
a81083b2
BE
3843 if (mode == TDmode)
3844 return 1;
3845
a30b6839
RH
3846 if (size > 12)
3847 return 1;
3848 return 0;
53c17031
JH
3849}
3850
0397ac35
RH
3851/* When returning SSE vector types, we have a choice of either
3852 (1) being abi incompatible with a -march switch, or
3853 (2) generating an error.
3854 Given no good solution, I think the safest thing is one warning.
3855 The user won't be able to use -Werror, but....
3856
3857 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3858 called in response to actually generating a caller or callee that
3859 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3860 via aggregate_value_p for general type probing from tree-ssa. */
3861
3862static rtx
3863ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3864{
74c4a88a 3865 static bool warnedsse, warnedmmx;
0397ac35 3866
74c4a88a 3867 if (type)
0397ac35
RH
3868 {
3869 /* Look at the return type of the function, not the function type. */
3870 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3871
74c4a88a
UB
3872 if (!TARGET_SSE && !warnedsse)
3873 {
3874 if (mode == TImode
3875 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3876 {
3877 warnedsse = true;
3878 warning (0, "SSE vector return without SSE enabled "
3879 "changes the ABI");
3880 }
3881 }
3882
3883 if (!TARGET_MMX && !warnedmmx)
0397ac35 3884 {
74c4a88a
UB
3885 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3886 {
3887 warnedmmx = true;
3888 warning (0, "MMX vector return without MMX enabled "
3889 "changes the ABI");
3890 }
0397ac35
RH
3891 }
3892 }
3893
3894 return NULL;
3895}
3896
53c17031
JH
3897/* Define how to find the value returned by a library function
3898 assuming the value has mode MODE. */
3899rtx
b96a374d 3900ix86_libcall_value (enum machine_mode mode)
53c17031
JH
3901{
3902 if (TARGET_64BIT)
3903 {
3904 switch (mode)
3905 {
f8a1ebc6
JH
3906 case SFmode:
3907 case SCmode:
3908 case DFmode:
3909 case DCmode:
9e9fb0ce 3910 case TFmode:
a81083b2
BE
3911 case SDmode:
3912 case DDmode:
3913 case TDmode:
f8a1ebc6
JH
3914 return gen_rtx_REG (mode, FIRST_SSE_REG);
3915 case XFmode:
9e9fb0ce 3916 case XCmode:
499accd7 3917 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
f8a1ebc6
JH
3918 case TCmode:
3919 return NULL;
3920 default:
3921 return gen_rtx_REG (mode, 0);
53c17031
JH
3922 }
3923 }
3924 else
cb1119b7 3925 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
b069de3b
SS
3926}
3927
/* Given a mode, return the register to use for a return value.
   FUNC is the FUNCTION_DECL of the callee if known, FNTYPE its type;
   either or both may be NULL.  Only valid for the 32-bit ABI.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we prevent this case when mmx is not available.  */
  if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
    return FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return FIRST_SSE_REG;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}
ad919812
JH
3966\f
/* Create the va_list data type.
   For 32-bit targets va_list is a plain `char *'; for x86-64 it is a
   one-element array of the four-field __va_list_tag record mandated by
   the psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Remember the counter fields so the va_list pass can track their use.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields in declaration order; the ABI layout depends on it.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
4010
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
   On x86-64, emits prologue code that dumps the unnamed-argument integer
   registers (and, via sse_prologue_save, the SSE registers) into the
   register save area so va_arg can find them.  No-op for 32-bit targets
   or when the function never reads its va_list counters.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Nothing to save if the function's va_lists touch no registers.  */
  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the integer argument registers not consumed by named args.  */
  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}
4116
/* Implement va_start.
   For x86-64, initialize the four __va_list_tag fields of VALIST from the
   current function's incoming-argument bookkeeping; for 32-bit targets
   fall back to the generic pointer-bump implementation.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Pick apart the __va_list_tag record built by
     ix86_build_builtin_va_list; the fields are chained in order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: bytes into the save area where unnamed GPRs start.  */
  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* fp_offset: SSE slots follow the 8*REGPARM_MAX bytes of GPR slots.  */
  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
		build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
4188
/* Implement va_arg for the x86-64 register-save-area ABI, emitting
   GIMPLE into *PRE_P.  VALIST is the va_list expression, TYPE the type
   being fetched.  Returns an expression for the fetched value.

   For a type (partially) passed in registers this emits a runtime test:
   if enough gp/sse register slots remain, the value is read out of the
   register save area (copying piecewise through a temporary when the
   pieces are not laid out contiguously there); otherwise it is read
   from the overflow (stack) area.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four fields of the 64-bit va_list record, in declaration
     order: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Types passed by reference arrive as a pointer; fetch the pointer
     here and dereference at the end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER describes how (if at all) TYPE is passed in registers;
     NULL means it is passed entirely in memory.  */
  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* An over-aligned type cannot be read directly out of the save
	 area; stage it through a temporary instead.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      /* SSE slots are 16 bytes apart in the save area.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      /* Integer slots are 8 bytes apart in the save area.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* fp_offset counts from the start of the whole save area,
	     past the REGPARM_MAX * 8 bytes of integer slots.  */
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  /* Copy each register-sized piece from the save area into
	     the temporary, at the offset CONTAINER prescribes.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      /* Advance the offsets past the slots just consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the type's boundary.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  /* Bump the overflow pointer past the argument just read.  */
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  /* For pass-by-reference, ADDR points at a pointer to the value.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
8fe75e43
RH
4436\f
4437/* Return nonzero if OPNUM's MEM should be matched
4438 in movabs* patterns. */
fee2770d
RS
4439
4440int
8fe75e43 4441ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 4442{
8fe75e43 4443 rtx set, mem;
e075ae69 4444
8fe75e43
RH
4445 set = PATTERN (insn);
4446 if (GET_CODE (set) == PARALLEL)
4447 set = XVECEXP (set, 0, 0);
d0396b79 4448 gcc_assert (GET_CODE (set) == SET);
8fe75e43
RH
4449 mem = XEXP (set, opnum);
4450 while (GET_CODE (mem) == SUBREG)
4451 mem = SUBREG_REG (mem);
d0396b79 4452 gcc_assert (GET_CODE (mem) == MEM);
8fe75e43 4453 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 4454}
e075ae69 4455\f
881b2a96
RS
4456/* Initialize the table of extra 80387 mathematical constants. */
4457
4458static void
b96a374d 4459init_ext_80387_constants (void)
881b2a96
RS
4460{
4461 static const char * cst[5] =
4462 {
4463 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4464 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4465 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4466 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4467 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4468 };
4469 int i;
4470
4471 for (i = 0; i < 5; i++)
4472 {
4473 real_from_string (&ext_80387_constants_table[i], cst[i]);
4474 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 4475 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 4476 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
4477 }
4478
4479 ext_80387_constants_init = 1;
4480}
4481
e075ae69 4482/* Return true if the constant is something that can be loaded with
881b2a96 4483 a special instruction. */
57dbca5e
BS
4484
4485int
b96a374d 4486standard_80387_constant_p (rtx x)
57dbca5e 4487{
2b04e52b 4488 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4489 return -1;
881b2a96 4490
2b04e52b
JH
4491 if (x == CONST0_RTX (GET_MODE (x)))
4492 return 1;
4493 if (x == CONST1_RTX (GET_MODE (x)))
4494 return 2;
881b2a96 4495
22cc69c4
RS
4496 /* For XFmode constants, try to find a special 80387 instruction when
4497 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 4498 if (GET_MODE (x) == XFmode
22cc69c4 4499 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
4500 {
4501 REAL_VALUE_TYPE r;
4502 int i;
4503
4504 if (! ext_80387_constants_init)
4505 init_ext_80387_constants ();
4506
4507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4508 for (i = 0; i < 5; i++)
4509 if (real_identical (&r, &ext_80387_constants_table[i]))
4510 return i + 3;
4511 }
4512
e075ae69 4513 return 0;
57dbca5e
BS
4514}
4515
881b2a96
RS
4516/* Return the opcode of the special instruction to be used to load
4517 the constant X. */
4518
4519const char *
b96a374d 4520standard_80387_constant_opcode (rtx x)
881b2a96
RS
4521{
4522 switch (standard_80387_constant_p (x))
4523 {
b96a374d 4524 case 1:
881b2a96
RS
4525 return "fldz";
4526 case 2:
4527 return "fld1";
b96a374d 4528 case 3:
881b2a96
RS
4529 return "fldlg2";
4530 case 4:
4531 return "fldln2";
b96a374d 4532 case 5:
881b2a96
RS
4533 return "fldl2e";
4534 case 6:
4535 return "fldl2t";
b96a374d 4536 case 7:
881b2a96 4537 return "fldpi";
d0396b79
NS
4538 default:
4539 gcc_unreachable ();
881b2a96 4540 }
881b2a96
RS
4541}
4542
4543/* Return the CONST_DOUBLE representing the 80387 constant that is
4544 loaded by the specified special instruction. The argument IDX
4545 matches the return value from standard_80387_constant_p. */
4546
4547rtx
b96a374d 4548standard_80387_constant_rtx (int idx)
881b2a96
RS
4549{
4550 int i;
4551
4552 if (! ext_80387_constants_init)
4553 init_ext_80387_constants ();
4554
4555 switch (idx)
4556 {
4557 case 3:
4558 case 4:
4559 case 5:
4560 case 6:
4561 case 7:
4562 i = idx - 3;
4563 break;
4564
4565 default:
d0396b79 4566 gcc_unreachable ();
881b2a96
RS
4567 }
4568
1f48e56d 4569 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4570 XFmode);
881b2a96
RS
4571}
4572
2b04e52b
JH
4573/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4574 */
4575int
b96a374d 4576standard_sse_constant_p (rtx x)
2b04e52b 4577{
0e67d460
JH
4578 if (x == const0_rtx)
4579 return 1;
2b04e52b
JH
4580 return (x == CONST0_RTX (GET_MODE (x)));
4581}
4582
2a2ab3f9
JVA
4583/* Returns 1 if OP contains a symbol reference */
4584
4585int
b96a374d 4586symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4587{
8d531ab9
KH
4588 const char *fmt;
4589 int i;
2a2ab3f9
JVA
4590
4591 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4592 return 1;
4593
4594 fmt = GET_RTX_FORMAT (GET_CODE (op));
4595 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4596 {
4597 if (fmt[i] == 'E')
4598 {
8d531ab9 4599 int j;
2a2ab3f9
JVA
4600
4601 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4602 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4603 return 1;
4604 }
e9a25f70 4605
2a2ab3f9
JVA
4606 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4607 return 1;
4608 }
4609
4610 return 0;
4611}
e075ae69
RH
4612
4613/* Return 1 if it is appropriate to emit `ret' instructions in the
4614 body of a function. Do this only if the epilogue is simple, needing a
4615 couple of insns. Prior to reloading, we can't tell how many registers
4616 must be saved, so return 0 then. Return 0 if there is no frame
6e14af16 4617 marker to de-allocate. */
32b5b1aa
SC
4618
4619int
b96a374d 4620ix86_can_use_return_insn_p (void)
32b5b1aa 4621{
4dd2ac2c 4622 struct ix86_frame frame;
9a7372d6 4623
9a7372d6
RH
4624 if (! reload_completed || frame_pointer_needed)
4625 return 0;
32b5b1aa 4626
9a7372d6
RH
4627 /* Don't allow more than 32 pop, since that's all we can do
4628 with one instruction. */
4629 if (current_function_pops_args
4630 && current_function_args_size >= 32768)
e075ae69 4631 return 0;
32b5b1aa 4632
4dd2ac2c
JH
4633 ix86_compute_frame_layout (&frame);
4634 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4635}
6189a572 4636\f
6fca22eb
RH
4637/* Value should be nonzero if functions must have frame pointers.
4638 Zero means the frame pointer need not be set up (and parms may
4639 be accessed via the stack pointer) in functions that seem suitable. */
4640
4641int
b96a374d 4642ix86_frame_pointer_required (void)
6fca22eb
RH
4643{
4644 /* If we accessed previous frames, then the generated code expects
4645 to be able to access the saved ebp value in our frame. */
4646 if (cfun->machine->accesses_prev_frame)
4647 return 1;
a4f31c00 4648
6fca22eb
RH
4649 /* Several x86 os'es need a frame pointer for other reasons,
4650 usually pertaining to setjmp. */
4651 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4652 return 1;
4653
4654 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4655 the frame pointer by default. Turn it back on now if we've not
4656 got a leaf function. */
a7943381 4657 if (TARGET_OMIT_LEAF_FRAME_POINTER
5bf5a10b
AO
4658 && (!current_function_is_leaf
4659 || ix86_current_function_calls_tls_descriptor))
55ba61f3
JH
4660 return 1;
4661
4662 if (current_function_profile)
6fca22eb
RH
4663 return 1;
4664
4665 return 0;
4666}
4667
/* Record that the current function accesses previous call frames.
   This forces a frame pointer (see ix86_frame_pointer_required, which
   reads this flag).  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4675\f
7d072037 4676#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
145aacc2
RH
4677# define USE_HIDDEN_LINKONCE 1
4678#else
4679# define USE_HIDDEN_LINKONCE 0
4680#endif
4681
bd09bdeb 4682static int pic_labels_used;
e9a25f70 4683
145aacc2
RH
/* Fills in the label name that should be used for a pc thunk for
   the given register.  NAME must have room for 32 characters.  With
   hidden-linkonce support the well-known "__i686.get_pc_thunk.<reg>"
   name is used; otherwise an internal per-register label.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
4695
4696
e075ae69
RH
/* Target end-of-file hook.  Emits, for each register flagged in
   pic_labels_used by output_set_got, a pc thunk that loads that
   register with the return address of the caller and then returns;
   finally emits the executable-stack marker if the target needs it.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers no SET_GOT pattern asked a thunk for.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  /* Mach-O: weak, private-extern definition in the coalesced
	     text section.  */
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n", asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  /* ELF: a hidden, one-only function so duplicate thunks from
	     different objects coalesce at link time.  */
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: movl (%esp), %reg ; ret.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 4762
/* Emit code for the SET_GOT patterns: load DEST with the address of
   the GOT, either via a call/pop (or mov when non-PIC) sequence or by
   calling a per-register pc thunk.  LABEL, when non-NULL, names the
   point whose address the sequence captures.  Returns the (empty)
   remaining assembler template.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      /* Non-PIC: a plain mov of the label suffices; PIC: call the
	 next instruction so its address lands on the stack.  */
      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      /* PIC: pop the pushed return address into DEST.  */
      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Deep-branch-prediction targets: keep call/ret paired by using
	 a thunk; remember to emit it in ix86_file_end.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  /* Turn the captured pc into the GOT address by adding
     _GLOBAL_OFFSET_TABLE_'s pc-relative value.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}
8dfe5673 4825
0d7d98ee 4826/* Generate an "push" pattern for input ARG. */
e9a25f70 4827
e075ae69 4828static rtx
b96a374d 4829gen_push (rtx arg)
e9a25f70 4830{
c5c76735 4831 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4832 gen_rtx_MEM (Pmode,
4833 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4834 stack_pointer_rtx)),
4835 arg);
e9a25f70
JL
4836}
4837
bd09bdeb
RH
4838/* Return >= 0 if there is an unused call-clobbered register available
4839 for the entire function. */
4840
4841static unsigned int
b96a374d 4842ix86_select_alt_pic_regnum (void)
bd09bdeb 4843{
5bf5a10b
AO
4844 if (current_function_is_leaf && !current_function_profile
4845 && !ix86_current_function_calls_tls_descriptor)
bd09bdeb
RH
4846 {
4847 int i;
4848 for (i = 2; i >= 0; --i)
4849 if (!regs_ever_live[i])
4850 return i;
4851 }
4852
4853 return INVALID_REGNUM;
4854}
fce5a9f2 4855
4dd2ac2c
JH
4856/* Return 1 if we need to save REGNO. */
4857static int
b96a374d 4858ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 4859{
bd09bdeb
RH
4860 if (pic_offset_table_rtx
4861 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4862 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4863 || current_function_profile
8c38a24f
MM
4864 || current_function_calls_eh_return
4865 || current_function_uses_const_pool))
bd09bdeb
RH
4866 {
4867 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4868 return 0;
4869 return 1;
4870 }
1020a5ab
RH
4871
4872 if (current_function_calls_eh_return && maybe_eh_return)
4873 {
4874 unsigned i;
4875 for (i = 0; ; i++)
4876 {
b531087a 4877 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4878 if (test == INVALID_REGNUM)
4879 break;
9b690711 4880 if (test == regno)
1020a5ab
RH
4881 return 1;
4882 }
4883 }
4dd2ac2c 4884
150cdc9e
RH
4885 if (cfun->machine->force_align_arg_pointer
4886 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4887 return 1;
4888
1020a5ab
RH
4889 return (regs_ever_live[regno]
4890 && !call_used_regs[regno]
4891 && !fixed_regs[regno]
4892 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4893}
4894
0903fcab
JH
4895/* Return number of registers to be saved on the stack. */
4896
4897static int
b96a374d 4898ix86_nsaved_regs (void)
0903fcab
JH
4899{
4900 int nregs = 0;
0903fcab
JH
4901 int regno;
4902
4dd2ac2c 4903 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4904 if (ix86_save_reg (regno, true))
4dd2ac2c 4905 nregs++;
0903fcab
JH
4906 return nregs;
4907}
4908
4909/* Return the offset between two registers, one to be eliminated, and the other
4910 its replacement, at the start of a routine. */
4911
4912HOST_WIDE_INT
b96a374d 4913ix86_initial_elimination_offset (int from, int to)
0903fcab 4914{
4dd2ac2c
JH
4915 struct ix86_frame frame;
4916 ix86_compute_frame_layout (&frame);
564d80f4
JH
4917
4918 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4919 return frame.hard_frame_pointer_offset;
564d80f4
JH
4920 else if (from == FRAME_POINTER_REGNUM
4921 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4922 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4923 else
4924 {
d0396b79
NS
4925 gcc_assert (to == STACK_POINTER_REGNUM);
4926
4927 if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4928 return frame.stack_pointer_offset;
d0396b79
NS
4929
4930 gcc_assert (from == FRAME_POINTER_REGNUM);
4931 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4932 }
4933}
4934
4dd2ac2c 4935/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4936
4dd2ac2c 4937static void
b96a374d 4938ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4939{
65954bd8 4940 HOST_WIDE_INT total_size;
95899b34 4941 unsigned int stack_alignment_needed;
b19ee4bd 4942 HOST_WIDE_INT offset;
95899b34 4943 unsigned int preferred_alignment;
4dd2ac2c 4944 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4945
4dd2ac2c 4946 frame->nregs = ix86_nsaved_regs ();
564d80f4 4947 total_size = size;
65954bd8 4948
95899b34
RH
4949 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4950 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4951
d7394366
JH
4952 /* During reload iteration the amount of registers saved can change.
4953 Recompute the value as needed. Do not recompute when amount of registers
aabcd309 4954 didn't change as reload does multiple calls to the function and does not
d7394366
JH
4955 expect the decision to change within single iteration. */
4956 if (!optimize_size
4957 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
4958 {
4959 int count = frame->nregs;
4960
d7394366 4961 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
4962 /* The fast prologue uses move instead of push to save registers. This
4963 is significantly longer, but also executes faster as modern hardware
4964 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 4965
d9b40e8d
JH
4966 Be careful about choosing what prologue to emit: When function takes
4967 many instructions to execute we may use slow version as well as in
4968 case function is known to be outside hot spot (this is known with
4969 feedback only). Weight the size of function by number of registers
4970 to save as it is cheap to use one or two push instructions but very
4971 slow to use many of them. */
4972 if (count)
4973 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4974 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4975 || (flag_branch_probabilities
4976 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4977 cfun->machine->use_fast_prologue_epilogue = false;
4978 else
4979 cfun->machine->use_fast_prologue_epilogue
4980 = !expensive_function_p (count);
4981 }
4982 if (TARGET_PROLOGUE_USING_MOVE
4983 && cfun->machine->use_fast_prologue_epilogue)
4984 frame->save_regs_using_mov = true;
4985 else
4986 frame->save_regs_using_mov = false;
4987
4988
9ba81eaa 4989 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4990 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4991
4992 frame->hard_frame_pointer_offset = offset;
564d80f4 4993
fcbfaa65
RK
4994 /* Do some sanity checking of stack_alignment_needed and
4995 preferred_alignment, since i386 port is the only using those features
f710504c 4996 that may break easily. */
564d80f4 4997
d0396b79
NS
4998 gcc_assert (!size || stack_alignment_needed);
4999 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5000 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5001 gcc_assert (stack_alignment_needed
5002 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
564d80f4 5003
4dd2ac2c
JH
5004 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5005 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 5006
4dd2ac2c
JH
5007 /* Register save area */
5008 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 5009
8362f420
JH
5010 /* Va-arg area */
5011 if (ix86_save_varrargs_registers)
5012 {
5013 offset += X86_64_VARARGS_SIZE;
5014 frame->va_arg_size = X86_64_VARARGS_SIZE;
5015 }
5016 else
5017 frame->va_arg_size = 0;
5018
4dd2ac2c
JH
5019 /* Align start of frame for local function. */
5020 frame->padding1 = ((offset + stack_alignment_needed - 1)
5021 & -stack_alignment_needed) - offset;
f73ad30e 5022
4dd2ac2c 5023 offset += frame->padding1;
65954bd8 5024
4dd2ac2c
JH
5025 /* Frame pointer points here. */
5026 frame->frame_pointer_offset = offset;
54ff41b7 5027
4dd2ac2c 5028 offset += size;
65954bd8 5029
0b7ae565 5030 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
5031 all the function calls as dead code.
5032 Skipping is however impossible when function calls alloca. Alloca
5033 expander assumes that last current_function_outgoing_args_size
5034 of stack frame are unused. */
5035 if (ACCUMULATE_OUTGOING_ARGS
5bf5a10b
AO
5036 && (!current_function_is_leaf || current_function_calls_alloca
5037 || ix86_current_function_calls_tls_descriptor))
4dd2ac2c
JH
5038 {
5039 offset += current_function_outgoing_args_size;
5040 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5041 }
5042 else
5043 frame->outgoing_arguments_size = 0;
564d80f4 5044
002ff5bc
RH
5045 /* Align stack boundary. Only needed if we're calling another function
5046 or using alloca. */
5bf5a10b
AO
5047 if (!current_function_is_leaf || current_function_calls_alloca
5048 || ix86_current_function_calls_tls_descriptor)
0b7ae565
RH
5049 frame->padding2 = ((offset + preferred_alignment - 1)
5050 & -preferred_alignment) - offset;
5051 else
5052 frame->padding2 = 0;
4dd2ac2c
JH
5053
5054 offset += frame->padding2;
5055
5056 /* We've reached end of stack frame. */
5057 frame->stack_pointer_offset = offset;
5058
5059 /* Size prologue needs to allocate. */
5060 frame->to_allocate =
5061 (size + frame->padding1 + frame->padding2
8362f420 5062 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 5063
b19ee4bd
JJ
5064 if ((!frame->to_allocate && frame->nregs <= 1)
5065 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
5066 frame->save_regs_using_mov = false;
5067
a5b378d6 5068 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5bf5a10b
AO
5069 && current_function_is_leaf
5070 && !ix86_current_function_calls_tls_descriptor)
8362f420
JH
5071 {
5072 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
5073 if (frame->save_regs_using_mov)
5074 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
5075 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5076 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5077 }
5078 else
5079 frame->red_zone_size = 0;
5080 frame->to_allocate -= frame->red_zone_size;
5081 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
5082#if 0
5083 fprintf (stderr, "nregs: %i\n", frame->nregs);
5084 fprintf (stderr, "size: %i\n", size);
5085 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5086 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 5087 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
5088 fprintf (stderr, "padding2: %i\n", frame->padding2);
5089 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 5090 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
5091 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5092 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5093 frame->hard_frame_pointer_offset);
5094 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5095#endif
65954bd8
JL
5096}
5097
0903fcab
JH
/* Emit code to save registers in the prologue using PUSH insns.
   Registers are pushed in decreasing register-number order (the loop
   counts down from FIRST_PSEUDO_REGISTER - 1), so the epilogue's
   ascending POP loop restores them in the matching reverse order.
   Each push is marked frame-related so dwarf2out emits CFI for it.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
5113
c6036a37
JH
/* Emit code to save registers using MOV insns.  The first saved register
   is stored at POINTER + OFFSET; subsequent ones follow at successive
   UNITS_PER_WORD increments.  Each store is marked frame-related so the
   unwinder knows where the register was saved.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;

  /* Ascending order here must stay consistent with the offsets used by
     ix86_emit_restore_regs_using_mov.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}
5132
/* Expand prologue or epilogue stack adjustment (DEST = SRC + OFFSET).
   The dedicated pattern exists to put a dependency on all ebp-based
   memory accesses, keeping the scheduler from moving them across the
   adjustment.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* Offset does not fit in a sign-extended 32-bit immediate; stage it
         through %r11.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
                                                               offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
5166
150cdc9e
RH
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.
   For main () (when FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN is set) we
   cannot trust the incoming stack alignment, so the prologue realigns
   the stack; the true argument pointer is then kept in a register
   (%ecx, hard register 2) recorded in cfun->machine, and a pseudo copy
   of it is returned.  All other functions use the normal virtual
   incoming-args pointer.  */

static rtx
ix86_internal_arg_pointer (void)
{
  if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    {
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}
5183
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   LABEL is the assembler label of the insn, PATTERN the SET containing
   the UNSPEC, and INDEX the UNSPEC code.  Only the two unspecs produced
   by the stack-realignment prologue are expected here.  */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      /* Record that the register in the UNSPEC was saved into the
         SET destination.  */
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
                              SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      /* Define the CFA as SET destination register + constant offset.  */
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
                         INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}
5207
/* Expand the prologue into a bunch of separate insns.
   Emission order: optional stack realignment for main (), frame-pointer
   setup, register saves (PUSH or MOV form per frame.save_regs_using_mov),
   stack allocation (plain subtraction or a probing worker for large
   Win32 frames), the PIC register load, and a scheduling blockage when
   profiling.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
         and record the fafp as the "save register" of the stack pointer.
         The latter is there in order that the unwinder can see where it
         should restore the stack pointer across the "and" insn below.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
                          UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack to a 16-byte boundary.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
                             GEN_INT (-16)));

      /* And here we cheat like madmen with the unwind info.  We force the
         cfa register back to sp+4, which is exactly what it was at the
         start of the function.  Re-pushing the return address results in
         the return at the same spot relative to the cfa, and thus is
         correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* MOV-form saves live inside the allocated area, so grow the
       allocation to cover them.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing.  Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;

      gcc_assert (!TARGET_64BIT);

      /* The worker clobbers %eax; preserve an incoming argument living
         there by pushing it (which also shrinks what we must allocate).  */
      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= 4;
        }

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* Describe the net sp adjustment for the unwinder, since the
         worker's pattern does not expose it.  */
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                            t, REG_NOTES (insn));

      if (eax_live)
        {
          /* Reload the saved %eax from its slot above the new frame.  */
          if (frame_pointer_needed)
            t = plus_constant (hard_frame_pointer_rtx,
                               allocate
                               - frame.to_allocate
                               - frame.nregs * UNITS_PER_WORD);
          else
            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
        }
    }

  /* Without a red zone, MOV-form register saves happen after the frame
     is allocated.  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      /* Use a cheaper register than %ebx for the PIC base when one is
         available.  */
      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
      else
        insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5377
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  The first
   register is restored from POINTER + OFFSET, subsequent ones from
   successive UNITS_PER_WORD increments (mirroring the layout written by
   ix86_emit_save_regs_using_mov).  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg to also restore the eh_return data registers.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
                                  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        /* Ensure that adjust_address won't be forced to produce pointer
           out of range allowed by x86-64 instruction set.  */
        if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
          {
            rtx r11;

            /* Fold the oversized offset into %r11 and restart the
               running offset from the new base.  */
            r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
            emit_move_insn (r11, GEN_INT (offset));
            emit_insn (gen_adddi3 (r11, r11, pointer));
            base_address = gen_rtx_MEM (Pmode, r11);
            offset = 0;
          }
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (base_address, Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}
5407
/* Restore function stack, frame, and registers.
   STYLE selects the epilogue variant: 0 for sibcall epilogues (no
   return insn is emitted), 2 for eh_return epilogues (restore the
   eh data registers and add EH_RETURN_STACKADJ_RTX to the stack
   pointer); other values produce a normal return.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  /* sp is "valid" (usable for addressing saved registers) unless a
     frame pointer exists and sp may have been clobbered.  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register is cheaper than
     reloading sp and popping the register.

     The default code results in stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  Only exception is esp pointing directly
         to the end of block of saved registers, where we may simplify
         addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              /* Compute the final sp value (saved-ebp slot + adjustment),
                 restore ebp, then switch sp to it.  */
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style);
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
                                   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          /* Discrete equivalent of LEAVE: mov sp from ebp, pop ebp.  */
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style);
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* POP-based restore path.  First step is to deallocate the stack
         frame so that we can pop the registers.  */
      if (!sp_valid)
        {
          gcc_assert (frame_pointer_needed);
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);
        }
      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      /* Ascending pop order matches the descending push order used by
         ix86_emit_save_regs.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  /* Undo the prologue's stack realignment: recover the pre-alignment sp
     from the saved argument pointer.  */
  if (cfun->machine->force_align_arg_pointer)
    {
      emit_insn (gen_addsi3 (stack_pointer_rtx,
                             cfun->machine->force_align_arg_pointer,
                             GEN_INT (-4)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
         return address, do explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5579
/* Handle the TARGET_ASM_FUNCTION_EPILOGUE hook: reset state from the
   function's potential modifications.  In particular, the prologue may
   have retargeted the PIC register to an alternate hard register (see
   ix86_select_alt_pic_regnum use in ix86_expand_prologue); restore its
   canonical register number for the next function.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
e075ae69
RH
5589\f
/* Extract the parts of an RTL expression ADDR that is a valid memory
   address for an instruction into *OUT (base, index, displacement,
   scale, segment).  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of an lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (left-nested) PLUS chain into at most four addends,
         then classify each one.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              /* index * scale; at most one such term.  */
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case UNSPEC:
              /* Thread pointer reference -> segment override.  */
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case REG:
            case SUBREG:
              /* First bare register is the base, second the index.  */
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              /* At most one displacement term.  */
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Look through SUBREGs for the register checks below.  */
  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is no scaling
     (they cannot be encoded as an index, so swap them into the base).  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a
     displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or
     displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
01329426
JH
5745\f
/* Return the cost of the memory address X (TARGET_ADDRESS_COST hook).
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  Lower cost is better; complex-but-cheap addresses start
   at 1 and are decremented/incremented below.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address: penalize
     any base/index that is not already a hard register.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Extra penalty when two distinct pseudo registers are needed.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
5809\f
b949ea8b
JW
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look for (const (plus (unspec GOTPCREL) const)) and return the
         symbol/label wrapped inside the UNSPEC.  */
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  /* 32-bit: strip the PIC wrapping, if any.  */
  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
828a4fe4
MS
5849
5850/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5851 this is used for to form addresses to local data when -fPIC is in
5852 use. */
5853
5854static bool
5855darwin_local_data_pic (rtx disp)
5856{
5857 if (GET_CODE (disp) == MINUS)
5858 {
5859 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5860 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5861 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5862 {
5863 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5864 if (! strcmp (sym_name, "<pic base>"))
5865 return true;
5866 }
5867 }
5868
5869 return false;
5870}
b949ea8b 5871\f
f996902d
RH
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  Rejects TLS symbol references (except the
   specific local-exec/local-dynamic UNSPEC forms) and, on 32-bit,
   GOTOFF unspecs; everything else is handled by the move patterns.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      /* Drill through (const (plus ... const_int)) wrappers.  */
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5932
3a04ff64
RH
5933/* Determine if it's legal to put X into the constant pool. This
5934 is not possible for the address of thread-local symbols, which
5935 is checked above. */
5936
5937static bool
b96a374d 5938ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5939{
5940 return !legitimate_constant_p (x);
5941}
5942
f996902d
RH
5943/* Determine if a given RTX is a valid constant address. */
5944
5945bool
b96a374d 5946constant_address_p (rtx x)
f996902d 5947{
a94f136b 5948 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5949}
5950
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      /* Look through (const (plus ... const_int)) to the real term.  */
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && GET_CODE (XEXP (inner, 1)) == CONST_INT)
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbols and labels are only legitimate as a PIC memory
         displacement.  */
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
5991
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns nonzero (true) when DISP may appear as the
   displacement part of an address.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          /* Only symbol+offset with the offset within +/-16MB is
             accepted here.  */
          if (GET_CODE (op1) != CONST_INT
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
         of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF))
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  /* 32-bit PIC: allow an optional constant-integer addend before the
     unspec; remember that we saw one, since some unspecs refuse it.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      /* @GOT may not carry an addend.  */
      if (saw_plus)
        return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While ABI specify also 32bit relocation but we don't produce it in
         small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      /* Initial-exec TLS displacements take no addend and must wrap an
         initial-exec-model symbol.  */
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}
6107
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   STRICT nonzero means hard registers must satisfy the strict
   REG_OK_FOR_*_P checks.  Returns TRUE on success, FALSE on failure;
   failures are reported on stderr under TARGET_DEBUG_ADDR.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* Split ADDR into base + index*scale + disp; reject anything the
     decomposer cannot express as an x86 address.  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG
               && REG_P (SUBREG_REG (base))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (base);
      else
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG
               && REG_P (SUBREG_REG (index))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (index);
      else
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  Hardware supports only 1, 2, 4, 8, and a
     scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
             used.  While ABI specify also 32bit relocations, we don't produce
             them at all and use IP relative instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;
            reason = "64bit address unspec";
            goto report_error;

          case UNSPEC_GOTPCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            /* TLS displacement unspecs are accepted as-is.  */
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        {
          /* 64-bit addresses only carry sign-extended 32-bit
             displacements.  */
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 6351\f
569b7f6a 6352/* Return a unique alias set for the GOT. */
55efb413 6353
0f290768 6354static HOST_WIDE_INT
b96a374d 6355ix86_GOT_alias_set (void)
55efb413 6356{
5bf0ebab
RH
6357 static HOST_WIDE_INT set = -1;
6358 if (set == -1)
6359 set = new_alias_set ();
6360 return set;
0f290768 6361}
55efb413 6362
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    /* Already a valid PIC displacement -- nothing to do.  */
    new = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
          {
            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
            new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
          }
        else
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
        {
          /* Note tmpreg == reg here, so the add targets REG and the
             result is REG itself.  */
          new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                     tmpreg, 1, OPTAB_DIRECT);
          new = reg;
        }
      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
          {
            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
            new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
          }
        else
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          /* Global symbol: load its address RIP-relatively through
             the GOT (@GOTPCREL).  */
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use directly gen_movsi, otherwise the address is loaded
             into register for CSE.  We don't want to CSE this addresses,
             instead we CSE addresses from the GOT table, so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          /* A constant too wide for an immediate must live in a
             register.  */
          if (reg)
            {
              emit_move_insn (reg, addr);
              new = reg;
            }
          else
            new = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  /* 64-bit: offsets within +/-16MB are representable
                     directly; otherwise split symbol and offset into
                     registers.  */
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              /* Legitimize both halves of the PLUS recursively and
                 recombine, folding a constant part into the base.  */
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
6576\f
74dc3e94 6577/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6578
6579static rtx
b96a374d 6580get_thread_pointer (int to_reg)
f996902d 6581{
74dc3e94 6582 rtx tp, reg, insn;
f996902d
RH
6583
6584 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6585 if (!to_reg)
6586 return tp;
f996902d 6587
74dc3e94
RH
6588 reg = gen_reg_rtx (Pmode);
6589 insn = gen_rtx_SET (VOIDmode, reg, tp);
6590 insn = emit_insn (insn);
6591
6592 return reg;
6593}
6594
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol, MODEL selects the TLS access model.  Returns an
   rtx for the symbol's address (a register, or a PLUS usable as an
   address); emits whatever insns the model requires.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      /* GNU2 TLS descriptors compute an offset from the thread
         pointer, so fetch it up front.  */
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          /* The 64-bit call returns the address in %rax; wrap the
             sequence as a libcall block equivalent to X.  */
          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          emit_libcall_block (insns, dest, rax, x);
        }
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
        emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
        emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
        {
          rtx x = ix86_tls_module_base ();

          base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));

          set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
        }

      /* Address = module base + @DTPOFF offset of the symbol.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT-relative relocation kind and whether a PIC base
         register is needed.  */
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      /* Load the symbol's TP offset from the GOT.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          /* Sun TLS: the offset is subtracted from the thread
             pointer.  */
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
fce5a9f2 6743
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols (bare or symbol+offset) take a dedicated path.  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      /* Force any multiplications out of the address into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      /* Last resort: compute one operand into a fresh register so the
         address becomes REG + REG.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
2a2ab3f9
JVA
6940\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* The program counter is only meaningful under PIC; print it as
         the assembler's "current location" dot.  */
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      output_addr_const (file, x);
      /* 'P' requests a PLT reference for non-local symbols.  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      /* Bracket the difference so the assembler parses it as one
         term (not on Mach-O).  */
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation suffix
         selected by the unspec kind.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@TPOFF", file);
          else
            fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs ("@GOTTPOFF(%rip)", file);
          else
            fputs ("@GOTNTPOFF", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 7075
fdbe66f2 7076/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
b9203463
RH
7077 We need to emit DTP-relative relocations. */
7078
fdbe66f2 7079static void
b96a374d 7080i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 7081{
75d38379
JJ
7082 fputs (ASM_LONG, file);
7083 output_addr_const (file, x);
7084 fputs ("@DTPOFF", file);
b9203463
RH
7085 switch (size)
7086 {
7087 case 4:
b9203463
RH
7088 break;
7089 case 8:
75d38379 7090 fputs (", 0", file);
b9203463 7091 break;
b9203463 7092 default:
d0396b79 7093 gcc_unreachable ();
b9203463 7094 }
b9203463
RH
7095}
7096
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   ORIG_X is the (possibly MEM-wrapped) legitimized address; the
   return value is either a simplified equivalent or ORIG_X unchanged
   when no known PIC pattern is recognized.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  /* Look through a MEM at the address inside it.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit mode only recognizes a (mem (const (unspec GOTPCREL)))
	 load; everything else is returned untouched.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  /* 32-bit patterns all have the shape (plus <base> (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      /* Peel the PIC register off whichever side of the inner PLUS
	 it appears on, keeping the other side as the addend.  */
      if (GET_CODE (XEXP (reg_addend, 0)) == REG
	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 1);
      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
	reg_addend = XEXP (reg_addend, 0);
      else
	return orig_x;
      /* The remaining addend must be a plain register or a scaled
	 index (MULT/ASHIFT) to be a valid address part.  */
      if (GET_CODE (reg_addend) != REG
	  && GET_CODE (reg_addend) != MULT
	  && GET_CODE (reg_addend) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* Strip the CONST wrapper and split off a trailing integer offset,
     if any, so the UNSPEC underneath is exposed.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* @GOT references only make sense inside a MEM (a load from the
     GOT); @GOTOFF only outside of one.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && GET_CODE (orig_x) != MEM)
    result = XEXP (x, 0);

  if (! result)
    return orig_x;

  /* Re-attach the pieces that were peeled off above.  */
  if (const_addend)
    result = gen_rtx_PLUS (Pmode, result, const_addend);
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  return result;
}
2a2ab3f9 7185\f
/* Emit to FILE the assembler condition suffix ("e", "ne", "g", ...)
   for comparison CODE performed in flags mode MODE.  If REVERSE is
   nonzero, emit the logically reversed condition instead.  FP nonzero
   selects the spelling used for fcmov-style insns, which some
   assemblers spell differently from the integer forms.  */
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons are first mapped onto their integer
	 equivalents; callers must only pass comparisons that need no
	 bypass or second jump.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      /* Signed greater-than needs full flags or the GC/NO subsets.  */
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  /* Only the sign flag is valid in these modes.  */
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      gcc_assert (mode == CCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  /* "not sign" mirrors the LT/"s" case above.  */
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      gcc_assert (mode == CCmode);
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
7283
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  /* These pseudo/flag registers never appear in assembler output.  */
  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
	      && REGNO (x) != FRAME_POINTER_REGNUM
	      && REGNO (x) != FLAGS_REG
	      && REGNO (x) != FPSR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the override CODE into an operand byte size (0 means
     the "high" byte register, 3 means an st(N) stack register).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers get an 'e' (32-bit) or 'r' (64-bit) prefix
	 before their 16-bit base name; FP registers do not.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      /* Registers without a QImode name fall back to the word name.  */
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      gcc_unreachable ();
    }
}
7380
f996902d
RH
7381/* Locate some local-dynamic symbol still in use by this function
7382 so that we can print its name in some tls_local_dynamic_base
7383 pattern. */
7384
7385static const char *
b96a374d 7386get_some_local_dynamic_name (void)
f996902d
RH
7387{
7388 rtx insn;
7389
7390 if (cfun->machine->some_ld_name)
7391 return cfun->machine->some_ld_name;
7392
7393 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7394 if (INSN_P (insn)
7395 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7396 return cfun->machine->some_ld_name;
7397
d0396b79 7398 gcc_unreachable ();
f996902d
RH
7399}
7400
7401static int
b96a374d 7402get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7403{
7404 rtx x = *px;
7405
7406 if (GET_CODE (x) == SYMBOL_REF
fd4aca96 7407 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
f996902d
RH
7408 {
7409 cfun->machine->some_ld_name = XSTR (x, 0);
7410 return 1;
7411 }
7412
7413 return 0;
7414}
7415
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   + -- print a "ds ; " or "cs ; " branch hint prefix based on the
	insn's REG_BR_PROB note, when branch prediction hints are on.
 */

/* Print operand X to FILE, modified by the single-letter CODE
   documented above (CODE == 0 means no modifier).  */
void
print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  PRINT_OPERAND (file, x, 0);
	  return;


	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without the 'q' mnemonic suffix spell
		     it "ll" instead.  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      gcc_unreachable ();
	    }

	/* These codes only modify how the register/memory operand
	   below is printed; fall through to the generic printing.  */
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  return;
	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;

	case 'H':
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case '+':
	  {
	    /* Deliberately shadows the parameter; holds the
	       REG_BR_PROB note, not the operand.  */
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		/* Only hint when the prediction is clearly biased.  */
		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      /* Single-precision immediates are printed as raw hex bits.  */
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
7828\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  /* Every address reaching final must already be valid.  */
  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit a segment override prefix, if any.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      /* Intel syntax needs an explicit ds: for a bare
		 constant address.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT)
	{
	  /* Look through (const (plus sym offset)) to find the symbol.  */
	  if (GET_CODE (disp) == CONST
	      && GET_CODE (XEXP (disp, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	    disp = XEXP (XEXP (disp, 0), 0);
	  if (GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
	    fputs ("(%rip)", file);
	}
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T form: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel form: sym[base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
f996902d
RH
7971
/* Print the TLS @-relocation suffix for an UNSPEC address wrapper X
   to FILE (presumably reached via the OUTPUT_ADDR_CONST_EXTRA hook —
   caller not visible here).  Returns true if X was recognized, false
   to let generic code handle (or reject) it.  */
bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  /* The wrapped symbol is the single UNSPEC operand.  */
  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit uses @TPOFF where 32-bit uses @NTPOFF.  */
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
2a2ab3f9
JVA
8021\f
8022/* Split one or more DImode RTL references into pairs of SImode
8023 references. The RTL can be REG, offsettable MEM, integer constant, or
8024 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8025 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 8026 that parallel "operands". */
2a2ab3f9
JVA
8027
8028void
b96a374d 8029split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
8030{
8031 while (num--)
8032 {
57dbca5e 8033 rtx op = operands[num];
b932f770
JH
8034
8035 /* simplify_subreg refuse to split volatile memory addresses,
8036 but we still have to handle it. */
8037 if (GET_CODE (op) == MEM)
2a2ab3f9 8038 {
f4ef873c 8039 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 8040 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
8041 }
8042 else
b932f770 8043 {
38ca929b
JH
8044 lo_half[num] = simplify_gen_subreg (SImode, op,
8045 GET_MODE (op) == VOIDmode
8046 ? DImode : GET_MODE (op), 0);
8047 hi_half[num] = simplify_gen_subreg (SImode, op,
8048 GET_MODE (op) == VOIDmode
8049 ? DImode : GET_MODE (op), 4);
b932f770 8050 }
2a2ab3f9
JVA
8051 }
8052}
28356f52 8053/* Split one or more TImode RTL references into pairs of DImode
44cf5b6a
JH
8054 references. The RTL can be REG, offsettable MEM, integer constant, or
8055 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8056 split and "num" is its length. lo_half and hi_half are output arrays
8057 that parallel "operands". */
8058
8059void
b96a374d 8060split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
8061{
8062 while (num--)
8063 {
8064 rtx op = operands[num];
b932f770
JH
8065
8066 /* simplify_subreg refuse to split volatile memory addresses, but we
8067 still have to handle it. */
8068 if (GET_CODE (op) == MEM)
44cf5b6a
JH
8069 {
8070 lo_half[num] = adjust_address (op, DImode, 0);
8071 hi_half[num] = adjust_address (op, DImode, 8);
8072 }
8073 else
b932f770
JH
8074 {
8075 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8076 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8077 }
44cf5b6a
JH
8078 }
8079}
2a2ab3f9 8080\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* Assembled template: mnemonic prefix + operand template.  The longest
     combination ("fisub" plus a braced AT&T/Intel template) fits with
     room to spare.  Static because the caller consumes the string before
     the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  /* Pick the x87 mnemonic root (integer-operand "fi" form when either
     source is MODE_INT) and the SSE mnemonic root.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  /* SSE case: scalar single (ss) or scalar double (sd) suffix, done.  */
  if (is_sse)
   {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
   }
  strcpy (buf, p);

  /* x87 case: choose the operand template (and any p/r suffix) based on
     which operand is memory, which stack register dies, and whether the
     destination is st(0).  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops: a memory operand selects between the
	 plain and reversed ("r") instruction forms.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 8302
ff680eb1
UB
8303/* Return needed mode for entity in optimize_mode_switching pass. */
8304
8305int
8306ix86_mode_needed (int entity, rtx insn)
8307{
8308 enum attr_i387_cw mode;
8309
8310 /* The mode UNINITIALIZED is used to store control word after a
8311 function call or ASM pattern. The mode ANY specify that function
8312 has no requirements on the control word and make no changes in the
8313 bits we are interested in. */
8314
8315 if (CALL_P (insn)
8316 || (NONJUMP_INSN_P (insn)
8317 && (asm_noperands (PATTERN (insn)) >= 0
8318 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8319 return I387_CW_UNINITIALIZED;
8320
8321 if (recog_memoized (insn) < 0)
8322 return I387_CW_ANY;
8323
8324 mode = get_attr_i387_cw (insn);
8325
8326 switch (entity)
8327 {
8328 case I387_TRUNC:
8329 if (mode == I387_CW_TRUNC)
8330 return mode;
8331 break;
8332
8333 case I387_FLOOR:
8334 if (mode == I387_CW_FLOOR)
8335 return mode;
8336 break;
8337
8338 case I387_CEIL:
8339 if (mode == I387_CW_CEIL)
8340 return mode;
8341 break;
8342
8343 case I387_MASK_PM:
8344 if (mode == I387_CW_MASK_PM)
8345 return mode;
8346 break;
8347
8348 default:
8349 gcc_unreachable ();
8350 }
8351
8352 return I387_CW_ANY;
8353}
8354
edeacc14
UB
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects which rounding/masking variant
   (I387_CW_TRUNC, I387_CW_FLOOR, I387_CW_CEIL or I387_CW_MASK_PM) to
   prepare; the adjusted control word is stored in the stack slot
   corresponding to that variant, while the original control word is
   saved in SLOT_CW_STORED.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  int slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current x87 control word, then load it into REG so we can
     tweak the rounding-control / exception-mask bits.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      /* Full 16-bit and/or sequences; avoids the insv path, which is
	 undesirable on these targets.  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* Use a bit-field insert to overwrite the rounding-control field
	 directly (values here are the 2-bit RC field, not shifted).  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Spill the adjusted control word to its dedicated stack slot, from
     which the rounding patterns will fldcw it.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
8444
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  FISTTP is nonzero when the SSE3 fisttp
   instruction (truncating store, no control-word dance) is available
   for this insn.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.
     Duplicating st(0) first lets the popping form leave the original
     value on the stack.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      /* Classic x87 path: switch the control word to the insn's
	 rounding mode (operand 3), store, then restore the saved
	 control word (operand 2).  */
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
cda749b1 8481
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used (result goes straight to EFLAGS).  UNORDERED_P is true
   when fucom should be used (no invalid-operand fault on QNaN).  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  /* For fcomi the compared values are operands 0/1; for the fnstsw
     forms operand 0 receives the status word and 1/2 are compared.  */
  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      /* SSE scalar compares: [u]comiss for SFmode, [u]comisd otherwise.  */
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Compare against zero: ftst only examines st(0); pop it afterwards
     if it dies here.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the 4-bit index described above; NULL entries are
	 combinations that can never be selected (e.g. integer-operand
	 fcomi forms do not exist).  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
2a2ab3f9 8601
f88c65f7 8602void
b96a374d 8603ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8604{
8605 const char *directive = ASM_LONG;
8606
f88c65f7 8607#ifdef ASM_QUAD
d0396b79
NS
8608 if (TARGET_64BIT)
8609 directive = ASM_QUAD;
f88c65f7 8610#else
d0396b79 8611 gcc_assert (!TARGET_64BIT);
f88c65f7 8612#endif
f88c65f7
RH
8613
8614 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8615}
8616
/* Write one address-difference element (label VALUE relative to label
   REL, or position-independent via GOTOFF / the Mach-O function base)
   to FILE.  The variant chosen depends on target and assembler
   capabilities.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    /* Plain label difference; no GOT machinery needed in 64-bit mode.  */
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O has no @GOTOFF; emit the difference against the
	 picbase symbol instead.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback when the assembler cannot do @GOTOFF in data: express
       the entry as GOT symbol plus a PC-relative bracket expression.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 8637\f
a8bac9ab
RH
8638/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8639 for the target. */
8640
8641void
b96a374d 8642ix86_expand_clear (rtx dest)
a8bac9ab
RH
8643{
8644 rtx tmp;
8645
8646 /* We play register width games, which are only valid after reload. */
d0396b79 8647 gcc_assert (reload_completed);
a8bac9ab
RH
8648
8649 /* Avoid HImode and its attendant prefix byte. */
8650 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8651 dest = gen_rtx_REG (SImode, REGNO (dest));
8652
8653 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8654
8655 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8656 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8657 {
8658 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8659 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8660 }
8661
8662 emit_insn (tmp);
8663}
8664
f996902d
RH
8665/* X is an unchanging MEM. If it is a constant pool reference, return
8666 the constant pool rtx, else NULL. */
8667
8fe75e43 8668rtx
b96a374d 8669maybe_get_pool_constant (rtx x)
f996902d 8670{
69bd9368 8671 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8672
8673 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8674 return get_pool_constant (x);
8675
8676 return NULL_RTX;
8677}
8678
/* Expand a move of MODE from operands[1] into operands[0], taking care
   of TLS references, PIC legitimization, pushes, and floating-point
   constants.  Emits the final SET (or returns early when a helper has
   already produced the result in operands[0]).  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols (bare, or symbol+constant) must be legitimized into the
     model-specific access sequence before they can be moved.  */
  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
	{
	  rtx addend = XEXP (XEXP (op1, 0), 1);
	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
	  op1 = force_operand (op1, NULL);
	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (op1 == op0)
	    return;
	}
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem->mem moves need an intermediate register, except for a
	 push of a memory operand, which the hardware supports.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  /* mem->mem: go through a temporary register.  */
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8786
e37af218 8787void
b96a374d 8788ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218 8789{
c38573a8
RH
8790 rtx op0 = operands[0], op1 = operands[1];
8791
e37af218
RH
8792 /* Force constants other than zero into memory. We do not know how
8793 the instructions used to build constants modify the upper 64 bits
8794 of the register, once we have that information we may be able
8795 to handle some of them more efficiently. */
8796 if ((reload_in_progress | reload_completed) == 0
c38573a8
RH
8797 && register_operand (op0, mode)
8798 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8799 op1 = validize_mem (force_const_mem (mode, op1));
e37af218
RH
8800
8801 /* Make operand1 a register if it isn't already. */
f8ca7923 8802 if (!no_new_pseudos
c38573a8
RH
8803 && !register_operand (op0, mode)
8804 && !register_operand (op1, mode))
e37af218 8805 {
c38573a8 8806 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
e37af218
RH
8807 return;
8808 }
8809
c38573a8 8810 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
fce5a9f2 8811}
e37af218 8812
c38573a8
RH
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  Exactly one of the two operands
   is expected to be a MEM (the misaligned side).  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* Misaligned load.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  /* Load the two halves with loadlpd/loadhpd.  */
	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  /* Break the false dependency on the old register contents,
	     then load the halves via loadlps/loadhps.  */
	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* Misaligned store.  */

      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  m = adjust_address (op0, DFmode, 0);
	  emit_insn (gen_sse2_storelpd (m, op1));
	  m = adjust_address (op0, DFmode, 8);
	  emit_insn (gen_sse2_storehpd (m, op1));
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);
	  m = adjust_address (op0, V2SFmode, 0);
	  emit_insn (gen_sse_storelps (m, op1));
	  m = adjust_address (op0, V2SFmode, 8);
	  emit_insn (gen_sse_storehps (m, op1));
	}
    }
  else
    gcc_unreachable ();
}
8932
6b79c03c
RH
8933/* Expand a push in MODE. This is some mode for which we do not support
8934 proper push instructions, at least from the registers that we expect
8935 the value to live in. */
8936
8937void
8938ix86_expand_push (enum machine_mode mode, rtx x)
8939{
8940 rtx tmp;
8941
8942 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8943 GEN_INT (-GET_MODE_SIZE (mode)),
8944 stack_pointer_rtx, 1, OPTAB_DIRECT);
8945 if (tmp != stack_pointer_rtx)
8946 emit_move_insn (stack_pointer_rtx, tmp);
8947
8948 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8949 emit_move_insn (tmp, x);
8950}
c38573a8 8951
ef719a44
RH
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.
   operands[1] and operands[2] are updated in place with the
   canonicalized sources.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      /* matching_memory records WHICH source matches the destination:
	 1 for src1, 2 for src2 (commutative ops only).  */
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source matched the destination in memory.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* Write the canonicalized sources back for the caller.  */
  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}
9011
9012/* Similarly, but assume that the destination has already been
9013 set up properly. */
9014
9015void
9016ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9017 enum machine_mode mode, rtx operands[])
9018{
9019 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9020 gcc_assert (dst == operands[0]);
9021}
9022
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  /* Canonicalize the operands first; DST may differ from operands[0].  */
  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      /* All these operations clobber EFLAGS, so attach the clobber in a
	 PARALLEL to match the insn patterns.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
9057
9058/* Return TRUE or FALSE depending on whether the binary operator meets the
9059 appropriate constraints. */
9060
9061int
b96a374d
AJ
9062ix86_binary_operator_ok (enum rtx_code code,
9063 enum machine_mode mode ATTRIBUTE_UNUSED,
9064 rtx operands[3])
e075ae69
RH
9065{
9066 /* Both source operands cannot be in memory. */
9067 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9068 return 0;
9069 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 9070 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
9071 return 0;
9072 /* If the destination is memory, we must have a matching source operand. */
9073 if (GET_CODE (operands[0]) == MEM
9074 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 9075 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
9076 && rtx_equal_p (operands[0], operands[2]))))
9077 return 0;
06a964de 9078 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 9079 have a matching destination. */
06a964de 9080 if (GET_CODE (operands[1]) == MEM
ec8e098d 9081 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
9082 && ! rtx_equal_p (operands[0], operands[1]))
9083 return 0;
e075ae69
RH
9084 return 1;
9085}
9086
9087/* Attempt to expand a unary operator. Make the expansion closer to the
9088 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 9089 memory references (one output, one input) in a single insn. */
e075ae69 9090
9d81fc27 9091void
b96a374d
AJ
9092ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9093 rtx operands[])
e075ae69 9094{
06a964de
JH
9095 int matching_memory;
9096 rtx src, dst, op, clob;
9097
9098 dst = operands[0];
9099 src = operands[1];
e075ae69 9100
06a964de
JH
9101 /* If the destination is memory, and we do not have matching source
9102 operands, do things in registers. */
9103 matching_memory = 0;
7cacf53e 9104 if (MEM_P (dst))
32b5b1aa 9105 {
06a964de
JH
9106 if (rtx_equal_p (dst, src))
9107 matching_memory = 1;
e075ae69 9108 else
06a964de 9109 dst = gen_reg_rtx (mode);
32b5b1aa 9110 }
e9a25f70 9111
06a964de 9112 /* When source operand is memory, destination must match. */
7cacf53e 9113 if (MEM_P (src) && !matching_memory)
06a964de 9114 src = force_reg (mode, src);
0f290768 9115
06a964de
JH
9116 /* Emit the instruction. */
9117
9118 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9119 if (reload_in_progress || code == NOT)
9120 {
9121 /* Reload doesn't know about the flags register, and doesn't know that
9122 it doesn't want to clobber it. */
d0396b79 9123 gcc_assert (code == NOT);
06a964de
JH
9124 emit_insn (op);
9125 }
9126 else
9127 {
9128 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9129 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9130 }
9131
9132 /* Fix up the destination if needed. */
9133 if (dst != operands[0])
9134 emit_move_insn (operands[0], dst);
e075ae69
RH
9135}
9136
9137/* Return TRUE or FALSE depending on whether the unary operator meets the
9138 appropriate constraints. */
9139
9140int
b96a374d
AJ
9141ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9142 enum machine_mode mode ATTRIBUTE_UNUSED,
9143 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 9144{
06a964de
JH
9145 /* If one of operands is memory, source and destination must match. */
9146 if ((GET_CODE (operands[0]) == MEM
9147 || GET_CODE (operands[1]) == MEM)
9148 && ! rtx_equal_p (operands[0], operands[1]))
9149 return FALSE;
e075ae69
RH
9150 return TRUE;
9151}
7cacf53e 9152
046625fa
RH
9153/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9154 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9155 true, then replicate the mask for all elements of the vector register.
9156 If INVERT is true, then create a mask excluding the sign bit. */
9157
9158rtx
9159ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9160{
9161 enum machine_mode vec_mode;
9162 HOST_WIDE_INT hi, lo;
9163 int shift = 63;
9164 rtvec v;
9165 rtx mask;
9166
9167 /* Find the sign bit, sign extended to 2*HWI. */
9168 if (mode == SFmode)
9169 lo = 0x80000000, hi = lo < 0;
9170 else if (HOST_BITS_PER_WIDE_INT >= 64)
9171 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9172 else
9173 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9174
9175 if (invert)
9176 lo = ~lo, hi = ~hi;
9177
9178 /* Force this value into the low part of a fp vector constant. */
9179 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9180 mask = gen_lowpart (mode, mask);
9181
9182 if (mode == SFmode)
9183 {
9184 if (vect)
9185 v = gen_rtvec (4, mask, mask, mask, mask);
9186 else
9187 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9188 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9189 vec_mode = V4SFmode;
9190 }
9191 else
9192 {
9193 if (vect)
9194 v = gen_rtvec (2, mask, mask);
9195 else
9196 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9197 vec_mode = V2DFmode;
9198 }
9199
9200 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9201}
9202
7cacf53e
RH
9203/* Generate code for floating point ABS or NEG. */
9204
9205void
9206ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9207 rtx operands[])
9208{
9209 rtx mask, set, use, clob, dst, src;
9210 bool matching_memory;
9211 bool use_sse = false;
ef719a44
RH
9212 bool vector_mode = VECTOR_MODE_P (mode);
9213 enum machine_mode elt_mode = mode;
7cacf53e 9214
ef719a44
RH
9215 if (vector_mode)
9216 {
9217 elt_mode = GET_MODE_INNER (mode);
ef719a44
RH
9218 use_sse = true;
9219 }
046625fa 9220 else if (TARGET_SSE_MATH)
2aa3d033 9221 use_sse = SSE_FLOAT_MODE_P (mode);
7cacf53e
RH
9222
9223 /* NEG and ABS performed with SSE use bitwise mask operations.
9224 Create the appropriate mask now. */
9225 if (use_sse)
046625fa 9226 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
7cacf53e
RH
9227 else
9228 {
9229 /* When not using SSE, we don't use the mask, but prefer to keep the
9230 same general form of the insn pattern to reduce duplication when
9231 it comes time to split. */
9232 mask = const0_rtx;
9233 }
9234
9235 dst = operands[0];
9236 src = operands[1];
9237
9238 /* If the destination is memory, and we don't have matching source
9239 operands, do things in registers. */
9240 matching_memory = false;
9241 if (MEM_P (dst))
9242 {
3bc751bd 9243 if (rtx_equal_p (dst, src))
7cacf53e
RH
9244 matching_memory = true;
9245 else
9246 dst = gen_reg_rtx (mode);
9247 }
9248 if (MEM_P (src) && !matching_memory)
9249 src = force_reg (mode, src);
9250
ef719a44
RH
9251 if (vector_mode)
9252 {
9253 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9254 set = gen_rtx_SET (VOIDmode, dst, set);
9255 emit_insn (set);
9256 }
9257 else
9258 {
9259 set = gen_rtx_fmt_e (code, mode, src);
9260 set = gen_rtx_SET (VOIDmode, dst, set);
9261 use = gen_rtx_USE (VOIDmode, mask);
9262 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9264 }
7cacf53e
RH
9265
9266 if (dst != operands[0])
9267 emit_move_insn (operands[0], dst);
9268}
e075ae69 9269
b99d6d2b 9270/* Expand a copysign operation. Special case operand 0 being a constant. */
046625fa
RH
9271
9272void
b99d6d2b
RH
9273ix86_expand_copysign (rtx operands[])
9274{
9275 enum machine_mode mode, vmode;
9276 rtx dest, op0, op1, mask, nmask;
9277
9278 dest = operands[0];
9279 op0 = operands[1];
9280 op1 = operands[2];
9281
9282 mode = GET_MODE (dest);
9283 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9284
9285 if (GET_CODE (op0) == CONST_DOUBLE)
9286 {
9287 rtvec v;
9288
9289 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9290 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9291
9292 if (op0 == CONST0_RTX (mode))
9293 op0 = CONST0_RTX (vmode);
9294 else
9295 {
9296 if (mode == SFmode)
9297 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9298 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9299 else
9300 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9301 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9302 }
9303
9304 mask = ix86_build_signbit_mask (mode, 0, 0);
9305
9306 if (mode == SFmode)
9307 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9308 else
9309 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9310 }
9311 else
9312 {
9313 nmask = ix86_build_signbit_mask (mode, 0, 1);
9314 mask = ix86_build_signbit_mask (mode, 0, 0);
9315
9316 if (mode == SFmode)
9317 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9318 else
9319 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9320 }
9321}
9322
9323/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9324 be a constant, and so has already been expanded into a vector constant. */
9325
9326void
9327ix86_split_copysign_const (rtx operands[])
9328{
9329 enum machine_mode mode, vmode;
9330 rtx dest, op0, op1, mask, x;
9331
9332 dest = operands[0];
9333 op0 = operands[1];
9334 op1 = operands[2];
9335 mask = operands[3];
9336
9337 mode = GET_MODE (dest);
9338 vmode = GET_MODE (mask);
9339
9340 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9341 x = gen_rtx_AND (vmode, dest, mask);
9342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9343
9344 if (op0 != CONST0_RTX (vmode))
9345 {
9346 x = gen_rtx_IOR (vmode, dest, op0);
9347 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9348 }
9349}
9350
9351/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9352 so we have to do two masks. */
9353
9354void
9355ix86_split_copysign_var (rtx operands[])
046625fa
RH
9356{
9357 enum machine_mode mode, vmode;
9358 rtx dest, scratch, op0, op1, mask, nmask, x;
9359
9360 dest = operands[0];
9361 scratch = operands[1];
9362 op0 = operands[2];
b99d6d2b
RH
9363 op1 = operands[3];
9364 nmask = operands[4];
046625fa
RH
9365 mask = operands[5];
9366
9367 mode = GET_MODE (dest);
9368 vmode = GET_MODE (mask);
9369
9370 if (rtx_equal_p (op0, op1))
9371 {
9372 /* Shouldn't happen often (it's useless, obviously), but when it does
9373 we'd generate incorrect code if we continue below. */
9374 emit_move_insn (dest, op0);
9375 return;
9376 }
9377
9378 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9379 {
9380 gcc_assert (REGNO (op1) == REGNO (scratch));
9381
9382 x = gen_rtx_AND (vmode, scratch, mask);
9383 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9384
9385 dest = mask;
9386 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9387 x = gen_rtx_NOT (vmode, dest);
9388 x = gen_rtx_AND (vmode, x, op0);
9389 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9390 }
9391 else
9392 {
9393 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9394 {
9395 x = gen_rtx_AND (vmode, scratch, mask);
9396 }
9397 else /* alternative 2,4 */
9398 {
9399 gcc_assert (REGNO (mask) == REGNO (scratch));
9400 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9401 x = gen_rtx_AND (vmode, scratch, op1);
9402 }
9403 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9404
9405 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9406 {
9407 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9408 x = gen_rtx_AND (vmode, dest, nmask);
9409 }
9410 else /* alternative 3,4 */
9411 {
9412 gcc_assert (REGNO (nmask) == REGNO (dest));
9413 dest = nmask;
9414 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9415 x = gen_rtx_AND (vmode, dest, op0);
9416 }
9417 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9418 }
9419
9420 x = gen_rtx_IOR (vmode, dest, scratch);
9421 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9422}
9423
16189740
RH
9424/* Return TRUE or FALSE depending on whether the first SET in INSN
9425 has source and destination with matching CC modes, and that the
9426 CC mode is at least as constrained as REQ_MODE. */
9427
9428int
b96a374d 9429ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16189740
RH
9430{
9431 rtx set;
9432 enum machine_mode set_mode;
9433
9434 set = PATTERN (insn);
9435 if (GET_CODE (set) == PARALLEL)
9436 set = XVECEXP (set, 0, 0);
d0396b79
NS
9437 gcc_assert (GET_CODE (set) == SET);
9438 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16189740
RH
9439
9440 set_mode = GET_MODE (SET_DEST (set));
9441 switch (set_mode)
9442 {
9076b9c1
JH
9443 case CCNOmode:
9444 if (req_mode != CCNOmode
9445 && (req_mode != CCmode
9446 || XEXP (SET_SRC (set), 1) != const0_rtx))
9447 return 0;
9448 break;
16189740 9449 case CCmode:
9076b9c1 9450 if (req_mode == CCGCmode)
16189740 9451 return 0;
5efb1046 9452 /* FALLTHRU */
9076b9c1
JH
9453 case CCGCmode:
9454 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9455 return 0;
5efb1046 9456 /* FALLTHRU */
9076b9c1 9457 case CCGOCmode:
16189740
RH
9458 if (req_mode == CCZmode)
9459 return 0;
5efb1046 9460 /* FALLTHRU */
16189740
RH
9461 case CCZmode:
9462 break;
9463
9464 default:
d0396b79 9465 gcc_unreachable ();
16189740
RH
9466 }
9467
9468 return (GET_MODE (SET_SRC (set)) == set_mode);
9469}
9470
e075ae69
RH
9471/* Generate insn patterns to do an integer compare of OPERANDS. */
9472
9473static rtx
b96a374d 9474ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
9475{
9476 enum machine_mode cmpmode;
9477 rtx tmp, flags;
9478
9479 cmpmode = SELECT_CC_MODE (code, op0, op1);
9480 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9481
9482 /* This is very simple, but making the interface the same as in the
9483 FP case makes the rest of the code easier. */
9484 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9485 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9486
9487 /* Return the test that should be put into the flags user, i.e.
9488 the bcc, scc, or cmov instruction. */
9489 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9490}
9491
3a3677ff
RH
9492/* Figure out whether to use ordered or unordered fp comparisons.
9493 Return the appropriate mode to use. */
e075ae69 9494
b1cdafbb 9495enum machine_mode
b96a374d 9496ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 9497{
9e7adcb3
JH
9498 /* ??? In order to make all comparisons reversible, we do all comparisons
9499 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9500 all forms trapping and nontrapping comparisons, we can make inequality
9501 comparisons trapping again, since it results in better code when using
9502 FCOM based compares. */
9503 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
9504}
9505
9076b9c1 9506enum machine_mode
b96a374d 9507ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9076b9c1 9508{
ebb109ad 9509 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9076b9c1
JH
9510 return ix86_fp_compare_mode (code);
9511 switch (code)
9512 {
9513 /* Only zero flag is needed. */
9514 case EQ: /* ZF=0 */
9515 case NE: /* ZF!=0 */
9516 return CCZmode;
9517 /* Codes needing carry flag. */
265dab10
JH
9518 case GEU: /* CF=0 */
9519 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
9520 case LTU: /* CF=1 */
9521 case LEU: /* CF=1 | ZF=1 */
265dab10 9522 return CCmode;
9076b9c1
JH
9523 /* Codes possibly doable only with sign flag when
9524 comparing against zero. */
9525 case GE: /* SF=OF or SF=0 */
7e08e190 9526 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
9527 if (op1 == const0_rtx)
9528 return CCGOCmode;
9529 else
9530 /* For other cases Carry flag is not required. */
9531 return CCGCmode;
9532 /* Codes doable only with sign flag when comparing
9533 against zero, but we miss jump instruction for it
4aae8a9a 9534 so we need to use relational tests against overflow
9076b9c1
JH
9535 that thus needs to be zero. */
9536 case GT: /* ZF=0 & SF=OF */
9537 case LE: /* ZF=1 | SF<>OF */
9538 if (op1 == const0_rtx)
9539 return CCNOmode;
9540 else
9541 return CCGCmode;
7fcd7218
JH
9542 /* strcmp pattern do (use flags) and combine may ask us for proper
9543 mode. */
9544 case USE:
9545 return CCmode;
9076b9c1 9546 default:
d0396b79 9547 gcc_unreachable ();
9076b9c1
JH
9548 }
9549}
9550
e129d93a
ILT
9551/* Return the fixed registers used for condition codes. */
9552
9553static bool
9554ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9555{
9556 *p1 = FLAGS_REG;
9557 *p2 = FPSR_REG;
9558 return true;
9559}
9560
9561/* If two condition code modes are compatible, return a condition code
9562 mode which is compatible with both. Otherwise, return
9563 VOIDmode. */
9564
9565static enum machine_mode
9566ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9567{
9568 if (m1 == m2)
9569 return m1;
9570
9571 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9572 return VOIDmode;
9573
9574 if ((m1 == CCGCmode && m2 == CCGOCmode)
9575 || (m1 == CCGOCmode && m2 == CCGCmode))
9576 return CCGCmode;
9577
9578 switch (m1)
9579 {
9580 default:
d0396b79 9581 gcc_unreachable ();
e129d93a
ILT
9582
9583 case CCmode:
9584 case CCGCmode:
9585 case CCGOCmode:
9586 case CCNOmode:
9587 case CCZmode:
9588 switch (m2)
9589 {
9590 default:
9591 return VOIDmode;
9592
9593 case CCmode:
9594 case CCGCmode:
9595 case CCGOCmode:
9596 case CCNOmode:
9597 case CCZmode:
9598 return CCmode;
9599 }
9600
9601 case CCFPmode:
9602 case CCFPUmode:
9603 /* These are only compatible with themselves, which we already
9604 checked above. */
9605 return VOIDmode;
9606 }
9607}
9608
3a3677ff
RH
9609/* Return true if we should use an FCOMI instruction for this fp comparison. */
9610
a940d8bd 9611int
b96a374d 9612ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 9613{
9e7adcb3
JH
9614 enum rtx_code swapped_code = swap_condition (code);
9615 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9616 || (ix86_fp_comparison_cost (swapped_code)
9617 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
9618}
9619
0f290768 9620/* Swap, force into registers, or otherwise massage the two operands
3a3677ff 9621 to a fp comparison. The operands are updated in place; the new
d1f87653 9622 comparison code is returned. */
3a3677ff
RH
9623
9624static enum rtx_code
b96a374d 9625ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
3a3677ff
RH
9626{
9627 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9628 rtx op0 = *pop0, op1 = *pop1;
9629 enum machine_mode op_mode = GET_MODE (op0);
eabd294f 9630 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
3a3677ff 9631
e075ae69 9632 /* All of the unordered compare instructions only work on registers.
d06e6434
UB
9633 The same is true of the fcomi compare instructions. The XFmode
9634 compare instructions require registers except when comparing
9635 against zero or when converting operand 1 from fixed point to
9636 floating point. */
3a3677ff 9637
0644b628
JH
9638 if (!is_sse
9639 && (fpcmp_mode == CCFPUmode
45c8c47f
UB
9640 || (op_mode == XFmode
9641 && ! (standard_80387_constant_p (op0) == 1
d06e6434
UB
9642 || standard_80387_constant_p (op1) == 1)
9643 && GET_CODE (op1) != FLOAT)
0644b628 9644 || ix86_use_fcomi_compare (code)))
e075ae69 9645 {
3a3677ff
RH
9646 op0 = force_reg (op_mode, op0);
9647 op1 = force_reg (op_mode, op1);
e075ae69
RH
9648 }
9649 else
9650 {
9651 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9652 things around if they appear profitable, otherwise force op0
9653 into a register. */
9654
9655 if (standard_80387_constant_p (op0) == 0
9656 || (GET_CODE (op0) == MEM
9657 && ! (standard_80387_constant_p (op1) == 0
9658 || GET_CODE (op1) == MEM)))
32b5b1aa 9659 {
e075ae69
RH
9660 rtx tmp;
9661 tmp = op0, op0 = op1, op1 = tmp;
9662 code = swap_condition (code);
9663 }
9664
9665 if (GET_CODE (op0) != REG)
3a3677ff 9666 op0 = force_reg (op_mode, op0);
e075ae69
RH
9667
9668 if (CONSTANT_P (op1))
9669 {
45c8c47f
UB
9670 int tmp = standard_80387_constant_p (op1);
9671 if (tmp == 0)
3a3677ff 9672 op1 = validize_mem (force_const_mem (op_mode, op1));
45c8c47f
UB
9673 else if (tmp == 1)
9674 {
9675 if (TARGET_CMOVE)
9676 op1 = force_reg (op_mode, op1);
9677 }
9678 else
9679 op1 = force_reg (op_mode, op1);
32b5b1aa
SC
9680 }
9681 }
e9a25f70 9682
9e7adcb3
JH
9683 /* Try to rearrange the comparison to make it cheaper. */
9684 if (ix86_fp_comparison_cost (code)
9685 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 9686 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
9687 {
9688 rtx tmp;
9689 tmp = op0, op0 = op1, op1 = tmp;
9690 code = swap_condition (code);
9691 if (GET_CODE (op0) != REG)
9692 op0 = force_reg (op_mode, op0);
9693 }
9694
3a3677ff
RH
9695 *pop0 = op0;
9696 *pop1 = op1;
9697 return code;
9698}
9699
c0c102a9
JH
9700/* Convert comparison codes we use to represent FP comparison to integer
9701 code that will result in proper branch. Return UNKNOWN if no such code
9702 is available. */
8fe75e43
RH
9703
9704enum rtx_code
b96a374d 9705ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
9706{
9707 switch (code)
9708 {
9709 case GT:
9710 return GTU;
9711 case GE:
9712 return GEU;
9713 case ORDERED:
9714 case UNORDERED:
9715 return code;
9716 break;
9717 case UNEQ:
9718 return EQ;
9719 break;
9720 case UNLT:
9721 return LTU;
9722 break;
9723 case UNLE:
9724 return LEU;
9725 break;
9726 case LTGT:
9727 return NE;
9728 break;
9729 default:
9730 return UNKNOWN;
9731 }
9732}
9733
9734/* Split comparison code CODE into comparisons we can do using branch
9735 instructions. BYPASS_CODE is comparison code for branch that will
9736 branch around FIRST_CODE and SECOND_CODE. If some of branches
f822d252 9737 is not required, set value to UNKNOWN.
c0c102a9 9738 We never require more than two branches. */
8fe75e43
RH
9739
9740void
b96a374d
AJ
9741ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9742 enum rtx_code *first_code,
9743 enum rtx_code *second_code)
c0c102a9
JH
9744{
9745 *first_code = code;
f822d252
ZW
9746 *bypass_code = UNKNOWN;
9747 *second_code = UNKNOWN;
c0c102a9
JH
9748
9749 /* The fcomi comparison sets flags as follows:
9750
9751 cmp ZF PF CF
9752 > 0 0 0
9753 < 0 0 1
9754 = 1 0 0
9755 un 1 1 1 */
9756
9757 switch (code)
9758 {
9759 case GT: /* GTU - CF=0 & ZF=0 */
9760 case GE: /* GEU - CF=0 */
9761 case ORDERED: /* PF=0 */
9762 case UNORDERED: /* PF=1 */
9763 case UNEQ: /* EQ - ZF=1 */
9764 case UNLT: /* LTU - CF=1 */
9765 case UNLE: /* LEU - CF=1 | ZF=1 */
9766 case LTGT: /* EQ - ZF=0 */
9767 break;
9768 case LT: /* LTU - CF=1 - fails on unordered */
9769 *first_code = UNLT;
9770 *bypass_code = UNORDERED;
9771 break;
9772 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9773 *first_code = UNLE;
9774 *bypass_code = UNORDERED;
9775 break;
9776 case EQ: /* EQ - ZF=1 - fails on unordered */
9777 *first_code = UNEQ;
9778 *bypass_code = UNORDERED;
9779 break;
9780 case NE: /* NE - ZF=0 - fails on unordered */
9781 *first_code = LTGT;
9782 *second_code = UNORDERED;
9783 break;
9784 case UNGE: /* GEU - CF=0 - fails on unordered */
9785 *first_code = GE;
9786 *second_code = UNORDERED;
9787 break;
9788 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9789 *first_code = GT;
9790 *second_code = UNORDERED;
9791 break;
9792 default:
d0396b79 9793 gcc_unreachable ();
c0c102a9
JH
9794 }
9795 if (!TARGET_IEEE_FP)
9796 {
f822d252
ZW
9797 *second_code = UNKNOWN;
9798 *bypass_code = UNKNOWN;
c0c102a9
JH
9799 }
9800}
9801
9e7adcb3 9802/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 9803 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
9804 In future this should be tweaked to compute bytes for optimize_size and
9805 take into account performance of various instructions on various CPUs. */
9806static int
b96a374d 9807ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
9808{
9809 if (!TARGET_IEEE_FP)
9810 return 4;
9811 /* The cost of code output by ix86_expand_fp_compare. */
9812 switch (code)
9813 {
9814 case UNLE:
9815 case UNLT:
9816 case LTGT:
9817 case GT:
9818 case GE:
9819 case UNORDERED:
9820 case ORDERED:
9821 case UNEQ:
9822 return 4;
9823 break;
9824 case LT:
9825 case NE:
9826 case EQ:
9827 case UNGE:
9828 return 5;
9829 break;
9830 case LE:
9831 case UNGT:
9832 return 6;
9833 break;
9834 default:
d0396b79 9835 gcc_unreachable ();
9e7adcb3
JH
9836 }
9837}
9838
9839/* Return cost of comparison done using fcomi operation.
9840 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9841static int
b96a374d 9842ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
9843{
9844 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9845 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
9846 prevents gcc from using it. */
9847 if (!TARGET_CMOVE)
9848 return 1024;
9849 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 9850 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
9851}
9852
9853/* Return cost of comparison done using sahf operation.
9854 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9855static int
b96a374d 9856ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
9857{
9858 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9859 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
9860 avoids gcc from using it. */
9861 if (!TARGET_USE_SAHF && !optimize_size)
9862 return 1024;
9863 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 9864 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
9865}
9866
9867/* Compute cost of the comparison done using any method.
9868 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9869static int
b96a374d 9870ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9871{
9872 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9873 int min;
9874
9875 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9876 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9877
9878 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9879 if (min > sahf_cost)
9880 min = sahf_cost;
9881 if (min > fcomi_cost)
9882 min = fcomi_cost;
9883 return min;
9884}
c0c102a9 9885
3a3677ff
RH
9886/* Generate insn patterns to do a floating point compare of OPERANDS. */
9887
9e7adcb3 9888static rtx
b96a374d
AJ
9889ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9890 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
9891{
9892 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 9893 rtx tmp, tmp2;
9e7adcb3 9894 int cost = ix86_fp_comparison_cost (code);
c0c102a9 9895 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
9896
9897 fpcmp_mode = ix86_fp_compare_mode (code);
9898 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9899
9e7adcb3
JH
9900 if (second_test)
9901 *second_test = NULL_RTX;
9902 if (bypass_test)
9903 *bypass_test = NULL_RTX;
9904
c0c102a9
JH
9905 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9906
9e7adcb3 9907 /* Do fcomi/sahf based test when profitable. */
f822d252
ZW
9908 if ((bypass_code == UNKNOWN || bypass_test)
9909 && (second_code == UNKNOWN || second_test)
9e7adcb3 9910 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 9911 {
c0c102a9
JH
9912 if (TARGET_CMOVE)
9913 {
9914 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9915 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9916 tmp);
9917 emit_insn (tmp);
9918 }
9919 else
9920 {
9921 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 9922 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
9923 if (!scratch)
9924 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
9925 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9926 emit_insn (gen_x86_sahf_1 (scratch));
9927 }
e075ae69
RH
9928
9929 /* The FP codes work out to act like unsigned. */
9a915772 9930 intcmp_mode = fpcmp_mode;
9e7adcb3 9931 code = first_code;
f822d252 9932 if (bypass_code != UNKNOWN)
9e7adcb3
JH
9933 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9934 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9935 const0_rtx);
f822d252 9936 if (second_code != UNKNOWN)
9e7adcb3
JH
9937 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9938 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9939 const0_rtx);
e075ae69
RH
9940 }
9941 else
9942 {
9943 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 9944 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 9945 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
9946 if (!scratch)
9947 scratch = gen_reg_rtx (HImode);
3a3677ff 9948 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 9949
9a915772
JH
9950 /* In the unordered case, we have to check C2 for NaN's, which
9951 doesn't happen to work out to anything nice combination-wise.
9952 So do some bit twiddling on the value we've got in AH to come
9953 up with an appropriate set of condition codes. */
e075ae69 9954
9a915772
JH
9955 intcmp_mode = CCNOmode;
9956 switch (code)
32b5b1aa 9957 {
9a915772
JH
9958 case GT:
9959 case UNGT:
9960 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 9961 {
3a3677ff 9962 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 9963 code = EQ;
9a915772
JH
9964 }
9965 else
9966 {
9967 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9968 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9969 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9970 intcmp_mode = CCmode;
9971 code = GEU;
9972 }
9973 break;
9974 case LT:
9975 case UNLT:
9976 if (code == LT && TARGET_IEEE_FP)
9977 {
3a3677ff
RH
9978 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9979 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
9980 intcmp_mode = CCmode;
9981 code = EQ;
9a915772
JH
9982 }
9983 else
9984 {
9985 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9986 code = NE;
9987 }
9988 break;
9989 case GE:
9990 case UNGE:
9991 if (code == GE || !TARGET_IEEE_FP)
9992 {
3a3677ff 9993 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 9994 code = EQ;
9a915772
JH
9995 }
9996 else
9997 {
9998 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9999 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10000 GEN_INT (0x01)));
10001 code = NE;
10002 }
10003 break;
10004 case LE:
10005 case UNLE:
10006 if (code == LE && TARGET_IEEE_FP)
10007 {
3a3677ff
RH
10008 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10009 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10010 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
10011 intcmp_mode = CCmode;
10012 code = LTU;
9a915772
JH
10013 }
10014 else
10015 {
10016 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10017 code = NE;
10018 }
10019 break;
10020 case EQ:
10021 case UNEQ:
10022 if (code == EQ && TARGET_IEEE_FP)
10023 {
3a3677ff
RH
10024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10025 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
10026 intcmp_mode = CCmode;
10027 code = EQ;
9a915772
JH
10028 }
10029 else
10030 {
3a3677ff
RH
10031 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10032 code = NE;
10033 break;
9a915772
JH
10034 }
10035 break;
10036 case NE:
10037 case LTGT:
10038 if (code == NE && TARGET_IEEE_FP)
10039 {
3a3677ff 10040 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
10041 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10042 GEN_INT (0x40)));
3a3677ff 10043 code = NE;
9a915772
JH
10044 }
10045 else
10046 {
3a3677ff
RH
10047 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10048 code = EQ;
32b5b1aa 10049 }
9a915772
JH
10050 break;
10051
10052 case UNORDERED:
10053 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10054 code = NE;
10055 break;
10056 case ORDERED:
10057 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10058 code = EQ;
10059 break;
10060
10061 default:
d0396b79 10062 gcc_unreachable ();
32b5b1aa 10063 }
32b5b1aa 10064 }
e075ae69
RH
10065
10066 /* Return the test that should be put into the flags user, i.e.
10067 the bcc, scc, or cmov instruction. */
10068 return gen_rtx_fmt_ee (code, VOIDmode,
10069 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10070 const0_rtx);
10071}
10072
9e3e266c 10073rtx
b96a374d 10074ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
10075{
10076 rtx op0, op1, ret;
10077 op0 = ix86_compare_op0;
10078 op1 = ix86_compare_op1;
10079
a1b8572c
JH
10080 if (second_test)
10081 *second_test = NULL_RTX;
10082 if (bypass_test)
10083 *bypass_test = NULL_RTX;
10084
1ef45b77
RH
10085 if (ix86_compare_emitted)
10086 {
10087 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10088 ix86_compare_emitted = NULL_RTX;
10089 }
ebb109ad 10090 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
bf71a4f8 10091 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 10092 second_test, bypass_test);
32b5b1aa 10093 else
e075ae69
RH
10094 ret = ix86_expand_int_compare (code, op0, op1);
10095
10096 return ret;
10097}
10098
03598dea
JH
10099/* Return true if the CODE will result in nontrivial jump sequence. */
10100bool
b96a374d 10101ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
10102{
10103 enum rtx_code bypass_code, first_code, second_code;
10104 if (!TARGET_CMOVE)
10105 return true;
10106 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 10107 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
10108}
10109
/* Expand a conditional branch: jump to LABEL when condition CODE holds
   for the operands stashed in ix86_compare_op0/ix86_compare_op1.
   Dispatches on the mode of ix86_compare_op0: narrow integer modes get a
   single compare+branch, FP modes may need compound sequences, and
   double-word integer modes are split into word-sized compares.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* One compare, one conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    /* Emit a single compound jump insn carrying the FP compare,
	       with clobbers for the flags registers (hard regs 17/18) and,
	       for non-fcomi sequences, a scratch HImode reg for fnstsw.  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Fall through: 32-bit DImode is handled like 64-bit TImode.  */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	/* Canonicalize: keep any constant in operand 1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	if (GET_MODE (ix86_compare_op0) == DImode)
	  {
	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = SImode;
	  }
	else
	  {
	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = DImode;
	  }

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse: the combined word is now a simple compare vs 0.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
e075ae69 10305
/* Split branch based on floating point condition.  Emits up to three
   conditional jumps: an optional BYPASS jump around the sequence, the
   main jump on the comparison, and an optional SECOND jump (both
   auxiliary tests come from ix86_expand_fp_compare).  TARGET1/TARGET2
   are the taken/fallthrough destinations; TMP is a scratch passed to
   the compare expander; PUSHED, if non-null, is an operand previously
   spilled to memory that must be freed here.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that the fallthrough arm is TARGET2 (== pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* Jump over the main/second branches when the bypass test holds.  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      /* Auxiliary jump needed when one branch cannot test the condition.  */
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
10387
/* Expand a setcc of condition CODE into QImode register DEST, using the
   operands stashed in ix86_compare_op0/ix86_compare_op1.  Returns 1 on
   success, 0 (FAIL) for double-word integer modes that need the
   splitting path instead.  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  /* Double-word compares cannot be handled by a single setcc.  */
  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      /* FP compares may need a second setcc combined with AND (bypass)
	 or IOR (second) into the result.  At most one of the two
	 auxiliary tests is present.  */
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  gcc_assert (!second_test);
	  test = bypass_test;
	  bypass = 1;
	  /* Invert the bypass condition in place so it can be ANDed in.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
				       GET_MODE (ix86_compare_op0),
				       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}
e075ae69 10438
c35d187f
RH
10439/* Expand comparison setting or clearing carry flag. Return true when
10440 successful and set pop for the operation. */
10441static bool
b96a374d 10442ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
10443{
10444 enum machine_mode mode =
10445 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10446
10447 /* Do not handle DImode compares that go trought special path. Also we can't
43f3a59d 10448 deal with FP compares yet. This is possible to add. */
28356f52 10449 if (mode == (TARGET_64BIT ? TImode : DImode))
e6e81735
JH
10450 return false;
10451 if (FLOAT_MODE_P (mode))
10452 {
10453 rtx second_test = NULL, bypass_test = NULL;
10454 rtx compare_op, compare_seq;
10455
10456 /* Shortcut: following common codes never translate into carry flag compares. */
10457 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10458 || code == ORDERED || code == UNORDERED)
10459 return false;
10460
10461 /* These comparisons require zero flag; swap operands so they won't. */
10462 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10463 && !TARGET_IEEE_FP)
10464 {
10465 rtx tmp = op0;
10466 op0 = op1;
10467 op1 = tmp;
10468 code = swap_condition (code);
10469 }
10470
c51e6d85
KH
10471 /* Try to expand the comparison and verify that we end up with carry flag
10472 based comparison. This is fails to be true only when we decide to expand
10473 comparison using arithmetic that is not too common scenario. */
e6e81735
JH
10474 start_sequence ();
10475 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10476 &second_test, &bypass_test);
10477 compare_seq = get_insns ();
10478 end_sequence ();
10479
10480 if (second_test || bypass_test)
10481 return false;
10482 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10483 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10484 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10485 else
10486 code = GET_CODE (compare_op);
10487 if (code != LTU && code != GEU)
10488 return false;
10489 emit_insn (compare_seq);
10490 *pop = compare_op;
10491 return true;
10492 }
10493 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
10494 return false;
10495 switch (code)
10496 {
10497 case LTU:
10498 case GEU:
10499 break;
10500
10501 /* Convert a==0 into (unsigned)a<1. */
10502 case EQ:
10503 case NE:
10504 if (op1 != const0_rtx)
10505 return false;
10506 op1 = const1_rtx;
10507 code = (code == EQ ? LTU : GEU);
10508 break;
10509
10510 /* Convert a>b into b<a or a>=b-1. */
10511 case GTU:
10512 case LEU:
10513 if (GET_CODE (op1) == CONST_INT)
10514 {
10515 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10516 /* Bail out on overflow. We still can swap operands but that
43f3a59d 10517 would force loading of the constant into register. */
4977bab6
ZW
10518 if (op1 == const0_rtx
10519 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10520 return false;
10521 code = (code == GTU ? GEU : LTU);
10522 }
10523 else
10524 {
10525 rtx tmp = op1;
10526 op1 = op0;
10527 op0 = tmp;
10528 code = (code == GTU ? LTU : GEU);
10529 }
10530 break;
10531
ccea753c 10532 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
10533 case LT:
10534 case GE:
10535 if (mode == DImode || op1 != const0_rtx)
10536 return false;
ccea753c 10537 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
10538 code = (code == LT ? GEU : LTU);
10539 break;
10540 case LE:
10541 case GT:
10542 if (mode == DImode || op1 != constm1_rtx)
10543 return false;
ccea753c 10544 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
10545 code = (code == LE ? GEU : LTU);
10546 break;
10547
10548 default:
10549 return false;
10550 }
ebe75517
JH
10551 /* Swapping operands may cause constant to appear as first operand. */
10552 if (!nonimmediate_operand (op0, VOIDmode))
10553 {
10554 if (no_new_pseudos)
10555 return false;
10556 op0 = force_reg (mode, op0);
10557 }
4977bab6
ZW
10558 ix86_compare_op0 = op0;
10559 ix86_compare_op1 = op1;
10560 *pop = ix86_expand_compare (code, NULL, NULL);
d0396b79 10561 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
4977bab6
ZW
10562 return true;
10563}
10564
32b5b1aa 10565int
b96a374d 10566ix86_expand_int_movcc (rtx operands[])
32b5b1aa 10567{
e075ae69
RH
10568 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10569 rtx compare_seq, compare_op;
a1b8572c 10570 rtx second_test, bypass_test;
635559ab 10571 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 10572 bool sign_bit_compare_p = false;;
3a3677ff 10573
e075ae69 10574 start_sequence ();
a1b8572c 10575 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 10576 compare_seq = get_insns ();
e075ae69
RH
10577 end_sequence ();
10578
10579 compare_code = GET_CODE (compare_op);
10580
4977bab6
ZW
10581 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10582 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10583 sign_bit_compare_p = true;
10584
e075ae69
RH
10585 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10586 HImode insns, we'd be swallowed in word prefix ops. */
10587
4977bab6 10588 if ((mode != HImode || TARGET_FAST_PREFIX)
28356f52 10589 && (mode != (TARGET_64BIT ? TImode : DImode))
0f290768 10590 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
10591 && GET_CODE (operands[3]) == CONST_INT)
10592 {
10593 rtx out = operands[0];
10594 HOST_WIDE_INT ct = INTVAL (operands[2]);
10595 HOST_WIDE_INT cf = INTVAL (operands[3]);
10596 HOST_WIDE_INT diff;
10597
4977bab6
ZW
10598 diff = ct - cf;
10599 /* Sign bit compares are better done using shifts than we do by using
b96a374d 10600 sbb. */
4977bab6
ZW
10601 if (sign_bit_compare_p
10602 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10603 ix86_compare_op1, &compare_op))
e075ae69 10604 {
e075ae69
RH
10605 /* Detect overlap between destination and compare sources. */
10606 rtx tmp = out;
10607
4977bab6 10608 if (!sign_bit_compare_p)
36583fea 10609 {
e6e81735
JH
10610 bool fpcmp = false;
10611
4977bab6
ZW
10612 compare_code = GET_CODE (compare_op);
10613
e6e81735
JH
10614 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10615 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10616 {
10617 fpcmp = true;
10618 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10619 }
10620
4977bab6
ZW
10621 /* To simplify rest of code, restrict to the GEU case. */
10622 if (compare_code == LTU)
10623 {
10624 HOST_WIDE_INT tmp = ct;
10625 ct = cf;
10626 cf = tmp;
10627 compare_code = reverse_condition (compare_code);
10628 code = reverse_condition (code);
10629 }
e6e81735
JH
10630 else
10631 {
10632 if (fpcmp)
10633 PUT_CODE (compare_op,
10634 reverse_condition_maybe_unordered
10635 (GET_CODE (compare_op)));
10636 else
10637 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10638 }
4977bab6 10639 diff = ct - cf;
36583fea 10640
4977bab6
ZW
10641 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10642 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10643 tmp = gen_reg_rtx (mode);
e075ae69 10644
4977bab6 10645 if (mode == DImode)
e6e81735 10646 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 10647 else
e6e81735 10648 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 10649 }
14f73b5a 10650 else
4977bab6
ZW
10651 {
10652 if (code == GT || code == GE)
10653 code = reverse_condition (code);
10654 else
10655 {
10656 HOST_WIDE_INT tmp = ct;
10657 ct = cf;
10658 cf = tmp;
5fb48685 10659 diff = ct - cf;
4977bab6
ZW
10660 }
10661 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10662 ix86_compare_op1, VOIDmode, 0, -1);
10663 }
e075ae69 10664
36583fea
JH
10665 if (diff == 1)
10666 {
10667 /*
10668 * cmpl op0,op1
10669 * sbbl dest,dest
10670 * [addl dest, ct]
10671 *
10672 * Size 5 - 8.
10673 */
10674 if (ct)
b96a374d 10675 tmp = expand_simple_binop (mode, PLUS,
635559ab 10676 tmp, GEN_INT (ct),
4977bab6 10677 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
10678 }
10679 else if (cf == -1)
10680 {
10681 /*
10682 * cmpl op0,op1
10683 * sbbl dest,dest
10684 * orl $ct, dest
10685 *
10686 * Size 8.
10687 */
635559ab
JH
10688 tmp = expand_simple_binop (mode, IOR,
10689 tmp, GEN_INT (ct),
4977bab6 10690 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
10691 }
10692 else if (diff == -1 && ct)
10693 {
10694 /*
10695 * cmpl op0,op1
10696 * sbbl dest,dest
06ec023f 10697 * notl dest
36583fea
JH
10698 * [addl dest, cf]
10699 *
10700 * Size 8 - 11.
10701 */
4977bab6 10702 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 10703 if (cf)
b96a374d 10704 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
10705 copy_rtx (tmp), GEN_INT (cf),
10706 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
10707 }
10708 else
10709 {
10710 /*
10711 * cmpl op0,op1
10712 * sbbl dest,dest
06ec023f 10713 * [notl dest]
36583fea
JH
10714 * andl cf - ct, dest
10715 * [addl dest, ct]
10716 *
10717 * Size 8 - 11.
10718 */
06ec023f
RB
10719
10720 if (cf == 0)
10721 {
10722 cf = ct;
10723 ct = 0;
4977bab6 10724 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
10725 }
10726
635559ab 10727 tmp = expand_simple_binop (mode, AND,
4977bab6 10728 copy_rtx (tmp),
d8bf17f9 10729 gen_int_mode (cf - ct, mode),
4977bab6 10730 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 10731 if (ct)
b96a374d 10732 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
10733 copy_rtx (tmp), GEN_INT (ct),
10734 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 10735 }
e075ae69 10736
4977bab6
ZW
10737 if (!rtx_equal_p (tmp, out))
10738 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
10739
10740 return 1; /* DONE */
10741 }
10742
e075ae69
RH
10743 if (diff < 0)
10744 {
10745 HOST_WIDE_INT tmp;
10746 tmp = ct, ct = cf, cf = tmp;
10747 diff = -diff;
734dba19
JH
10748 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10749 {
10750 /* We may be reversing unordered compare to normal compare, that
10751 is not valid in general (we may convert non-trapping condition
10752 to trapping one), however on i386 we currently emit all
10753 comparisons unordered. */
10754 compare_code = reverse_condition_maybe_unordered (compare_code);
10755 code = reverse_condition_maybe_unordered (code);
10756 }
10757 else
10758 {
10759 compare_code = reverse_condition (compare_code);
10760 code = reverse_condition (code);
10761 }
e075ae69 10762 }
0f2a3457 10763
f822d252 10764 compare_code = UNKNOWN;
0f2a3457
JJ
10765 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10766 && GET_CODE (ix86_compare_op1) == CONST_INT)
10767 {
10768 if (ix86_compare_op1 == const0_rtx
10769 && (code == LT || code == GE))
10770 compare_code = code;
10771 else if (ix86_compare_op1 == constm1_rtx)
10772 {
10773 if (code == LE)
10774 compare_code = LT;
10775 else if (code == GT)
10776 compare_code = GE;
10777 }
10778 }
10779
10780 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 10781 if (compare_code != UNKNOWN
0f2a3457
JJ
10782 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10783 && (cf == -1 || ct == -1))
10784 {
10785 /* If lea code below could be used, only optimize
10786 if it results in a 2 insn sequence. */
10787
10788 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10789 || diff == 3 || diff == 5 || diff == 9)
10790 || (compare_code == LT && ct == -1)
10791 || (compare_code == GE && cf == -1))
10792 {
10793 /*
10794 * notl op1 (if necessary)
10795 * sarl $31, op1
10796 * orl cf, op1
10797 */
10798 if (ct != -1)
10799 {
10800 cf = ct;
b96a374d 10801 ct = -1;
0f2a3457
JJ
10802 code = reverse_condition (code);
10803 }
10804
10805 out = emit_store_flag (out, code, ix86_compare_op0,
10806 ix86_compare_op1, VOIDmode, 0, -1);
10807
10808 out = expand_simple_binop (mode, IOR,
10809 out, GEN_INT (cf),
10810 out, 1, OPTAB_DIRECT);
10811 if (out != operands[0])
10812 emit_move_insn (operands[0], out);
10813
10814 return 1; /* DONE */
10815 }
10816 }
10817
4977bab6 10818
635559ab
JH
10819 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10820 || diff == 3 || diff == 5 || diff == 9)
4977bab6 10821 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
10822 && (mode != DImode
10823 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
10824 {
10825 /*
10826 * xorl dest,dest
10827 * cmpl op1,op2
10828 * setcc dest
10829 * lea cf(dest*(ct-cf)),dest
10830 *
10831 * Size 14.
10832 *
10833 * This also catches the degenerate setcc-only case.
10834 */
10835
10836 rtx tmp;
10837 int nops;
10838
10839 out = emit_store_flag (out, code, ix86_compare_op0,
10840 ix86_compare_op1, VOIDmode, 0, 1);
10841
10842 nops = 0;
97f51ac4
RB
10843 /* On x86_64 the lea instruction operates on Pmode, so we need
10844 to get arithmetics done in proper mode to match. */
e075ae69 10845 if (diff == 1)
068f5dea 10846 tmp = copy_rtx (out);
e075ae69
RH
10847 else
10848 {
885a70fd 10849 rtx out1;
068f5dea 10850 out1 = copy_rtx (out);
635559ab 10851 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10852 nops++;
10853 if (diff & 1)
10854 {
635559ab 10855 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10856 nops++;
10857 }
10858 }
10859 if (cf != 0)
10860 {
635559ab 10861 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10862 nops++;
10863 }
4977bab6 10864 if (!rtx_equal_p (tmp, out))
e075ae69 10865 {
14f73b5a 10866 if (nops == 1)
a5cf80f0 10867 out = force_operand (tmp, copy_rtx (out));
e075ae69 10868 else
4977bab6 10869 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10870 }
4977bab6 10871 if (!rtx_equal_p (out, operands[0]))
1985ef90 10872 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10873
10874 return 1; /* DONE */
10875 }
10876
10877 /*
10878 * General case: Jumpful:
10879 * xorl dest,dest cmpl op1, op2
10880 * cmpl op1, op2 movl ct, dest
10881 * setcc dest jcc 1f
10882 * decl dest movl cf, dest
10883 * andl (cf-ct),dest 1:
10884 * addl ct,dest
0f290768 10885 *
e075ae69
RH
10886 * Size 20. Size 14.
10887 *
10888 * This is reasonably steep, but branch mispredict costs are
10889 * high on modern cpus, so consider failing only if optimizing
10890 * for space.
e075ae69
RH
10891 */
10892
4977bab6
ZW
10893 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10894 && BRANCH_COST >= 2)
e075ae69 10895 {
97f51ac4 10896 if (cf == 0)
e075ae69 10897 {
97f51ac4
RB
10898 cf = ct;
10899 ct = 0;
734dba19 10900 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10901 /* We may be reversing unordered compare to normal compare,
10902 that is not valid in general (we may convert non-trapping
10903 condition to trapping one), however on i386 we currently
10904 emit all comparisons unordered. */
10905 code = reverse_condition_maybe_unordered (code);
10906 else
10907 {
10908 code = reverse_condition (code);
f822d252 10909 if (compare_code != UNKNOWN)
0f2a3457
JJ
10910 compare_code = reverse_condition (compare_code);
10911 }
10912 }
10913
f822d252 10914 if (compare_code != UNKNOWN)
0f2a3457
JJ
10915 {
10916 /* notl op1 (if needed)
10917 sarl $31, op1
10918 andl (cf-ct), op1
b96a374d 10919 addl ct, op1
0f2a3457
JJ
10920
10921 For x < 0 (resp. x <= -1) there will be no notl,
10922 so if possible swap the constants to get rid of the
10923 complement.
10924 True/false will be -1/0 while code below (store flag
10925 followed by decrement) is 0/-1, so the constants need
10926 to be exchanged once more. */
10927
10928 if (compare_code == GE || !cf)
734dba19 10929 {
b96a374d 10930 code = reverse_condition (code);
0f2a3457 10931 compare_code = LT;
734dba19
JH
10932 }
10933 else
10934 {
0f2a3457 10935 HOST_WIDE_INT tmp = cf;
b96a374d 10936 cf = ct;
0f2a3457 10937 ct = tmp;
734dba19 10938 }
0f2a3457
JJ
10939
10940 out = emit_store_flag (out, code, ix86_compare_op0,
10941 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10942 }
0f2a3457
JJ
10943 else
10944 {
10945 out = emit_store_flag (out, code, ix86_compare_op0,
10946 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10947
4977bab6
ZW
10948 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10949 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10950 }
e075ae69 10951
4977bab6 10952 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10953 gen_int_mode (cf - ct, mode),
4977bab6 10954 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10955 if (ct)
4977bab6
ZW
10956 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10957 copy_rtx (out), 1, OPTAB_DIRECT);
10958 if (!rtx_equal_p (out, operands[0]))
10959 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10960
10961 return 1; /* DONE */
10962 }
10963 }
10964
4977bab6 10965 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10966 {
10967 /* Try a few things more with specific constants and a variable. */
10968
78a0d70c 10969 optab op;
e075ae69
RH
10970 rtx var, orig_out, out, tmp;
10971
4977bab6 10972 if (BRANCH_COST <= 2)
e075ae69
RH
10973 return 0; /* FAIL */
10974
0f290768 10975 /* If one of the two operands is an interesting constant, load a
e075ae69 10976 constant with the above and mask it in with a logical operation. */
0f290768 10977
e075ae69
RH
10978 if (GET_CODE (operands[2]) == CONST_INT)
10979 {
10980 var = operands[3];
4977bab6 10981 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10982 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10983 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10984 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10985 else
10986 return 0; /* FAIL */
e075ae69
RH
10987 }
10988 else if (GET_CODE (operands[3]) == CONST_INT)
10989 {
10990 var = operands[2];
4977bab6 10991 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10992 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10993 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10994 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10995 else
10996 return 0; /* FAIL */
e075ae69 10997 }
78a0d70c 10998 else
e075ae69
RH
10999 return 0; /* FAIL */
11000
11001 orig_out = operands[0];
635559ab 11002 tmp = gen_reg_rtx (mode);
e075ae69
RH
11003 operands[0] = tmp;
11004
11005 /* Recurse to get the constant loaded. */
11006 if (ix86_expand_int_movcc (operands) == 0)
11007 return 0; /* FAIL */
11008
11009 /* Mask in the interesting variable. */
635559ab 11010 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 11011 OPTAB_WIDEN);
4977bab6
ZW
11012 if (!rtx_equal_p (out, orig_out))
11013 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
11014
11015 return 1; /* DONE */
11016 }
11017
11018 /*
11019 * For comparison with above,
11020 *
11021 * movl cf,dest
11022 * movl ct,tmp
11023 * cmpl op1,op2
11024 * cmovcc tmp,dest
11025 *
11026 * Size 15.
11027 */
11028
635559ab
JH
11029 if (! nonimmediate_operand (operands[2], mode))
11030 operands[2] = force_reg (mode, operands[2]);
11031 if (! nonimmediate_operand (operands[3], mode))
11032 operands[3] = force_reg (mode, operands[3]);
e075ae69 11033
a1b8572c
JH
11034 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11035 {
635559ab 11036 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
11037 emit_move_insn (tmp, operands[3]);
11038 operands[3] = tmp;
11039 }
11040 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11041 {
635559ab 11042 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
11043 emit_move_insn (tmp, operands[2]);
11044 operands[2] = tmp;
11045 }
4977bab6 11046
c9682caf 11047 if (! register_operand (operands[2], VOIDmode)
b96a374d 11048 && (mode == QImode
4977bab6 11049 || ! register_operand (operands[3], VOIDmode)))
635559ab 11050 operands[2] = force_reg (mode, operands[2]);
a1b8572c 11051
4977bab6
ZW
11052 if (mode == QImode
11053 && ! register_operand (operands[3], VOIDmode))
11054 operands[3] = force_reg (mode, operands[3]);
11055
e075ae69
RH
11056 emit_insn (compare_seq);
11057 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 11058 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
11059 compare_op, operands[2],
11060 operands[3])));
a1b8572c 11061 if (bypass_test)
4977bab6 11062 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 11063 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 11064 bypass_test,
4977bab6
ZW
11065 copy_rtx (operands[3]),
11066 copy_rtx (operands[0]))));
a1b8572c 11067 if (second_test)
4977bab6 11068 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 11069 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 11070 second_test,
4977bab6
ZW
11071 copy_rtx (operands[2]),
11072 copy_rtx (operands[0]))));
e075ae69
RH
11073
11074 return 1; /* DONE */
e9a25f70 11075}
e075ae69 11076
ab8efbd8
RH
11077/* Swap, force into registers, or otherwise massage the two operands
11078 to an sse comparison with a mask result. Thus we differ a bit from
11079 ix86_prepare_fp_compare_args which expects to produce a flags result.
11080
11081 The DEST operand exists to help determine whether to commute commutative
11082 operators. The POP0/POP1 operands are updated in place. The new
11083 comparison code is returned, or UNKNOWN if not implementable. */
11084
11085static enum rtx_code
11086ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11087 rtx *pop0, rtx *pop1)
11088{
11089 rtx tmp;
11090
11091 switch (code)
11092 {
11093 case LTGT:
11094 case UNEQ:
11095 /* We have no LTGT as an operator. We could implement it with
11096 NE & ORDERED, but this requires an extra temporary. It's
11097 not clear that it's worth it. */
11098 return UNKNOWN;
11099
11100 case LT:
11101 case LE:
11102 case UNGT:
11103 case UNGE:
11104 /* These are supported directly. */
11105 break;
11106
11107 case EQ:
11108 case NE:
11109 case UNORDERED:
11110 case ORDERED:
11111 /* For commutative operators, try to canonicalize the destination
11112 operand to be first in the comparison - this helps reload to
11113 avoid extra moves. */
11114 if (!dest || !rtx_equal_p (dest, *pop1))
11115 break;
11116 /* FALLTHRU */
11117
11118 case GE:
11119 case GT:
11120 case UNLE:
11121 case UNLT:
11122 /* These are not supported directly. Swap the comparison operands
11123 to transform into something that is supported. */
11124 tmp = *pop0;
11125 *pop0 = *pop1;
11126 *pop1 = tmp;
11127 code = swap_condition (code);
11128 break;
11129
11130 default:
11131 gcc_unreachable ();
11132 }
11133
11134 return code;
11135}
11136
11137/* Detect conditional moves that exactly match min/max operational
11138 semantics. Note that this is IEEE safe, as long as we don't
11139 interchange the operands.
11140
11141 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11142 and TRUE if the operation is successful and instructions are emitted. */
11143
11144static bool
11145ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11146 rtx cmp_op1, rtx if_true, rtx if_false)
11147{
11148 enum machine_mode mode;
11149 bool is_min;
11150 rtx tmp;
11151
11152 if (code == LT)
11153 ;
11154 else if (code == UNGE)
11155 {
11156 tmp = if_true;
11157 if_true = if_false;
11158 if_false = tmp;
11159 }
11160 else
11161 return false;
11162
11163 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11164 is_min = true;
11165 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11166 is_min = false;
11167 else
11168 return false;
11169
11170 mode = GET_MODE (dest);
11171
11172 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11173 but MODE may be a vector mode and thus not appropriate. */
11174 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11175 {
11176 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11177 rtvec v;
11178
11179 if_true = force_reg (mode, if_true);
11180 v = gen_rtvec (2, if_true, if_false);
11181 tmp = gen_rtx_UNSPEC (mode, v, u);
11182 }
11183 else
11184 {
11185 code = is_min ? SMIN : SMAX;
11186 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11187 }
11188
11189 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11190 return true;
11191}
11192
ae46a07a
RH
11193/* Expand an sse vector comparison. Return the register with the result. */
11194
11195static rtx
11196ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11197 rtx op_true, rtx op_false)
ab8efbd8
RH
11198{
11199 enum machine_mode mode = GET_MODE (dest);
ae46a07a 11200 rtx x;
ab8efbd8
RH
11201
11202 cmp_op0 = force_reg (mode, cmp_op0);
11203 if (!nonimmediate_operand (cmp_op1, mode))
11204 cmp_op1 = force_reg (mode, cmp_op1);
11205
11206 if (optimize
11207 || reg_overlap_mentioned_p (dest, op_true)
11208 || reg_overlap_mentioned_p (dest, op_false))
ae46a07a 11209 dest = gen_reg_rtx (mode);
ab8efbd8
RH
11210
11211 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
ae46a07a
RH
11212 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11213
11214 return dest;
11215}
11216
11217/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11218 operations. This is used for both scalar and vector conditional moves. */
11219
11220static void
11221ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11222{
11223 enum machine_mode mode = GET_MODE (dest);
11224 rtx t2, t3, x;
ab8efbd8
RH
11225
11226 if (op_false == CONST0_RTX (mode))
11227 {
11228 op_true = force_reg (mode, op_true);
ae46a07a 11229 x = gen_rtx_AND (mode, cmp, op_true);
ab8efbd8
RH
11230 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11231 }
11232 else if (op_true == CONST0_RTX (mode))
11233 {
11234 op_false = force_reg (mode, op_false);
ae46a07a 11235 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
11236 x = gen_rtx_AND (mode, x, op_false);
11237 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11238 }
11239 else
11240 {
11241 op_true = force_reg (mode, op_true);
11242 op_false = force_reg (mode, op_false);
11243
11244 t2 = gen_reg_rtx (mode);
11245 if (optimize)
11246 t3 = gen_reg_rtx (mode);
11247 else
11248 t3 = dest;
11249
ae46a07a 11250 x = gen_rtx_AND (mode, op_true, cmp);
ab8efbd8
RH
11251 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11252
ae46a07a 11253 x = gen_rtx_NOT (mode, cmp);
ab8efbd8
RH
11254 x = gen_rtx_AND (mode, x, op_false);
11255 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11256
11257 x = gen_rtx_IOR (mode, t3, t2);
11258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11259 }
11260}
11261
/* Expand a floating-point conditional move of the form
   operands[0] = operands[1](comparison) ? operands[2] : operands[3],
   where the comparison operands live in the globals ix86_compare_op0
   and ix86_compare_op1.  Returns nonzero on success, zero when the
   SSE path cannot handle the requested mode combination.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
	return 0;

      /* Massage the comparison (updating the globals in place) into a
	 form the SSE compare patterns can express.  */
      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op0,
					       &ix86_compare_op1);
      if (code == UNKNOWN)
	return 0;

      /* Prefer a single min/max instruction when the select matches
	 one exactly.  */
      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],
				     operands[3]))
	return 1;

      /* Otherwise build a mask and select with logical operations.  */
      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  /* x87/integer fcmov path.  The floating point conditional move
     instructions don't directly support conditions resulting from a
     signed integer comparison.  */

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* If the condition is not one fcmov can test, materialize it as a
     0/1 value with setcc and cmove on that value being nonzero.  */
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The extra tests re-read operands[3]/operands[2] after the primary
     cmove; copy them to fresh registers if the destination overlaps.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  /* Primary conditional move, optionally followed by fixup cmoves for
     the bypass/second conditions produced by ix86_expand_compare.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}
11346
ae46a07a
RH
11347/* Expand a floating-point vector conditional move; a vcond operation
11348 rather than a movcc operation. */
11349
11350bool
11351ix86_expand_fp_vcond (rtx operands[])
11352{
11353 enum rtx_code code = GET_CODE (operands[3]);
11354 rtx cmp;
11355
11356 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11357 &operands[4], &operands[5]);
11358 if (code == UNKNOWN)
11359 return false;
11360
11361 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11362 operands[5], operands[1], operands[2]))
11363 return true;
11364
11365 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11366 operands[1], operands[2]);
11367 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11368 return true;
11369}
11370
11371/* Expand a signed integral vector conditional move. */
11372
11373bool
9fb93f89 11374ix86_expand_int_vcond (rtx operands[])
ae46a07a
RH
11375{
11376 enum machine_mode mode = GET_MODE (operands[0]);
11377 enum rtx_code code = GET_CODE (operands[3]);
9fb93f89
RH
11378 bool negate = false;
11379 rtx x, cop0, cop1;
ae46a07a 11380
9fb93f89
RH
11381 cop0 = operands[4];
11382 cop1 = operands[5];
11383
11384 /* Canonicalize the comparison to EQ, GT, GTU. */
11385 switch (code)
ae46a07a 11386 {
9fb93f89
RH
11387 case EQ:
11388 case GT:
11389 case GTU:
11390 break;
11391
11392 case NE:
11393 case LE:
11394 case LEU:
ae46a07a 11395 code = reverse_condition (code);
9fb93f89
RH
11396 negate = true;
11397 break;
11398
11399 case GE:
11400 case GEU:
11401 code = reverse_condition (code);
11402 negate = true;
11403 /* FALLTHRU */
11404
11405 case LT:
11406 case LTU:
ae46a07a 11407 code = swap_condition (code);
9fb93f89
RH
11408 x = cop0, cop0 = cop1, cop1 = x;
11409 break;
ae46a07a 11410
9fb93f89
RH
11411 default:
11412 gcc_unreachable ();
11413 }
ae46a07a 11414
9fb93f89
RH
11415 /* Unsigned parallel compare is not supported by the hardware. Play some
11416 tricks to turn this into a signed comparison against 0. */
11417 if (code == GTU)
ae46a07a 11418 {
ae46a07a
RH
11419 switch (mode)
11420 {
9fb93f89
RH
11421 case V4SImode:
11422 {
11423 rtx t1, t2, mask;
11424
11425 /* Perform a parallel modulo subtraction. */
11426 t1 = gen_reg_rtx (mode);
11427 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11428
11429 /* Extract the original sign bit of op0. */
11430 mask = GEN_INT (-0x80000000);
11431 mask = gen_rtx_CONST_VECTOR (mode,
11432 gen_rtvec (4, mask, mask, mask, mask));
11433 mask = force_reg (mode, mask);
11434 t2 = gen_reg_rtx (mode);
11435 emit_insn (gen_andv4si3 (t2, cop0, mask));
11436
11437 /* XOR it back into the result of the subtraction. This results
11438 in the sign bit set iff we saw unsigned underflow. */
11439 x = gen_reg_rtx (mode);
11440 emit_insn (gen_xorv4si3 (x, t1, t2));
11441
11442 code = GT;
11443 }
ae46a07a 11444 break;
9fb93f89
RH
11445
11446 case V16QImode:
ae46a07a 11447 case V8HImode:
9fb93f89
RH
11448 /* Perform a parallel unsigned saturating subtraction. */
11449 x = gen_reg_rtx (mode);
11450 emit_insn (gen_rtx_SET (VOIDmode, x,
11451 gen_rtx_US_MINUS (mode, cop0, cop1)));
11452
11453 code = EQ;
11454 negate = !negate;
ae46a07a 11455 break;
9fb93f89 11456
ae46a07a
RH
11457 default:
11458 gcc_unreachable ();
11459 }
11460
9fb93f89
RH
11461 cop0 = x;
11462 cop1 = CONST0_RTX (mode);
ae46a07a 11463 }
ae46a07a 11464
9fb93f89
RH
11465 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11466 operands[1+negate], operands[2-negate]);
11467
11468 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11469 operands[2-negate]);
ae46a07a
RH
11470 return true;
11471}
11472
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  operands[0] = operands[2] +/- (cond ? 1 : 0),
   where operands[3] is const1_rtx or constm1_rtx and the comparison
   lives in ix86_compare_op0/ix86_compare_op1.  Returns nonzero when
   the pattern was emitted, zero to let generic code handle it.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  /* VAL is the constant folded into the adc/sbb: adding/subtracting
     carry alone uses 0, the inverted condition uses -1.  */
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +1 / -1 increments can be done with a single adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be expressible as a carry-flag test.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Normalize so the carry flag (LTU) drives the insn; for any other
     code, reverse the condition in place and add -1 instead of 0.
     Note this mutates COMPARE_OP directly via PUT_CODE/PUT_MODE.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
	          reverse_condition_maybe_unordered
	            (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}
11555
11556
/* Split OPERAND into word-sized parts (SImode on 32-bit targets,
   DImode on 64-bit targets).  Similar to split_di, but also works for
   floating-point values and non-offsettable memories.  For pushes, it
   returns just stack offsets; the values will be saved in the right
   order.  Maximally three parts are generated, stored in PARTS; the
   number of parts (2 or 3) is the return value.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit targets split into SImode
     words (XFmode counts as 3 even though its size is 12), 64-bit
     targets split into DImode words, rounding up.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      /* Return the push expression itself for every part; the caller
	 emits the parts in the right order for the stack.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers; only valid after reload.  */
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Adjacent SImode slices of the memory operand.  */
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* XFmode's upper part is only 32 bits wide in 64-bit mode.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Assemble two 32-bit target words into one DImode
		 constant.  Do not use shift by 32 to avoid warning on
		 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
	        parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
	        parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
11710
/* Emit insns to perform a move or push of DI, DF, XF (and TI) values
   by splitting them into word-sized parts and moving the parts in an
   order that avoids clobbering sources.  operands[0]/operands[1] are
   the destination/source; operands[2..7] are used as scratch slots to
   hold the computed part ordering before the moves are emitted.
   (NOTE(review): the historical comment claimed a bool return, but
   the function is void and always emits all required insns.)  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack:
     each push moves the stack pointer, so rebase later source parts on
     the address of the following part.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* 16-byte long doubles occupy 16 stack bytes but only 12
		 are pushed as data; pad the stack pointer first.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes are emitted highest part first so the stack ends up in
	 memory order.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.
     Store the chosen ordering into operands[2..7]: destinations in 2-4,
     matching sources in 5-7.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy from the highest part down.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy from the lowest part up.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants:
     a later part equal to an earlier constant can reuse the register
     the earlier part was stored into.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 11937
28356f52 11938/* Helper function of ix86_split_ashl used to generate an SImode/DImode
1b83d209
RS
11939 left shift by a constant, either using a single shift or
11940 a sequence of add instructions. */
11941
11942static void
28356f52 11943ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
1b83d209
RS
11944{
11945 if (count == 1)
28356f52
JB
11946 {
11947 emit_insn ((mode == DImode
11948 ? gen_addsi3
11949 : gen_adddi3) (operand, operand, operand));
11950 }
1b83d209
RS
11951 else if (!optimize_size
11952 && count * ix86_cost->add <= ix86_cost->shift_const)
11953 {
11954 int i;
11955 for (i=0; i<count; i++)
28356f52
JB
11956 {
11957 emit_insn ((mode == DImode
11958 ? gen_addsi3
11959 : gen_adddi3) (operand, operand, operand));
11960 }
1b83d209
RS
11961 }
11962 else
28356f52
JB
11963 emit_insn ((mode == DImode
11964 ? gen_ashlsi3
11965 : gen_ashldi3) (operand, operand, GEN_INT (count)));
1b83d209
RS
11966}
11967
/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit) into operations on the word-sized low/high halves.
   OPERANDS[0] is the destination, OPERANDS[1] the source and
   OPERANDS[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE,
   is a spare register used to avoid a branch in the variable-count
   fixup.  */
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width of one half of the double-word value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Shift count known at compile time: emit straight-line code.  */
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* The low word shifts entirely into the high word and the
	     low word becomes zero.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  /* Small count: shld feeds low bits into the high word, then
	     the low word is shifted by itself.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		     ? gen_x86_shld_1
		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  /* Test the word-selecting bit (bit 5 resp. 6) of the count.  */
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  /* low = (count & single_width) == 0.  */
	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  /* high = (count & single_width) != 0.  */
	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  /* high = (count >> 5/6) & 1; low = high ^ 1.  */
	  emit_insn ((mode == DImode
		     ? gen_lshrsi3
		     : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		     ? gen_andsi3
		     : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		     ? gen_xorsi3
		     : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      /* Both halves shifted by the (masked-by-hardware) count finish
	 the job.  */
      emit_insn ((mode == DImode
		 ? gen_ashlsi3
		 : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		 ? gen_ashlsi3
		 : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      /* General variable-count case: shld into the high word first.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		 ? gen_x86_shld_1
		 : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  /* Fix up the result when the count's word-select bit was set: swap
     low into high.  With cmov and a scratch this needs no branch.  */
  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		 ? gen_x86_shift_adj_1
		 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
32b5b1aa 12094
/* Split a double-word arithmetic right shift (DImode on 32-bit targets,
   TImode on 64-bit) into operations on the word-sized low/high halves.
   OPERANDS[0] is the destination, OPERANDS[1] the source and
   OPERANDS[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE,
   lets the variable-count fixup avoid a branch.  */
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width of one half of the double-word value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  /* Maximal count: both halves become the sign mask.  */
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);

	}
      else if (count >= single_width)
	{
	  /* High half shifts into the low half; the new high half is
	     the sign extension of the old high half.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* Small count: shrd feeds high bits into the low word, then
	     the high word is shifted arithmetically by itself.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      /* Fix up the result when the count's word-select bit was set.
	 SCRATCH holds the sign mask so the adjust pattern can use
	 cmov instead of a branch.  */
      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
32b5b1aa 12173
/* Split a double-word logical right shift (DImode on 32-bit targets,
   TImode on 64-bit) into operations on the word-sized low/high halves.
   OPERANDS[0] is the destination, OPERANDS[1] the source and
   OPERANDS[2] the shift count.  SCRATCH, when non-null and TARGET_CMOVE,
   lets the variable-count fixup avoid a branch.  */
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  /* Width of one half of the double-word value.  */
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* High half shifts into the low half; high becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  /* Small count: shrd feeds high bits into the low word, then
	     the high word is shifted logically by itself.  */
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 12236
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If true, jump to the label.  Returns the
   label to be emitted after the non-aligned fixup code.  (The emitted
   test actually jumps when (VARIABLE & VALUE) == 0.)  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  /* Mask off the tested bit(s) in a temporary so VARIABLE is preserved.  */
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}
12252
12253/* Adjust COUNTER by the VALUE. */
12254static void
b96a374d 12255ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
12256{
12257 if (GET_MODE (countreg) == DImode)
12258 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12259 else
12260 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12261}
12262
12263/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 12264rtx
b96a374d 12265ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
12266{
12267 rtx r;
12268 if (GET_MODE (exp) == VOIDmode)
12269 return force_reg (Pmode, exp);
12270 if (GET_MODE (exp) == Pmode)
12271 return copy_to_mode_reg (Pmode, exp);
12272 r = gen_reg_rtx (Pmode);
12273 emit_insn (gen_zero_extendsidi2 (r, exp));
12274 return r;
12275}
12276
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.
   DST and SRC are the BLKmode destination/source MEMs, COUNT_EXP the byte
   count and ALIGN_EXP the known alignment.  Returns 1 when an inline
   expansion was emitted, 0 to make the caller fall back to a library
   call.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  gcc_assert (counter_mode == SImode || counter_mode == DImode);

  /* Force both addresses into registers and rewrite the MEMs to use
     them, so the string insns can update them.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
     known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size
		  || count > 5 * 4
		  || (count & 3) + count / 4 > 6))))
    {
      /* Plain rep; movsb.  */
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the bulk copy: 8 bytes on 64-bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
	    {
	      /* Unrolled movs instructions for very small copies.  */
	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;

	      while (offset < (count & ~(size - 1)))
		{
		  srcmem = adjust_automodify_address_nv (src, movs_mode,
							 srcreg, offset);
		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
							 destreg, offset);
		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
		  offset += size;
		}
	    }
	  else
	    {
	      /* rep movs{l,q} for the word-aligned bulk.  */
	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
				  & (TARGET_64BIT ? -1 : 0x3fffffff));
	      countreg = copy_to_mode_reg (counter_mode, countreg);
	      countreg = ix86_zero_extend_to_Pmode (countreg);

	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				      countreg, destexp, srcexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Copy the remaining 1-7 tail bytes with single movs.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  /* Small counts skip the alignment prologue entirely.  */
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: copy 1/2/4 bytes until DESTREG is aligned.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main bulk: rep movs{l,q} with the count scaled down to words.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the remaining sub-word tail, using run-time
	 tests on COUNTREG when the count was not known at compile
	 time.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}
12583
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.
   DST is the BLKmode destination MEM, COUNT_EXP the byte count and
   ALIGN_EXP the known alignment.  Returns 1 when an inline expansion was
   emitted, 0 to make the caller fall back to a library call.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  /* Force the destination address into a register and rewrite the MEM
     to use it, so the string insns can update it.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      /* Plain rep; stosb.  */
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Constant aligned (or small unaligned) clear: stos{l,q} bulk
	 plus single stores for the tail.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  unsigned HOST_WIDE_INT repcount;
	  unsigned int max_nonrep;

	  repcount = count >> (size == 4 ? 2 : 3);
	  if (!TARGET_64BIT)
	    repcount &= 0x3fffffff;

	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	     bytes.  In both cases the latter seems to be faster for small
	     values of N.  */
	  max_nonrep = size == 4 ? 7 : 4;
	  if (!optimize_size)
	    switch (ix86_tune)
	      {
	      case PROCESSOR_PENTIUM4:
	      case PROCESSOR_NOCONA:
		max_nonrep = 3;
		break;
	      default:
		break;
	      }

	  if (repcount <= max_nonrep)
	    while (repcount-- > 0)
	      {
		rtx mem = adjust_automodify_address_nv (dst,
							GET_MODE (zeroreg),
							destreg, offset);
		emit_insn (gen_strset (destreg, mem, zeroreg));
		offset += size;
	      }
	  else
	    {
	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
	      countreg = ix86_zero_extend_to_Pmode (countreg);
	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
				       destexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Clear the remaining 1-7 tail bytes with single stores.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      /* Generic path: align the destination, rep stos{l,q} the bulk,
	 then clear the remainder.  */
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
	{
	  /* Small counts skip the alignment prologue entirely.  */
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: store 1/2/4 bytes until DESTREG is aligned.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main bulk: rep stos{l,q} with the count scaled down to words.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear the remaining sub-word tail, using run-time
	 tests on COUNTREG when the count was not known at compile
	 time.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
4e44c1ef 12854
0945b39d
JH
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator (const0_rtx for plain strlen) and ALIGN the
   known alignment.  Returns 1 when an inline expansion was emitted,
   0 to make the caller fall back to a library call.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb path: scan for EOSCHAR, then turn the decremented
	 counter (scasb counts down from -1) into the length.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      /* out = ~counter - 1, i.e. the number of bytes before the
	 terminator.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
12926
e075ae69
RH
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> aligned loop, 2 -> check two
	     bytes, 3 -> check one byte, 1 -> fall through and check up to
	     three bytes.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx))
	;
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     tmpreg = (word - 0x01010101) & ~word & 0x80808080.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branch-free variant: conditionally advance OUT by 2 and shift the
	 zero-byte mask down, using flag-reading IF_THEN_ELSE sets.  */
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  Doubling the low byte of the mask
     puts its top bit into the carry flag; the carry-in subtract below
     then adjusts OUT by 3 or 4 without a branch.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): 17 is presumably FLAGS_REG (used by name elsewhere in
     this file) -- confirm before changing.  */
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3
RH
13117
13118void
0f901c4c
SH
13119ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13120 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 13121 rtx pop, int sibcall)
0e07aff3
RH
13122{
13123 rtx use = NULL, call;
13124
13125 if (pop == const0_rtx)
13126 pop = NULL;
d0396b79 13127 gcc_assert (!TARGET_64BIT || !pop);
0e07aff3 13128
b069de3b
SS
13129#if TARGET_MACHO
13130 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13131 fnaddr = machopic_indirect_call_target (fnaddr);
13132#else
0e07aff3
RH
13133 /* Static functions and indirect calls don't need the pic register. */
13134 if (! TARGET_64BIT && flag_pic
13135 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 13136 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 13137 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
13138
13139 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13140 {
13141 rtx al = gen_rtx_REG (QImode, 0);
13142 emit_move_insn (al, callarg2);
13143 use_reg (&use, al);
13144 }
b069de3b 13145#endif /* TARGET_MACHO */
0e07aff3
RH
13146
13147 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13148 {
13149 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13150 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13151 }
4977bab6
ZW
13152 if (sibcall && TARGET_64BIT
13153 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13154 {
13155 rtx addr;
13156 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 13157 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
13158 emit_move_insn (fnaddr, addr);
13159 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13160 }
0e07aff3
RH
13161
13162 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13163 if (retval)
13164 call = gen_rtx_SET (VOIDmode, retval, call);
13165 if (pop)
13166 {
13167 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13168 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13169 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13170 }
13171
13172 call = emit_call_insn (call);
13173 if (use)
13174 CALL_INSN_FUNCTION_USAGE (call) = use;
13175}
fce5a9f2 13176
e075ae69 13177\f
e075ae69
RH
13178/* Clear stack slot assignments remembered from previous functions.
13179 This is called from INIT_EXPANDERS once before RTL is emitted for each
13180 function. */
13181
e2500fed 13182static struct machine_function *
b96a374d 13183ix86_init_machine_status (void)
37b15744 13184{
d7394366
JH
13185 struct machine_function *f;
13186
13187 f = ggc_alloc_cleared (sizeof (struct machine_function));
13188 f->use_fast_prologue_epilogue_nregs = -1;
5bf5a10b 13189 f->tls_descriptor_call_expanded_p = 0;
8330e2c6
AJ
13190
13191 return f;
1526a060
BS
13192}
13193
e075ae69
RH
13194/* Return a MEM corresponding to a stack slot with mode MODE.
13195 Allocate a new slot if necessary.
13196
13197 The RTL for a function can have several slots available: N is
13198 which slot to use. */
13199
13200rtx
ff680eb1 13201assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
e075ae69 13202{
ddb0ae00
ZW
13203 struct stack_local_entry *s;
13204
ff680eb1 13205 gcc_assert (n < MAX_386_STACK_LOCALS);
e075ae69 13206
ddb0ae00
ZW
13207 for (s = ix86_stack_locals; s; s = s->next)
13208 if (s->mode == mode && s->n == n)
13209 return s->rtl;
13210
13211 s = (struct stack_local_entry *)
13212 ggc_alloc (sizeof (struct stack_local_entry));
13213 s->n = n;
13214 s->mode = mode;
13215 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 13216
ddb0ae00
ZW
13217 s->next = ix86_stack_locals;
13218 ix86_stack_locals = s;
13219 return s->rtl;
e075ae69 13220}
f996902d
RH
13221
13222/* Construct the SYMBOL_REF for the tls_get_addr function. */
13223
e2500fed 13224static GTY(()) rtx ix86_tls_symbol;
f996902d 13225rtx
b96a374d 13226ix86_tls_get_addr (void)
f996902d 13227{
f996902d 13228
e2500fed 13229 if (!ix86_tls_symbol)
f996902d 13230 {
75d38379 13231 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
5bf5a10b
AO
13232 (TARGET_ANY_GNU_TLS
13233 && !TARGET_64BIT)
75d38379
JJ
13234 ? "___tls_get_addr"
13235 : "__tls_get_addr");
f996902d
RH
13236 }
13237
e2500fed 13238 return ix86_tls_symbol;
f996902d 13239}
5bf5a10b
AO
13240
13241/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13242
13243static GTY(()) rtx ix86_tls_module_base_symbol;
13244rtx
13245ix86_tls_module_base (void)
13246{
13247
13248 if (!ix86_tls_module_base_symbol)
13249 {
13250 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13251 "_TLS_MODULE_BASE_");
13252 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13253 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13254 }
13255
13256 return ix86_tls_module_base_symbol;
13257}
e075ae69
RH
13258\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns 0 for auto-modify addresses (which are encoded implicitly),
   otherwise the number of displacement/SIB bytes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-increment/decrement addressing contributes no explicit bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* Strip SUBREGs so the hard-register comparisons below work.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K is a signed 8-bit immediate: one byte when it
	     fits, otherwise a full 32-bit displacement.  */
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
79325812 13334
5bf0ebab
RH
13335/* Compute default value for "length_immediate" attribute. When SHORTFORM
13336 is set, expect that insn have 8bit immediate alternative. */
e075ae69 13337int
b96a374d 13338ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 13339{
6ef67412
JH
13340 int len = 0;
13341 int i;
6c698a6d 13342 extract_insn_cached (insn);
6ef67412
JH
13343 for (i = recog_data.n_operands - 1; i >= 0; --i)
13344 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 13345 {
d0396b79 13346 gcc_assert (!len);
f38840db 13347 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
6ef67412
JH
13348 len = 1;
13349 else
13350 {
13351 switch (get_attr_mode (insn))
13352 {
13353 case MODE_QI:
13354 len+=1;
13355 break;
13356 case MODE_HI:
13357 len+=2;
13358 break;
13359 case MODE_SI:
13360 len+=4;
13361 break;
14f73b5a
JH
13362 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13363 case MODE_DI:
13364 len+=4;
13365 break;
6ef67412 13366 default:
c725bd79 13367 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
13368 }
13369 }
3071fab5 13370 }
6ef67412
JH
13371 return len;
13372}
13373/* Compute default value for "length_address" attribute. */
13374int
b96a374d 13375ix86_attr_length_address_default (rtx insn)
6ef67412
JH
13376{
13377 int i;
9b73c90a
EB
13378
13379 if (get_attr_type (insn) == TYPE_LEA)
13380 {
13381 rtx set = PATTERN (insn);
d0396b79
NS
13382
13383 if (GET_CODE (set) == PARALLEL)
9b73c90a 13384 set = XVECEXP (set, 0, 0);
d0396b79
NS
13385
13386 gcc_assert (GET_CODE (set) == SET);
9b73c90a
EB
13387
13388 return memory_address_length (SET_SRC (set));
13389 }
13390
6c698a6d 13391 extract_insn_cached (insn);
1ccbefce
RH
13392 for (i = recog_data.n_operands - 1; i >= 0; --i)
13393 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 13394 {
6ef67412 13395 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
13396 break;
13397 }
6ef67412 13398 return 0;
3f803cd9 13399}
e075ae69
RH
13400\f
13401/* Return the maximum number of instructions a cpu can issue. */
b657fc39 13402
c237e94a 13403static int
b96a374d 13404ix86_issue_rate (void)
b657fc39 13405{
9e555526 13406 switch (ix86_tune)
b657fc39 13407 {
e075ae69
RH
13408 case PROCESSOR_PENTIUM:
13409 case PROCESSOR_K6:
13410 return 2;
79325812 13411
e075ae69 13412 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
13413 case PROCESSOR_PENTIUM4:
13414 case PROCESSOR_ATHLON:
4977bab6 13415 case PROCESSOR_K8:
89c43c0a 13416 case PROCESSOR_NOCONA:
d326eaf0
JH
13417 case PROCESSOR_GENERIC32:
13418 case PROCESSOR_GENERIC64:
e075ae69 13419 return 3;
b657fc39 13420
b657fc39 13421 default:
e075ae69 13422 return 1;
b657fc39 13423 }
b657fc39
L
13424}
13425
e075ae69
RH
13426/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13427 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 13428
e075ae69 13429static int
b96a374d 13430ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
13431{
13432 rtx set, set2;
b657fc39 13433
e075ae69
RH
13434 /* Simplify the test for uninteresting insns. */
13435 if (insn_type != TYPE_SETCC
13436 && insn_type != TYPE_ICMOV
13437 && insn_type != TYPE_FCMOV
13438 && insn_type != TYPE_IBR)
13439 return 0;
b657fc39 13440
e075ae69
RH
13441 if ((set = single_set (dep_insn)) != 0)
13442 {
13443 set = SET_DEST (set);
13444 set2 = NULL_RTX;
13445 }
13446 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13447 && XVECLEN (PATTERN (dep_insn), 0) == 2
13448 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13449 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13450 {
13451 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13452 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13453 }
78a0d70c
ZW
13454 else
13455 return 0;
b657fc39 13456
78a0d70c
ZW
13457 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13458 return 0;
b657fc39 13459
f5143c46 13460 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
13461 not any other potentially set register. */
13462 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13463 return 0;
13464
13465 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13466 return 0;
13467
13468 return 1;
e075ae69 13469}
b657fc39 13470
e075ae69
RH
13471/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13472 address with operands set by DEP_INSN. */
13473
13474static int
b96a374d 13475ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
13476{
13477 rtx addr;
13478
6ad48e84
JH
13479 if (insn_type == TYPE_LEA
13480 && TARGET_PENTIUM)
5fbdde42
RH
13481 {
13482 addr = PATTERN (insn);
d0396b79
NS
13483
13484 if (GET_CODE (addr) == PARALLEL)
5fbdde42 13485 addr = XVECEXP (addr, 0, 0);
d0396b79
NS
13486
13487 gcc_assert (GET_CODE (addr) == SET);
13488
5fbdde42
RH
13489 addr = SET_SRC (addr);
13490 }
e075ae69
RH
13491 else
13492 {
13493 int i;
6c698a6d 13494 extract_insn_cached (insn);
1ccbefce
RH
13495 for (i = recog_data.n_operands - 1; i >= 0; --i)
13496 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 13497 {
1ccbefce 13498 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
13499 goto found;
13500 }
13501 return 0;
13502 found:;
b657fc39
L
13503 }
13504
e075ae69 13505 return modified_in_p (addr, dep_insn);
b657fc39 13506}
a269a03c 13507
c237e94a 13508static int
b96a374d 13509ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 13510{
e075ae69 13511 enum attr_type insn_type, dep_insn_type;
8695f61e 13512 enum attr_memory memory;
e075ae69 13513 rtx set, set2;
9b00189f 13514 int dep_insn_code_number;
a269a03c 13515
d1f87653 13516 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 13517 if (REG_NOTE_KIND (link) != 0)
309ada50 13518 return 0;
a269a03c 13519
9b00189f
JH
13520 dep_insn_code_number = recog_memoized (dep_insn);
13521
e075ae69 13522 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 13523 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 13524 return cost;
a269a03c 13525
1c71e60e
JH
13526 insn_type = get_attr_type (insn);
13527 dep_insn_type = get_attr_type (dep_insn);
9b00189f 13528
9e555526 13529 switch (ix86_tune)
a269a03c
JC
13530 {
13531 case PROCESSOR_PENTIUM:
e075ae69
RH
13532 /* Address Generation Interlock adds a cycle of latency. */
13533 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13534 cost += 1;
13535
13536 /* ??? Compares pair with jump/setcc. */
13537 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13538 cost = 0;
13539
d1f87653 13540 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 13541 if (insn_type == TYPE_FMOV
e075ae69
RH
13542 && get_attr_memory (insn) == MEMORY_STORE
13543 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13544 cost += 1;
13545 break;
a269a03c 13546
e075ae69 13547 case PROCESSOR_PENTIUMPRO:
6ad48e84 13548 memory = get_attr_memory (insn);
e075ae69
RH
13549
13550 /* INT->FP conversion is expensive. */
13551 if (get_attr_fp_int_src (dep_insn))
13552 cost += 5;
13553
13554 /* There is one cycle extra latency between an FP op and a store. */
13555 if (insn_type == TYPE_FMOV
13556 && (set = single_set (dep_insn)) != NULL_RTX
13557 && (set2 = single_set (insn)) != NULL_RTX
13558 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13559 && GET_CODE (SET_DEST (set2)) == MEM)
13560 cost += 1;
6ad48e84
JH
13561
13562 /* Show ability of reorder buffer to hide latency of load by executing
13563 in parallel with previous instruction in case
13564 previous instruction is not needed to compute the address. */
13565 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13566 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 13567 {
6ad48e84
JH
13568 /* Claim moves to take one cycle, as core can issue one load
13569 at time and the next load can start cycle later. */
13570 if (dep_insn_type == TYPE_IMOV
13571 || dep_insn_type == TYPE_FMOV)
13572 cost = 1;
13573 else if (cost > 1)
13574 cost--;
13575 }
e075ae69 13576 break;
a269a03c 13577
e075ae69 13578 case PROCESSOR_K6:
6ad48e84 13579 memory = get_attr_memory (insn);
8695f61e 13580
e075ae69
RH
13581 /* The esp dependency is resolved before the instruction is really
13582 finished. */
13583 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13584 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13585 return 1;
a269a03c 13586
e075ae69
RH
13587 /* INT->FP conversion is expensive. */
13588 if (get_attr_fp_int_src (dep_insn))
13589 cost += 5;
6ad48e84
JH
13590
13591 /* Show ability of reorder buffer to hide latency of load by executing
13592 in parallel with previous instruction in case
13593 previous instruction is not needed to compute the address. */
13594 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13595 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 13596 {
6ad48e84
JH
13597 /* Claim moves to take one cycle, as core can issue one load
13598 at time and the next load can start cycle later. */
13599 if (dep_insn_type == TYPE_IMOV
13600 || dep_insn_type == TYPE_FMOV)
13601 cost = 1;
13602 else if (cost > 2)
13603 cost -= 2;
13604 else
13605 cost = 1;
13606 }
a14003ee 13607 break;
e075ae69 13608
309ada50 13609 case PROCESSOR_ATHLON:
4977bab6 13610 case PROCESSOR_K8:
d326eaf0
JH
13611 case PROCESSOR_GENERIC32:
13612 case PROCESSOR_GENERIC64:
6ad48e84 13613 memory = get_attr_memory (insn);
6ad48e84 13614
6ad48e84
JH
13615 /* Show ability of reorder buffer to hide latency of load by executing
13616 in parallel with previous instruction in case
13617 previous instruction is not needed to compute the address. */
13618 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13619 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 13620 {
26f74aa3
JH
13621 enum attr_unit unit = get_attr_unit (insn);
13622 int loadcost = 3;
13623
13624 /* Because of the difference between the length of integer and
13625 floating unit pipeline preparation stages, the memory operands
b96a374d 13626 for floating point are cheaper.
26f74aa3 13627
c51e6d85 13628 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
13629 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13630 loadcost = 3;
13631 else
13632 loadcost = TARGET_ATHLON ? 2 : 0;
13633
13634 if (cost >= loadcost)
13635 cost -= loadcost;
6ad48e84
JH
13636 else
13637 cost = 0;
13638 }
309ada50 13639
a269a03c 13640 default:
a269a03c
JC
13641 break;
13642 }
13643
13644 return cost;
13645}
0a726ef1 13646
9b690711
RH
13647/* How many alternative schedules to try. This should be as wide as the
13648 scheduling freedom in the DFA, but no wider. Making this value too
13649 large results extra work for the scheduler. */
13650
13651static int
b96a374d 13652ia32_multipass_dfa_lookahead (void)
9b690711 13653{
9e555526 13654 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 13655 return 2;
56bab446 13656
8695f61e
SB
13657 if (ix86_tune == PROCESSOR_PENTIUMPRO
13658 || ix86_tune == PROCESSOR_K6)
56bab446
SB
13659 return 1;
13660
9b690711 13661 else
56bab446 13662 return 0;
9b690711
RH
13663}
13664
0e4970d7 13665\f
a7180f70
BS
13666/* Compute the alignment given to a constant that is being placed in memory.
13667 EXP is the constant and ALIGN is the alignment that the object would
13668 ordinarily have.
13669 The value of this function is used instead of that alignment to align
13670 the object. */
13671
13672int
b96a374d 13673ix86_constant_alignment (tree exp, int align)
a7180f70
BS
13674{
13675 if (TREE_CODE (exp) == REAL_CST)
13676 {
13677 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13678 return 64;
13679 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13680 return 128;
13681 }
4137ba7a
JJ
13682 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13683 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13684 return BITS_PER_WORD;
a7180f70
BS
13685
13686 return align;
13687}
13688
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  /* Cap the boost at a word when optimizing for size, else at 256 bits.  */
  int max_align = optimize_size ? BITS_PER_WORD : 256;

  /* Boost sufficiently large constant-size aggregates up to MAX_ALIGN.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* The remaining cases raise alignment based on the element/field mode:
     DFmode data to 64 bits, 128-bit modes to 128 bits.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
13755
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.

   Mirrors ix86_data_alignment except that no general size-based cap is
   applied and the x86-64 aggregate threshold is 16 bytes.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Raise alignment based on the element/field mode: DFmode data to
     64 bits, 128-bit modes to 128 bits.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is consulted here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   The byte values stored below are x86 instruction encodings; they are
   written into the trampoline buffer TRAMP as little-endian QI/HI/SI/DI
   stores, so e.g. the HImode constant 0xbb41 lands as the byte
   sequence 0x41 0xbb.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit trampoline layout (10 bytes):
	   0: b9 <cxt>    mov $cxt, %ecx
	   5: e9 <disp>   jmp rel32 to FNADDR
	 Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  /* 41 bb <imm32>: movl $fnaddr, %r11d (zero-extends into %r11).  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb <imm64>: movabs $fnaddr, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (49 ba <imm64>).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump through r11: 49 ff e3 is "jmp *%r11".  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* The emitted code must fit in the buffer the target reserves.  */
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  /* Some systems need a runtime call to make the trampoline's stack
     page executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 13873\f
eb701deb
RH
/* Codes for all the SSE/MMX builtins.  Entries are grouped by
   instruction-set extension (SSE, MMX, 3DNow!, SSE2, SSE3/Prescott).
   IX86_BUILTIN_MAX is a sentinel and must remain the last entry.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* SSE2 ABI functions.  */
  IX86_BUILTIN_SSE2_ACOS,
  IX86_BUILTIN_SSE2_ACOSF,
  IX86_BUILTIN_SSE2_ASIN,
  IX86_BUILTIN_SSE2_ASINF,
  IX86_BUILTIN_SSE2_ATAN,
  IX86_BUILTIN_SSE2_ATANF,
  IX86_BUILTIN_SSE2_ATAN2,
  IX86_BUILTIN_SSE2_ATAN2F,
  IX86_BUILTIN_SSE2_COS,
  IX86_BUILTIN_SSE2_COSF,
  IX86_BUILTIN_SSE2_EXP,
  IX86_BUILTIN_SSE2_EXPF,
  IX86_BUILTIN_SSE2_LOG10,
  IX86_BUILTIN_SSE2_LOG10F,
  IX86_BUILTIN_SSE2_LOG,
  IX86_BUILTIN_SSE2_LOGF,
  IX86_BUILTIN_SSE2_SIN,
  IX86_BUILTIN_SSE2_SINF,
  IX86_BUILTIN_SSE2_TAN,
  IX86_BUILTIN_SSE2_TANF,

  /* Sentinel; keep last.  */
  IX86_BUILTIN_MAX
};
14325
/* Register a machine-dependent builtin named NAME with function type
   TYPE and builtin code CODE, but only when the target flags required
   by MASK are enabled (and, when MASK includes MASK_64BIT, only on
   64-bit targets).  The original condition mixed `&' and `&&' without
   parentheses, relying on precedence; the grouping is now explicit.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if (((MASK) & target_flags)						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
bd793c65 14333
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* One entry of the builtin-expansion tables below: the target-flag
   mask required for the builtin to exist, the insn pattern used to
   expand it, its user-visible name (0 for entries expanded specially),
   its IX86_BUILTIN_* code, and — for comparison builtins — the rtx
   comparison code plus BUILTIN_DESC_* flag bits.  */
struct builtin_description
{
  const unsigned int mask;		/* Required target_flags bits (MASK_SSE etc.).  */
  const enum insn_code icode;		/* Insn pattern to expand with.  */
  const char *const name;		/* Builtin name, or 0.  */
  const enum ix86_builtins code;	/* Builtin code from enum ix86_builtins.  */
  const enum rtx_code comparison;	/* Comparison code, when applicable.  */
  const unsigned int flag;		/* BUILTIN_DESC_* bits.  */
};
14349
/* Table of comis/ucomis comparison builtins (SSE scalar-float and
   SSE2 scalar-double).  Note that the eq/neq entries deliberately use
   the UNEQ and LTGT rtx codes rather than EQ/NE.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
14377
8b60264b 14378static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
14379{
14380 /* SSE */
37f22004
L
14381 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14382 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14383 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14384 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
ef719a44
RH
14385 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14386 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14387 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14388 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14389
14390 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14391 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14392 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14393 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
e358acde 14394 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 14395 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
e358acde 14396 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
14397 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14398 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14399 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14400 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14401 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
e358acde 14402 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 14403 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
e358acde 14404 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
14405 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14406 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14407 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14408 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14409 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14410 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14411 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14412 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14413 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14414 BUILTIN_DESC_SWAP_OPERANDS },
14415 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14416 BUILTIN_DESC_SWAP_OPERANDS },
14417 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
37f22004
L
14418
14419 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14420 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
ef719a44
RH
14421 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14422 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
37f22004 14423
ef719a44 14424 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
37f22004 14425 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
ef719a44
RH
14426 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14427 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
37f22004
L
14428
14429 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14430 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14431 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14432 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14433 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
14434
14435 /* MMX */
80e8bb90
RH
14436 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14437 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14438 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
1b004b58 14439 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
80e8bb90
RH
14440 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14441 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14442 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
1b004b58 14443 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b 14444
80e8bb90
RH
14445 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14446 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14447 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14448 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14449 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14451 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14452 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
eeb06b1b 14453
80e8bb90
RH
14454 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14455 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14456 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b 14457
80e8bb90
RH
14458 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14459 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14460 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14461 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
eeb06b1b 14462
37f22004
L
14463 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14464 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b 14465
80e8bb90
RH
14466 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14467 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14468 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14469 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14470 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14471 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
eeb06b1b 14472
80e8bb90
RH
14473 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14474 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14475 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14476 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
14477
14478 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14479 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14480 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14481 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14482 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14483 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
14484
14485 /* Special. */
eeb06b1b
BS
14486 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14487 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14488 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14489
ef719a44
RH
14490 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14491 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14492 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b 14493
80e8bb90
RH
14494 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14495 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14496 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14497 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
eeb06b1b
BS
14498 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14499 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14500
80e8bb90
RH
14501 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14502 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14503 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14504 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
eeb06b1b
BS
14505 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14506 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14507
80e8bb90
RH
14508 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14509 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14510 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14511 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
eeb06b1b 14512
37f22004 14513 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
14514 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14515
14516 /* SSE2 */
14517 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14518 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14519 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14520 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
ef719a44
RH
14521 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14522 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14523 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14524 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14525
14526 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14527 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14528 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
e358acde 14530 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 14531 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
e358acde 14532 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
14533 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14534 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14535 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14536 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14537 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
e358acde 14538 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44 14539 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
e358acde 14540 BUILTIN_DESC_SWAP_OPERANDS },
ef719a44
RH
14541 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14543 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14544 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14548 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14549 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
fbe5eb6d
BS
14550
14551 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
ef719a44
RH
14553 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
fbe5eb6d 14555
ef719a44 14556 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
1877be45 14557 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
ef719a44
RH
14558 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14559 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
14560
14561 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14562 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14563 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14564
14565 /* SSE2 MMX */
14566 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14567 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14568 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 14569 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
14570 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14571 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14572 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 14573 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d 14574
ef719a44
RH
14575 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14576 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14577 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14578 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14579 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14580 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14581 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14582 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
fbe5eb6d
BS
14583
14584 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
ef719a44 14585 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
fbe5eb6d 14586
ef719a44 14587 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
916b60b7 14588 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
ef719a44
RH
14589 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14590 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
14591
14592 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14594
ef719a44
RH
14595 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14597 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14599 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
fbe5eb6d
BS
14601
14602 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14603 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14604 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14605 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14606
14607 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14608 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14609 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 14610 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
14611 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14612 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14613 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 14614 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 14615
916b60b7
BS
14616 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14617 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14618 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14619
ef719a44 14620 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
916b60b7
BS
14621 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14622
9e9fb0ce
JB
14623 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14624 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14625
916b60b7 14626 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
916b60b7 14627 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
916b60b7
BS
14628 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14629
916b60b7 14630 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
916b60b7 14631 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
916b60b7
BS
14632 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14633
916b60b7 14634 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
916b60b7
BS
14635 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14636
14637 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14638
ef719a44
RH
14639 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14640 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14641 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14642 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
22c7c85e 14643
9e200aaf 14644 /* SSE3 MMX */
ef719a44
RH
14645 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14646 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14647 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14648 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14649 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14650 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
14651};
14652
8b60264b 14653static const struct builtin_description bdesc_1arg[] =
bd793c65 14654{
37f22004
L
14655 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14656 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 14657
37f22004 14658 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
ef719a44
RH
14659 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14660 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 14661
ef719a44
RH
14662 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14663 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14664 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14665 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14666 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14667 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
14668
14669 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14670 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
fbe5eb6d
BS
14671
14672 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14673
ef719a44
RH
14674 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14675 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 14676
ef719a44
RH
14677 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14678 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14679 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14680 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14681 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 14682
ef719a44 14683 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
fbe5eb6d 14684
ef719a44
RH
14685 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14686 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14687 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14688 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
f02e1358 14689
ef719a44
RH
14690 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14691 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14692 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
22c7c85e 14693
9e200aaf 14694 /* SSE3 */
ef719a44
RH
14695 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14696 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
bd793c65
BS
14697};
14698
eb701deb 14699static void
b96a374d 14700ix86_init_builtins (void)
f6155fda
SS
14701{
14702 if (TARGET_MMX)
14703 ix86_init_mmx_sse_builtins ();
ee963181
RG
14704 if (TARGET_SSE2)
14705 ix86_init_sse_abi_builtins ();
f6155fda
SS
14706}
14707
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
e37af218 14711static void
b96a374d 14712ix86_init_mmx_sse_builtins (void)
bd793c65 14713{
8b60264b 14714 const struct builtin_description * d;
77ebd435 14715 size_t i;
bd793c65 14716
4a5eab38
PB
14717 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14718 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14719 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
a16da3ae
RH
14720 tree V2DI_type_node
14721 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
4a5eab38
PB
14722 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14723 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14724 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14725 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14726 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14727 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14728
bd793c65 14729 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
14730 tree pcchar_type_node = build_pointer_type (
14731 build_type_variant (char_type_node, 1, 0));
bd793c65 14732 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
14733 tree pcfloat_type_node = build_pointer_type (
14734 build_type_variant (float_type_node, 1, 0));
bd793c65 14735 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 14736 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
14737 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14738
14739 /* Comparisons. */
14740 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
14741 = build_function_type_list (integer_type_node,
14742 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14743 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
14744 = build_function_type_list (V4SI_type_node,
14745 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14746 /* MMX/SSE/integer conversions. */
bd793c65 14747 tree int_ftype_v4sf
b4de2f7d
AH
14748 = build_function_type_list (integer_type_node,
14749 V4SF_type_node, NULL_TREE);
453ee231
JH
14750 tree int64_ftype_v4sf
14751 = build_function_type_list (long_long_integer_type_node,
14752 V4SF_type_node, NULL_TREE);
bd793c65 14753 tree int_ftype_v8qi
b4de2f7d 14754 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 14755 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
14756 = build_function_type_list (V4SF_type_node,
14757 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
14758 tree v4sf_ftype_v4sf_int64
14759 = build_function_type_list (V4SF_type_node,
14760 V4SF_type_node, long_long_integer_type_node,
14761 NULL_TREE);
bd793c65 14762 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
14763 = build_function_type_list (V4SF_type_node,
14764 V4SF_type_node, V2SI_type_node, NULL_TREE);
eb701deb 14765
bd793c65
BS
14766 /* Miscellaneous. */
14767 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
14768 = build_function_type_list (V8QI_type_node,
14769 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 14770 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
14771 = build_function_type_list (V4HI_type_node,
14772 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 14773 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
14774 = build_function_type_list (V4SF_type_node,
14775 V4SF_type_node, V4SF_type_node,
14776 integer_type_node, NULL_TREE);
bd793c65 14777 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
14778 = build_function_type_list (V2SI_type_node,
14779 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 14780 tree v4hi_ftype_v4hi_int
b4de2f7d 14781 = build_function_type_list (V4HI_type_node,
e7a60f56 14782 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 14783 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
14784 = build_function_type_list (V4HI_type_node,
14785 V4HI_type_node, long_long_unsigned_type_node,
14786 NULL_TREE);
bd793c65 14787 tree v2si_ftype_v2si_di
b4de2f7d
AH
14788 = build_function_type_list (V2SI_type_node,
14789 V2SI_type_node, long_long_unsigned_type_node,
14790 NULL_TREE);
bd793c65 14791 tree void_ftype_void
b4de2f7d 14792 = build_function_type (void_type_node, void_list_node);
bd793c65 14793 tree void_ftype_unsigned
b4de2f7d 14794 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
14795 tree void_ftype_unsigned_unsigned
14796 = build_function_type_list (void_type_node, unsigned_type_node,
14797 unsigned_type_node, NULL_TREE);
14798 tree void_ftype_pcvoid_unsigned_unsigned
14799 = build_function_type_list (void_type_node, const_ptr_type_node,
14800 unsigned_type_node, unsigned_type_node,
14801 NULL_TREE);
bd793c65 14802 tree unsigned_ftype_void
b4de2f7d 14803 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 14804 tree v2si_ftype_v4sf
b4de2f7d 14805 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14806 /* Loads/stores. */
bd793c65 14807 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
14808 = build_function_type_list (void_type_node,
14809 V8QI_type_node, V8QI_type_node,
14810 pchar_type_node, NULL_TREE);
068f5dea
JH
14811 tree v4sf_ftype_pcfloat
14812 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
14813 /* @@@ the type is bogus */
14814 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 14815 = build_function_type_list (V4SF_type_node,
f8ca7923 14816 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 14817 tree void_ftype_pv2si_v4sf
b4de2f7d 14818 = build_function_type_list (void_type_node,
f8ca7923 14819 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14820 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
14821 = build_function_type_list (void_type_node,
14822 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14823 tree void_ftype_pdi_di
b4de2f7d
AH
14824 = build_function_type_list (void_type_node,
14825 pdi_type_node, long_long_unsigned_type_node,
14826 NULL_TREE);
916b60b7 14827 tree void_ftype_pv2di_v2di
b4de2f7d
AH
14828 = build_function_type_list (void_type_node,
14829 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
14830 /* Normal vector unops. */
14831 tree v4sf_ftype_v4sf
b4de2f7d 14832 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 14833
bd793c65
BS
14834 /* Normal vector binops. */
14835 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
14836 = build_function_type_list (V4SF_type_node,
14837 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 14838 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
14839 = build_function_type_list (V8QI_type_node,
14840 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 14841 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
14842 = build_function_type_list (V4HI_type_node,
14843 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 14844 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
14845 = build_function_type_list (V2SI_type_node,
14846 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 14847 tree di_ftype_di_di
b4de2f7d
AH
14848 = build_function_type_list (long_long_unsigned_type_node,
14849 long_long_unsigned_type_node,
14850 long_long_unsigned_type_node, NULL_TREE);
bd793c65 14851
47f339cf 14852 tree v2si_ftype_v2sf
ae3aa00d 14853 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 14854 tree v2sf_ftype_v2si
b4de2f7d 14855 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 14856 tree v2si_ftype_v2si
b4de2f7d 14857 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 14858 tree v2sf_ftype_v2sf
b4de2f7d 14859 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 14860 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
14861 = build_function_type_list (V2SF_type_node,
14862 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 14863 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
14864 = build_function_type_list (V2SI_type_node,
14865 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
14866 tree pint_type_node = build_pointer_type (integer_type_node);
14867 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
14868 tree pcdouble_type_node = build_pointer_type (
14869 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 14870 tree int_ftype_v2df_v2df
b4de2f7d
AH
14871 = build_function_type_list (integer_type_node,
14872 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14873
068f5dea
JH
14874 tree void_ftype_pcvoid
14875 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 14876 tree v4sf_ftype_v4si
b4de2f7d 14877 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 14878 tree v4si_ftype_v4sf
b4de2f7d 14879 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 14880 tree v2df_ftype_v4si
b4de2f7d 14881 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 14882 tree v4si_ftype_v2df
b4de2f7d 14883 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14884 tree v2si_ftype_v2df
b4de2f7d 14885 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14886 tree v4sf_ftype_v2df
b4de2f7d 14887 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14888 tree v2df_ftype_v2si
b4de2f7d 14889 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 14890 tree v2df_ftype_v4sf
b4de2f7d 14891 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 14892 tree int_ftype_v2df
b4de2f7d 14893 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
14894 tree int64_ftype_v2df
14895 = build_function_type_list (long_long_integer_type_node,
b96a374d 14896 V2DF_type_node, NULL_TREE);
fbe5eb6d 14897 tree v2df_ftype_v2df_int
b4de2f7d
AH
14898 = build_function_type_list (V2DF_type_node,
14899 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
14900 tree v2df_ftype_v2df_int64
14901 = build_function_type_list (V2DF_type_node,
14902 V2DF_type_node, long_long_integer_type_node,
14903 NULL_TREE);
fbe5eb6d 14904 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
14905 = build_function_type_list (V4SF_type_node,
14906 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14907 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
14908 = build_function_type_list (V2DF_type_node,
14909 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 14910 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
14911 = build_function_type_list (V2DF_type_node,
14912 V2DF_type_node, V2DF_type_node,
14913 integer_type_node,
14914 NULL_TREE);
1c47af84 14915 tree v2df_ftype_v2df_pcdouble
b4de2f7d 14916 = build_function_type_list (V2DF_type_node,
1c47af84 14917 V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 14918 tree void_ftype_pdouble_v2df
b4de2f7d
AH
14919 = build_function_type_list (void_type_node,
14920 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14921 tree void_ftype_pint_int
b4de2f7d
AH
14922 = build_function_type_list (void_type_node,
14923 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 14924 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
14925 = build_function_type_list (void_type_node,
14926 V16QI_type_node, V16QI_type_node,
14927 pchar_type_node, NULL_TREE);
068f5dea
JH
14928 tree v2df_ftype_pcdouble
14929 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 14930 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
14931 = build_function_type_list (V2DF_type_node,
14932 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14933 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
14934 = build_function_type_list (V16QI_type_node,
14935 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 14936 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
14937 = build_function_type_list (V8HI_type_node,
14938 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 14939 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
14940 = build_function_type_list (V4SI_type_node,
14941 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 14942 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
14943 = build_function_type_list (V2DI_type_node,
14944 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 14945 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
14946 = build_function_type_list (V2DI_type_node,
14947 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 14948 tree v2df_ftype_v2df
b4de2f7d 14949 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
916b60b7 14950 tree v2di_ftype_v2di_int
b4de2f7d
AH
14951 = build_function_type_list (V2DI_type_node,
14952 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 14953 tree v4si_ftype_v4si_int
b4de2f7d
AH
14954 = build_function_type_list (V4SI_type_node,
14955 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 14956 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
14957 = build_function_type_list (V8HI_type_node,
14958 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 14959 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
14960 = build_function_type_list (V8HI_type_node,
14961 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 14962 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
14963 = build_function_type_list (V4SI_type_node,
14964 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 14965 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
14966 = build_function_type_list (V4SI_type_node,
14967 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 14968 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
14969 = build_function_type_list (long_long_unsigned_type_node,
14970 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
14971 tree di_ftype_v2si_v2si
14972 = build_function_type_list (long_long_unsigned_type_node,
14973 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 14974 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
14975 = build_function_type_list (V2DI_type_node,
14976 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
14977 tree v2di_ftype_v4si_v4si
14978 = build_function_type_list (V2DI_type_node,
14979 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 14980 tree int_ftype_v16qi
b4de2f7d 14981 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
14982 tree v16qi_ftype_pcchar
14983 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
14984 tree void_ftype_pchar_v16qi
14985 = build_function_type_list (void_type_node,
14986 pchar_type_node, V16QI_type_node, NULL_TREE);
47f339cf 14987
f8a1ebc6
JH
14988 tree float80_type;
14989 tree float128_type;
eb701deb 14990 tree ftype;
f8a1ebc6
JH
14991
14992 /* The __float80 type. */
14993 if (TYPE_MODE (long_double_type_node) == XFmode)
14994 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14995 "__float80");
14996 else
14997 {
14998 /* The __float80 type. */
14999 float80_type = make_node (REAL_TYPE);
968a7562 15000 TYPE_PRECISION (float80_type) = 80;
f8a1ebc6
JH
15001 layout_type (float80_type);
15002 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15003 }
15004
f749178d
JH
15005 if (TARGET_64BIT)
15006 {
15007 float128_type = make_node (REAL_TYPE);
15008 TYPE_PRECISION (float128_type) = 128;
15009 layout_type (float128_type);
15010 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15011 }
f8a1ebc6 15012
bd793c65
BS
15013 /* Add all builtins that are more or less simple operations on two
15014 operands. */
ca7558fc 15015 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
15016 {
15017 /* Use one of the operands; the target can have a different mode for
15018 mask-generating compares. */
15019 enum machine_mode mode;
15020 tree type;
15021
15022 if (d->name == 0)
15023 continue;
15024 mode = insn_data[d->icode].operand[1].mode;
15025
bd793c65
BS
15026 switch (mode)
15027 {
fbe5eb6d
BS
15028 case V16QImode:
15029 type = v16qi_ftype_v16qi_v16qi;
15030 break;
15031 case V8HImode:
15032 type = v8hi_ftype_v8hi_v8hi;
15033 break;
15034 case V4SImode:
15035 type = v4si_ftype_v4si_v4si;
15036 break;
15037 case V2DImode:
15038 type = v2di_ftype_v2di_v2di;
15039 break;
15040 case V2DFmode:
15041 type = v2df_ftype_v2df_v2df;
15042 break;
bd793c65
BS
15043 case V4SFmode:
15044 type = v4sf_ftype_v4sf_v4sf;
15045 break;
15046 case V8QImode:
15047 type = v8qi_ftype_v8qi_v8qi;
15048 break;
15049 case V4HImode:
15050 type = v4hi_ftype_v4hi_v4hi;
15051 break;
15052 case V2SImode:
15053 type = v2si_ftype_v2si_v2si;
15054 break;
bd793c65
BS
15055 case DImode:
15056 type = di_ftype_di_di;
15057 break;
15058
15059 default:
d0396b79 15060 gcc_unreachable ();
bd793c65 15061 }
0f290768 15062
bd793c65 15063 /* Override for comparisons. */
ef719a44
RH
15064 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15065 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
bd793c65
BS
15066 type = v4si_ftype_v4sf_v4sf;
15067
ef719a44
RH
15068 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15069 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
fbe5eb6d
BS
15070 type = v2di_ftype_v2df_v2df;
15071
eeb06b1b 15072 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
15073 }
15074
15075 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b 15076 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
15077 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15078 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15079 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15080
15081 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15082 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15083 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15084
15085 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15086 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15087
1b004b58 15088 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
eeb06b1b 15089 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 15090
bd793c65 15091 /* comi/ucomi insns. */
ca7558fc 15092 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
15093 if (d->mask == MASK_SSE2)
15094 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15095 else
15096 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 15097
1255c85c
BS
15098 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15099 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15100 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 15101
37f22004
L
15102 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15103 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15104 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15105 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15106 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15107 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15108 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15109 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15110 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15111 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15112 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15113
37f22004
L
15114 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15115
37f22004 15116 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
37f22004 15117 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
37f22004
L
15118
15119 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15120 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15121 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15122 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15123
15124 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15125 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15126 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15127 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15128
15129 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15130
15131 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15132
15133 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15134 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15135 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15136 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15137 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15138 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15139
15140 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 15141
47f339cf
BS
15142 /* Original 3DNow! */
15143 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15144 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15145 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15146 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15147 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15148 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15149 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15150 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15151 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15152 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15153 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15154 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15155 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15156 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15157 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15158 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15159 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15160 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15161 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15162 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
15163
15164 /* 3DNow! extension as used in the Athlon CPU. */
15165 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15166 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15167 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15168 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15169 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15170 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15171
fbe5eb6d 15172 /* SSE2 */
fbe5eb6d 15173 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
fbe5eb6d 15174
068f5dea 15175 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
fbe5eb6d 15176 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
fbe5eb6d 15177
1c47af84
RH
15178 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15179 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
fbe5eb6d
BS
15180
15181 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 15182 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
15183 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 15185 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
15186
15187 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15188 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15189 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 15190 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
15191
15192 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15193 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15194
15195 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15196
15197 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 15198 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
15199
15200 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15201 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15202 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15204 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15205
15206 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15207
15208 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15209 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
15210 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15211 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
15212
15213 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15214 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15215 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15216
15217 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 15218 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
15219 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15220 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15221
068f5dea 15222 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
15223 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15224 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 15225
068f5dea 15226 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
f02e1358 15227 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
f02e1358 15228
9e9fb0ce
JB
15229 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15230 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15231
916b60b7
BS
15232 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15233 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15234 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15235
15236 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15237 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15238 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15239
15240 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15241 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15242
ab3146fd 15243 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
15244 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15245 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15246 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15247
ab3146fd 15248 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
15249 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15250 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15251 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15252
15253 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15254 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15255
15256 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
15257
15258 /* Prescott New Instructions. */
9e200aaf 15259 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
15260 void_ftype_pcvoid_unsigned_unsigned,
15261 IX86_BUILTIN_MONITOR);
9e200aaf 15262 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
15263 void_ftype_unsigned_unsigned,
15264 IX86_BUILTIN_MWAIT);
9e200aaf 15265 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
15266 v4sf_ftype_v4sf,
15267 IX86_BUILTIN_MOVSHDUP);
9e200aaf 15268 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
15269 v4sf_ftype_v4sf,
15270 IX86_BUILTIN_MOVSLDUP);
9e200aaf 15271 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 15272 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
eb701deb
RH
15273
15274 /* Access to the vec_init patterns. */
15275 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15276 integer_type_node, NULL_TREE);
15277 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15278 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15279
15280 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15281 short_integer_type_node,
15282 short_integer_type_node,
15283 short_integer_type_node, NULL_TREE);
15284 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15285 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15286
15287 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15288 char_type_node, char_type_node,
15289 char_type_node, char_type_node,
15290 char_type_node, char_type_node,
15291 char_type_node, NULL_TREE);
15292 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15293 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15294
15295 /* Access to the vec_extract patterns. */
15296 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15297 integer_type_node, NULL_TREE);
15298 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15299 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15300
15301 ftype = build_function_type_list (long_long_integer_type_node,
15302 V2DI_type_node, integer_type_node,
15303 NULL_TREE);
15304 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15305 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15306
15307 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15308 integer_type_node, NULL_TREE);
15309 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15310 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15311
ed9b5396
RH
15312 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15313 integer_type_node, NULL_TREE);
15314 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15315 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15316
eb701deb
RH
15317 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15318 integer_type_node, NULL_TREE);
15319 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15320 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15321
15322 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15323 integer_type_node, NULL_TREE);
15324 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15325 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15326
0f2698d0
RH
15327 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15328 integer_type_node, NULL_TREE);
15329 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15330 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15331
eb701deb
RH
15332 /* Access to the vec_set patterns. */
15333 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15334 intHI_type_node,
15335 integer_type_node, NULL_TREE);
15336 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15337 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15338
15339 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15340 intHI_type_node,
15341 integer_type_node, NULL_TREE);
15342 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15343 ftype, IX86_BUILTIN_VEC_SET_V4HI);
bd793c65 15344}
ee963181
RG
15345#undef def_builtin
15346
/* Set up all the SSE ABI builtins that we may use to override
   the normal builtins.  */
static void
ix86_init_sse_abi_builtins (void)
{
  /* FLT: one-argument float function type (derived from sinf);
     FLT2: two-argument float function type (derived from atan2f).  */
  tree flt, flt2;

  /* Bail out in case the template definitions are not available.  */
  if (! built_in_decls [BUILT_IN_SIN]
      || ! built_in_decls [BUILT_IN_SINF]
      || ! built_in_decls [BUILT_IN_ATAN2]
      || ! built_in_decls [BUILT_IN_ATAN2F])
    return;

  /* Build the function types as variants of the existing ones.  */
  flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
  /* Tag the variant with the "sseregparm" attribute so the arguments
     are passed in SSE registers.  */
  TYPE_ATTRIBUTES (flt)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (flt));
  flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
  TYPE_ATTRIBUTES (flt2)
    = tree_cons (get_identifier ("sseregparm"),
		 NULL_TREE, TYPE_ATTRIBUTES (flt2));

  /* Register "__builtin_sse2_NAME" (library name "__libm_sse2_NAME")
     and record the decl in ix86_builtin_function_variants, indexed by
     the normal builtin it can replace.  */
#define def_builtin(capname, name, type) \
  ix86_builtin_function_variants [BUILT_IN_ ## capname] \
    = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
				   IX86_BUILTIN_SSE2_ ## capname, \
				   BUILT_IN_NORMAL, \
				   "__libm_sse2_" # name, NULL_TREE)

  def_builtin (ACOSF, acosf, flt);
  def_builtin (ASINF, asinf, flt);
  def_builtin (ATANF, atanf, flt);
  def_builtin (ATAN2F, atan2f, flt2);
  def_builtin (COSF, cosf, flt);
  def_builtin (EXPF, expf, flt);
  def_builtin (LOG10F, log10f, flt);
  def_builtin (LOGF, logf, flt);
  def_builtin (SINF, sinf, flt);
  def_builtin (TANF, tanf, flt);

#undef def_builtin
}
bd793c65
BS
15391
15392/* Errors in the source file can cause expand_expr to return const0_rtx
15393 where we expect a vector. To avoid crashing, use one of the vector
15394 clear instructions. */
15395static rtx
b96a374d 15396safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65 15397{
ef719a44
RH
15398 if (x == const0_rtx)
15399 x = CONST0_RTX (mode);
bd793c65
BS
15400 return x;
15401}
15402
/* Subroutine of ix86_expand_builtin to take care of binop insns.
   ICODE is the two-input insn to emit; ARGLIST holds the two source
   trees; TARGET is a suggested destination (may be NULL or reused).
   Returns the rtx holding the result, or 0 if ICODE's generator
   refused the operands.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx appearing where a vector is expected
     (happens for erroneous source).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Get a fresh pseudo unless TARGET already fits the insn's output.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode value feeding a TImode operand: load it into the low
     element of a V4SI register and view that as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All modes match: let the canonical fixup legitimize the
	 operands (it may rewrite the xops array in place).  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mismatched modes: force everything into registers.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
15470
15471/* Subroutine of ix86_expand_builtin to take care of stores. */
15472
15473static rtx
b96a374d 15474ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
15475{
15476 rtx pat;
15477 tree arg0 = TREE_VALUE (arglist);
15478 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
84217346
MD
15479 rtx op0 = expand_normal (arg0);
15480 rtx op1 = expand_normal (arg1);
bd793c65
BS
15481 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15482 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15483
15484 if (VECTOR_MODE_P (mode1))
15485 op1 = safe_vector_operand (op1, mode1);
15486
15487 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 15488 op1 = copy_to_mode_reg (mode1, op1);
59bef189 15489
bd793c65
BS
15490 pat = GEN_FCN (icode) (op0, op1);
15491 if (pat)
15492 emit_insn (pat);
15493 return 0;
15494}
15495
15496/* Subroutine of ix86_expand_builtin to take care of unop insns. */
15497
15498static rtx
b96a374d
AJ
15499ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15500 rtx target, int do_load)
bd793c65
BS
15501{
15502 rtx pat;
15503 tree arg0 = TREE_VALUE (arglist);
84217346 15504 rtx op0 = expand_normal (arg0);
bd793c65
BS
15505 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15506 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15507
e358acde 15508 if (optimize || !target
bd793c65
BS
15509 || GET_MODE (target) != tmode
15510 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15511 target = gen_reg_rtx (tmode);
15512 if (do_load)
15513 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15514 else
15515 {
15516 if (VECTOR_MODE_P (mode0))
15517 op0 = safe_vector_operand (op0, mode0);
15518
e358acde
RH
15519 if ((optimize && !register_operand (op0, mode0))
15520 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65
BS
15521 op0 = copy_to_mode_reg (mode0, op0);
15522 }
15523
15524 pat = GEN_FCN (icode) (target, op0);
15525 if (! pat)
15526 return 0;
15527 emit_insn (pat);
15528 return target;
15529}
15530
15531/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15532 sqrtss, rsqrtss, rcpss. */
15533
15534static rtx
b96a374d 15535ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
15536{
15537 rtx pat;
15538 tree arg0 = TREE_VALUE (arglist);
84217346 15539 rtx op1, op0 = expand_normal (arg0);
bd793c65
BS
15540 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15541 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15542
e358acde 15543 if (optimize || !target
bd793c65
BS
15544 || GET_MODE (target) != tmode
15545 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15546 target = gen_reg_rtx (tmode);
15547
15548 if (VECTOR_MODE_P (mode0))
15549 op0 = safe_vector_operand (op0, mode0);
15550
e358acde
RH
15551 if ((optimize && !register_operand (op0, mode0))
15552 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 15553 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 15554
59bef189
RH
15555 op1 = op0;
15556 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15557 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 15558
59bef189 15559 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
15560 if (! pat)
15561 return 0;
15562 emit_insn (pat);
15563 return target;
15564}
15565
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin: icode, the rtx comparison code, and whether
   the operands must be swapped.  Returns the vector-mask result.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx standing in for a vector.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      /* Copy op1 into a fresh register first: the insn's destructive
	 first operand must not alias the user's value.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the insn pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
15617
/* Subroutine of ix86_expand_builtin to take care of comi insns.
   D describes the builtin: icode, the rtx comparison code, and whether
   the operands must be swapped.  Emits the comi/ucomi flag-setting
   insn and materializes the flag as a 0/1 SImode value.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx standing in for a vector.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Build the result: zero an SImode pseudo, then set only its low
     QImode part from the flags (the incoming TARGET is ignored).  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* SET_DEST (pat) is the flags register; store COMPARISON's 0/1
     outcome into the low byte of the result without disturbing the
     already-zeroed upper bits.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
15672
eb701deb
RH
15673/* Return the integer constant in ARG. Constrain it to be in the range
15674 of the subparts of VEC_TYPE; issue an error if not. */
15675
15676static int
15677get_element_number (tree vec_type, tree arg)
15678{
15679 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15680
15681 if (!host_integerp (arg, 1)
15682 || (elt = tree_low_cst (arg, 1), elt > max))
15683 {
ea40ba9c 15684 error ("selector must be an integer constant in the range 0..%wi", max);
eb701deb
RH
15685 return 0;
15686 }
15687
15688 return elt;
15689}
15690
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  /* Expand one argument per vector element, viewing each value in the
     element mode.  */
  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  /* The argument list must supply exactly n_elt values.  */
  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
15724
15725/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15726 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15727 had a language-level syntax for referencing vector elements. */
15728
15729static rtx
15730ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15731{
15732 enum machine_mode tmode, mode0;
15733 tree arg0, arg1;
15734 int elt;
15735 rtx op0;
15736
15737 arg0 = TREE_VALUE (arglist);
15738 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15739
84217346 15740 op0 = expand_normal (arg0);
eb701deb
RH
15741 elt = get_element_number (TREE_TYPE (arg0), arg1);
15742
15743 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15744 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15745 gcc_assert (VECTOR_MODE_P (mode0));
15746
15747 op0 = force_reg (mode0, op0);
15748
15749 if (optimize || !target || !register_operand (target, tmode))
15750 target = gen_reg_rtx (tmode);
15751
15752 ix86_expand_vector_extract (true, target, op0, elt);
15753
15754 return target;
15755}
15756
15757/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15758 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15759 a language-level syntax for referencing vector elements. */
15760
15761static rtx
15762ix86_expand_vec_set_builtin (tree arglist)
15763{
15764 enum machine_mode tmode, mode1;
15765 tree arg0, arg1, arg2;
15766 int elt;
15767 rtx op0, op1;
15768
15769 arg0 = TREE_VALUE (arglist);
15770 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15771 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15772
15773 tmode = TYPE_MODE (TREE_TYPE (arg0));
15774 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15775 gcc_assert (VECTOR_MODE_P (tmode));
15776
15777 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15778 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15779 elt = get_element_number (TREE_TYPE (arg0), arg2);
15780
15781 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15782 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15783
15784 op0 = force_reg (tmode, op0);
15785 op1 = force_reg (mode1, op1);
15786
15787 ix86_expand_vector_set (true, op0, op1, elt);
15788
15789 return op0;
15790}
15791
bd793c65
BS
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins that need hand-written expansion are special-cased in this
     switch; everything else falls through to the table-driven loops over
     bdesc_2arg / bdesc_1arg / bdesc_comi below.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* The destination argument is an address; wrap it in a MEM so the
	 maskmov pattern sees the store destination.  */
      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      /* Second argument is a pointer; load the half-vector from memory.  */
      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      /* First argument is the destination pointer.  */
      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr only takes a memory operand; stage the value through a
	 stack temporary.  */
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* stmxcsr writes to memory; read the result back from the slot.  */
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* The shift patterns operate on TImode; view the V2DI operand and
	 result through subregs.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    /* 3DNow! builtins, mapped directly onto their insn patterns.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      /* monitor/mwait take their operands in fixed registers; the
	 patterns expect SImode values.  */
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_sse3_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  /* Not special-cased above: look the code up in the generic two-operand,
     one-operand and comparison tables.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* Every IX86_BUILTIN_* code must be handled by one of the paths above.  */
  gcc_unreachable ();
}
4211a8fb 16224
ee963181
RG
16225/* Expand an expression EXP that calls a built-in library function,
16226 with result going to TARGET if that's convenient
16227 (and in mode MODE if that's convenient).
16228 SUBTARGET may be used as the target for computing one of EXP's operands.
16229 IGNORE is nonzero if the value is to be ignored. */
16230
16231static rtx
16232ix86_expand_library_builtin (tree exp, rtx target,
16233 rtx subtarget ATTRIBUTE_UNUSED,
16234 enum machine_mode mode ATTRIBUTE_UNUSED,
16235 int ignore)
16236{
16237 enum built_in_function fncode;
16238 tree fndecl, newfn, call;
16239
16240 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16241 if (!TARGET_SSELIBM)
16242 return NULL_RTX;
16243
16244 fncode = builtin_mathfn_code (exp);
16245 if (!ix86_builtin_function_variants [(int)fncode])
16246 return NULL_RTX;
16247
16248 fndecl = get_callee_fndecl (exp);
16249 if (DECL_RTL_SET_P (fndecl))
16250 return NULL_RTX;
16251
16252 /* Build the redirected call and expand it. */
16253 newfn = ix86_builtin_function_variants [(int)fncode];
16254 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16255 return expand_call (call, target, ignore);
16256}
16257
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  Returns a MEM rtx
   through which the stored value can be reloaded.  Depending on the
   target, the value is either placed in the red zone below the stack
   pointer or pushed onto the stack.  Pair with ix86_free_from_memory
   to release the stack space in the push case.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      /* With a red zone we can use memory below the stack pointer
	 without adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit without red zone: push a single DImode word.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Narrow modes are widened so a full word is pushed.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push one or two SImode words.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Push high word first so the value ends up little-endian
	       contiguous at the final stack pointer.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
16336
16337/* Free operand from the memory. */
16338void
b96a374d 16339ix86_free_from_memory (enum machine_mode mode)
4211a8fb 16340{
a5b378d6 16341 if (!TARGET_RED_ZONE)
898d374d
JH
16342 {
16343 int size;
16344
16345 if (mode == DImode || TARGET_64BIT)
16346 size = 8;
898d374d
JH
16347 else
16348 size = 4;
16349 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16350 to pop or add instruction if registers are available. */
16351 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16352 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16353 GEN_INT (size))));
16354 }
4211a8fb 16355}
a946dd00 16356
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   Implements PREFERRED_RELOAD_CLASS: returns CLASS, a subclass of it,
   or NO_REGS (meaning "reload X into memory instead").  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (non-zero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      /* Any other FP constant goes to the constant pool.  */
      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}
16432
b5c82fa1
PB
16433/* Discourage putting floating-point values in SSE registers unless
16434 SSE math is being used, and likewise for the 387 registers. */
16435enum reg_class
16436ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16437{
16438 enum machine_mode mode = GET_MODE (x);
16439
16440 /* Restrict the output reload class to the register bank that we are doing
16441 math on. If we would like not to return a subset of CLASS, reject this
16442 alternative: if reload cannot do this, it will still use its choice. */
16443 mode = GET_MODE (x);
16444 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16445 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16446
16447 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16448 {
16449 if (class == FP_TOP_SSE_REGS)
16450 return FP_TOP_REG;
16451 else if (class == FP_SECOND_SSE_REGS)
16452 return FP_SECOND_REG;
16453 else
16454 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16455 }
16456
16457 return class;
16458}
16459
f84aa48a
JH
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.

   Returns nonzero (true) when a move between CLASS1 and CLASS2 in MODE
   must go through memory.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  /* Mixed-unit classes: invalid in strict mode, conservatively "needs
     memory" when merely computing costs.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  /* x87 <-> non-x87 always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      /* ??? For the cost of one register reformat penalty, we could use
	 the same instructions to move SFmode and DFmode data, but the
	 relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
	return true;
    }

  return false;
}
f75959a6 16519
1272914c
RH
16520/* Return true if the registers in CLASS cannot represent the change from
16521 modes FROM to TO. */
16522
16523bool
16524ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16525 enum reg_class class)
16526{
16527 if (from == to)
16528 return false;
16529
0fa2e4df 16530 /* x87 registers can't do subreg at all, as all values are reformatted
1272914c
RH
16531 to extended precision. */
16532 if (MAYBE_FLOAT_CLASS_P (class))
16533 return true;
16534
16535 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16536 {
16537 /* Vector registers do not support QI or HImode loads. If we don't
16538 disallow a change to these modes, reload will assume it's ok to
16539 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16540 the vec_dupv4hi pattern. */
16541 if (GET_MODE_SIZE (from) < 4)
16542 return true;
16543
16544 /* Vector registers do not support subreg with nonzero offsets, which
16545 are otherwise valid for integer registers. Since we can't see
16546 whether we have a nonzero offset from here, prohibit all
16547 nonparadoxical subregs changing size. */
16548 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16549 return true;
16550 }
16551
16552 return false;
16553}
16554
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Take the worse of the in/out memory costs for each side.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  /* General-purpose to general-purpose default.  */
  return 2;
}
16606
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  /* The remaining cases are general-purpose registers.  */
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      /* Once reload has started, let it place QImode anywhere.  */
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
fa79946e 16664
c1c5b5e3
RH
16665/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16666 tieable integer mode. */
16667
16668static bool
16669ix86_tieable_integer_mode_p (enum machine_mode mode)
16670{
16671 switch (mode)
16672 {
16673 case HImode:
16674 case SImode:
16675 return true;
16676
16677 case QImode:
16678 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16679
16680 case DImode:
16681 return TARGET_64BIT;
16682
16683 default:
16684 return false;
16685 }
16686}
16687
16688/* Return true if MODE1 is accessible in a register that can hold MODE2
16689 without copying. That is, all register classes that can hold MODE2
16690 can also hold MODE1. */
16691
16692bool
16693ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16694{
16695 if (mode1 == mode2)
16696 return true;
16697
16698 if (ix86_tieable_integer_mode_p (mode1)
16699 && ix86_tieable_integer_mode_p (mode2))
16700 return true;
16701
16702 /* MODE2 being XFmode implies fp stack or general regs, which means we
16703 can tie any smaller floating point modes to it. Note that we do not
16704 tie this with TFmode. */
16705 if (mode2 == XFmode)
16706 return mode1 == SFmode || mode1 == DFmode;
16707
16708 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16709 that we can tie it with SFmode. */
16710 if (mode2 == DFmode)
16711 return mode1 == SFmode;
16712
16713 /* If MODE2 is only appropriate for an SSE register, then tie with
16714 any other mode acceptable to SSE registers. */
b4e82619
RH
16715 if (GET_MODE_SIZE (mode2) >= 8
16716 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
c1c5b5e3
RH
16717 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16718
16719 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16720 with any other mode acceptable to MMX registers. */
b4e82619
RH
16721 if (GET_MODE_SIZE (mode2) == 8
16722 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
c1c5b5e3
RH
16723 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16724
16725 return false;
16726}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  /* x87 floating point classes: costs come from the per-CPU tables,
     indexed 0/1/2 for SF/DF/XF.  */
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  /* Not a valid x87 mode; make it prohibitively expensive.  */
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  /* SSE classes: index 0/1/2 for 4/8/16-byte accesses.  */
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  /* MMX classes: index 0/1 for 4/8-byte accesses.  */
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* General purpose registers, by access size.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	/* A byte load into a non-Q class needs movzbl; model that as
	   more expensive than a plain byte load.  */
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	/* Byte stores from non-Q registers need an extra move through
	   a byte-addressable register; charge 4 extra.  */
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
0ecf09f9 16817
3c50106f
RH
16818/* Compute a (partial) cost for rtx X. Return true if the complete
16819 cost has been computed, and false if subexpressions should be
16820 scanned. In either case, *TOTAL contains the cost result. */
16821
16822static bool
b96a374d 16823ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
16824{
16825 enum machine_mode mode = GET_MODE (x);
16826
16827 switch (code)
16828 {
16829 case CONST_INT:
16830 case CONST:
16831 case LABEL_REF:
16832 case SYMBOL_REF:
8fe75e43 16833 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 16834 *total = 3;
8fe75e43 16835 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 16836 *total = 2;
3504dad3
JH
16837 else if (flag_pic && SYMBOLIC_CONST (x)
16838 && (!TARGET_64BIT
16839 || (!GET_CODE (x) != LABEL_REF
16840 && (GET_CODE (x) != SYMBOL_REF
12969f45 16841 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
16842 *total = 1;
16843 else
16844 *total = 0;
16845 return true;
16846
16847 case CONST_DOUBLE:
16848 if (mode == VOIDmode)
16849 *total = 0;
16850 else
16851 switch (standard_80387_constant_p (x))
16852 {
16853 case 1: /* 0.0 */
16854 *total = 1;
16855 break;
881b2a96 16856 default: /* Other constants */
3c50106f
RH
16857 *total = 2;
16858 break;
881b2a96
RS
16859 case 0:
16860 case -1:
3c50106f
RH
16861 /* Start with (MEM (SYMBOL_REF)), since that's where
16862 it'll probably end up. Add a penalty for size. */
16863 *total = (COSTS_N_INSNS (1)
3504dad3 16864 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
16865 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16866 break;
16867 }
16868 return true;
16869
16870 case ZERO_EXTEND:
16871 /* The zero extensions is often completely free on x86_64, so make
16872 it as cheap as possible. */
16873 if (TARGET_64BIT && mode == DImode
16874 && GET_MODE (XEXP (x, 0)) == SImode)
16875 *total = 1;
16876 else if (TARGET_ZERO_EXTEND_WITH_AND)
a9cc9cc6 16877 *total = ix86_cost->add;
3c50106f 16878 else
a9cc9cc6 16879 *total = ix86_cost->movzx;
3c50106f
RH
16880 return false;
16881
16882 case SIGN_EXTEND:
a9cc9cc6 16883 *total = ix86_cost->movsx;
3c50106f
RH
16884 return false;
16885
16886 case ASHIFT:
16887 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16888 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16889 {
16890 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16891 if (value == 1)
16892 {
a9cc9cc6 16893 *total = ix86_cost->add;
3c50106f
RH
16894 return false;
16895 }
16896 if ((value == 2 || value == 3)
3c50106f
RH
16897 && ix86_cost->lea <= ix86_cost->shift_const)
16898 {
a9cc9cc6 16899 *total = ix86_cost->lea;
3c50106f
RH
16900 return false;
16901 }
16902 }
5efb1046 16903 /* FALLTHRU */
3c50106f
RH
16904
16905 case ROTATE:
16906 case ASHIFTRT:
16907 case LSHIFTRT:
16908 case ROTATERT:
16909 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16910 {
16911 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16912 {
16913 if (INTVAL (XEXP (x, 1)) > 32)
a9cc9cc6 16914 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
3c50106f 16915 else
a9cc9cc6 16916 *total = ix86_cost->shift_const * 2;
3c50106f
RH
16917 }
16918 else
16919 {
16920 if (GET_CODE (XEXP (x, 1)) == AND)
a9cc9cc6 16921 *total = ix86_cost->shift_var * 2;
3c50106f 16922 else
a9cc9cc6 16923 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
3c50106f
RH
16924 }
16925 }
16926 else
16927 {
16928 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
a9cc9cc6 16929 *total = ix86_cost->shift_const;
3c50106f 16930 else
a9cc9cc6 16931 *total = ix86_cost->shift_var;
3c50106f
RH
16932 }
16933 return false;
16934
16935 case MULT:
16936 if (FLOAT_MODE_P (mode))
3c50106f 16937 {
a9cc9cc6 16938 *total = ix86_cost->fmul;
4a5eab38 16939 return false;
3c50106f
RH
16940 }
16941 else
16942 {
4a5eab38
PB
16943 rtx op0 = XEXP (x, 0);
16944 rtx op1 = XEXP (x, 1);
16945 int nbits;
16946 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16947 {
16948 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16949 for (nbits = 0; value != 0; value &= value - 1)
16950 nbits++;
16951 }
16952 else
16953 /* This is arbitrary. */
16954 nbits = 7;
16955
16956 /* Compute costs correctly for widening multiplication. */
16957 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
16958 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16959 == GET_MODE_SIZE (mode))
16960 {
16961 int is_mulwiden = 0;
16962 enum machine_mode inner_mode = GET_MODE (op0);
16963
16964 if (GET_CODE (op0) == GET_CODE (op1))
16965 is_mulwiden = 1, op1 = XEXP (op1, 0);
16966 else if (GET_CODE (op1) == CONST_INT)
16967 {
16968 if (GET_CODE (op0) == SIGN_EXTEND)
16969 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16970 == INTVAL (op1);
16971 else
16972 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16973 }
16974
16975 if (is_mulwiden)
16976 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16977 }
f676971a 16978
a9cc9cc6
JH
16979 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16980 + nbits * ix86_cost->mult_bit
16981 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
4a5eab38
PB
16982
16983 return true;
3c50106f 16984 }
3c50106f
RH
16985
16986 case DIV:
16987 case UDIV:
16988 case MOD:
16989 case UMOD:
16990 if (FLOAT_MODE_P (mode))
a9cc9cc6 16991 *total = ix86_cost->fdiv;
3c50106f 16992 else
a9cc9cc6 16993 *total = ix86_cost->divide[MODE_INDEX (mode)];
3c50106f
RH
16994 return false;
16995
16996 case PLUS:
16997 if (FLOAT_MODE_P (mode))
a9cc9cc6 16998 *total = ix86_cost->fadd;
e0c00392 16999 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
17000 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17001 {
17002 if (GET_CODE (XEXP (x, 0)) == PLUS
17003 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17004 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17005 && CONSTANT_P (XEXP (x, 1)))
17006 {
17007 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17008 if (val == 2 || val == 4 || val == 8)
17009 {
a9cc9cc6 17010 *total = ix86_cost->lea;
3c50106f
RH
17011 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17012 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17013 outer_code);
17014 *total += rtx_cost (XEXP (x, 1), outer_code);
17015 return true;
17016 }
17017 }
17018 else if (GET_CODE (XEXP (x, 0)) == MULT
17019 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17020 {
17021 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17022 if (val == 2 || val == 4 || val == 8)
17023 {
a9cc9cc6 17024 *total = ix86_cost->lea;
3c50106f
RH
17025 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17026 *total += rtx_cost (XEXP (x, 1), outer_code);
17027 return true;
17028 }
17029 }
17030 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17031 {
a9cc9cc6 17032 *total = ix86_cost->lea;
3c50106f
RH
17033 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17034 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17035 *total += rtx_cost (XEXP (x, 1), outer_code);
17036 return true;
17037 }
17038 }
5efb1046 17039 /* FALLTHRU */
3c50106f
RH
17040
17041 case MINUS:
17042 if (FLOAT_MODE_P (mode))
17043 {
a9cc9cc6 17044 *total = ix86_cost->fadd;
3c50106f
RH
17045 return false;
17046 }
5efb1046 17047 /* FALLTHRU */
3c50106f
RH
17048
17049 case AND:
17050 case IOR:
17051 case XOR:
17052 if (!TARGET_64BIT && mode == DImode)
17053 {
a9cc9cc6 17054 *total = (ix86_cost->add * 2
3c50106f
RH
17055 + (rtx_cost (XEXP (x, 0), outer_code)
17056 << (GET_MODE (XEXP (x, 0)) != DImode))
17057 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 17058 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
17059 return true;
17060 }
5efb1046 17061 /* FALLTHRU */
3c50106f
RH
17062
17063 case NEG:
17064 if (FLOAT_MODE_P (mode))
17065 {
a9cc9cc6 17066 *total = ix86_cost->fchs;
3c50106f
RH
17067 return false;
17068 }
5efb1046 17069 /* FALLTHRU */
3c50106f
RH
17070
17071 case NOT:
17072 if (!TARGET_64BIT && mode == DImode)
a9cc9cc6 17073 *total = ix86_cost->add * 2;
3c50106f 17074 else
a9cc9cc6 17075 *total = ix86_cost->add;
3c50106f
RH
17076 return false;
17077
c271ba77
KH
17078 case COMPARE:
17079 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17080 && XEXP (XEXP (x, 0), 1) == const1_rtx
17081 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17082 && XEXP (x, 1) == const0_rtx)
17083 {
17084 /* This kind of construct is implemented using test[bwl].
17085 Treat it as if we had an AND. */
a9cc9cc6 17086 *total = (ix86_cost->add
c271ba77
KH
17087 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17088 + rtx_cost (const1_rtx, outer_code));
17089 return true;
17090 }
17091 return false;
17092
3c50106f 17093 case FLOAT_EXTEND:
dcbca208
RH
17094 if (!TARGET_SSE_MATH
17095 || mode == XFmode
17096 || (mode == DFmode && !TARGET_SSE2))
3c50106f
RH
17097 *total = 0;
17098 return false;
17099
17100 case ABS:
17101 if (FLOAT_MODE_P (mode))
a9cc9cc6 17102 *total = ix86_cost->fabs;
3c50106f
RH
17103 return false;
17104
17105 case SQRT:
17106 if (FLOAT_MODE_P (mode))
a9cc9cc6 17107 *total = ix86_cost->fsqrt;
3c50106f
RH
17108 return false;
17109
74dc3e94
RH
17110 case UNSPEC:
17111 if (XINT (x, 1) == UNSPEC_TP)
17112 *total = 0;
17113 return false;
17114
3c50106f
RH
17115 default:
17116 return false;
17117 }
17118}
17119
#if TARGET_MACHO

/* Counter used to generate unique local labels (LPC$N, L<N>$lz) for
   each stub emitted in this translation unit.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  The stub jumps through a lazy pointer
   which initially targets the binder fragment, so the first call
   resolves the symbol via dyld_stub_binding_helper.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Pure PIC stub: materialize the PC in %eax via call/pop, load
	 the lazy pointer PC-relative into %edx and jump through it.  */
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    /* Non-PIC stub: an absolute indirect jump suffices.  */
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The binder fragment pushes the lazy pointer's address and enters
     the dynamic linker's binding helper.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      /* %eax still holds LPC$N from the stub above.  */
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl\t%%eax\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initialized to the binder so the
     first call triggers symbol resolution.  */
  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
/* End-of-file hook for Darwin/x86: run the Darwin-generic file-end
   code, then the common i386 file-end code.  NOTE(review): the order
   (Darwin first) is preserved from the original — presumably so
   Mach-O stub/indirection sections are flushed before any generic
   i386 epilogue output; confirm before reordering.  */
void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
#endif /* TARGET_MACHO */
17189
162f023b
JH
17190/* Order the registers for register allocator. */
17191
17192void
b96a374d 17193x86_order_regs_for_local_alloc (void)
162f023b
JH
17194{
17195 int pos = 0;
17196 int i;
17197
17198 /* First allocate the local general purpose registers. */
17199 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17200 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17201 reg_alloc_order [pos++] = i;
17202
17203 /* Global general purpose registers. */
17204 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17205 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17206 reg_alloc_order [pos++] = i;
17207
17208 /* x87 registers come first in case we are doing FP math
17209 using them. */
17210 if (!TARGET_SSE_MATH)
17211 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17212 reg_alloc_order [pos++] = i;
fce5a9f2 17213
162f023b
JH
17214 /* SSE registers. */
17215 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17216 reg_alloc_order [pos++] = i;
17217 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17218 reg_alloc_order [pos++] = i;
17219
d1f87653 17220 /* x87 registers. */
162f023b
JH
17221 if (TARGET_SSE_MATH)
17222 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17223 reg_alloc_order [pos++] = i;
17224
17225 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17226 reg_alloc_order [pos++] = i;
17227
17228 /* Initialize the rest of array as we do not allocate some registers
17229 at all. */
17230 while (pos < FIRST_PSEUDO_REGISTER)
17231 reg_alloc_order [pos++] = 0;
17232}
194734e9 17233
fe77449a
DR
17234/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17235 struct attribute_spec.handler. */
17236static tree
b96a374d
AJ
17237ix86_handle_struct_attribute (tree *node, tree name,
17238 tree args ATTRIBUTE_UNUSED,
17239 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
17240{
17241 tree *type = NULL;
17242 if (DECL_P (*node))
17243 {
17244 if (TREE_CODE (*node) == TYPE_DECL)
17245 type = &TREE_TYPE (*node);
17246 }
17247 else
17248 type = node;
17249
17250 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17251 || TREE_CODE (*type) == UNION_TYPE)))
17252 {
5c498b10
DD
17253 warning (OPT_Wattributes, "%qs attribute ignored",
17254 IDENTIFIER_POINTER (name));
fe77449a
DR
17255 *no_add_attrs = true;
17256 }
17257
17258 else if ((is_attribute_p ("ms_struct", name)
17259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17260 || ((is_attribute_p ("gcc_struct", name)
17261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17262 {
5c498b10 17263 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
fe77449a
DR
17264 IDENTIFIER_POINTER (name));
17265 *no_add_attrs = true;
17266 }
17267
17268 return NULL_TREE;
17269}
17270
4977bab6 17271static bool
b96a374d 17272ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 17273{
6ac49599 17274 return (TARGET_MS_BITFIELD_LAYOUT &&
021bad8e 17275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 17276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
17277}
17278
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* Under the 64-bit ABI `this' is in the first integer argument
	 register, or the second when a hidden aggregate-return pointer
	 occupies the first.  */
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  /* fastcall passes the first argument in %ecx instead of
	     %eax.  */
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Stack-passed `this': at 4(%esp) past the return address, or at
     8(%esp) when a hidden aggregate-return pointer comes first.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
17318
3961e8fe
RH
17319/* Determine whether x86_output_mi_thunk can succeed. */
17320
17321static bool
b96a374d
AJ
17322x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17323 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17324 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
17325{
17326 /* 64-bit can handle anything. */
17327 if (TARGET_64BIT)
17328 return true;
17329
17330 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 17331 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
17332 return true;
17333
17334 /* Need a free register for vcall_offset. */
17335 if (vcall_offset)
17336 return false;
17337
17338 /* Need a free register for GOT references. */
17339 if (flag_pic && !(*targetm.binds_local_p) (function))
17340 return false;
17341
17342 /* Otherwise ok. */
17343 return true;
17344}
17345
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* DELTA may not fit in an add immediate; stage it through
	     R10 (caller-saved, unused by the thunk's arguments).  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      /* Pick a scratch register for the vtable pointer.  */
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  /* %ecx is free unless fastcall uses it for the first
	     argument, in which case %eax is free instead.  */
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer: tmp = *this.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET does not fit a displacement; compute the
	     address with R11 as a second scratch.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Finally, tail-call the target function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local under PIC: jump through the GOT entry.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin PIC: jump to the Mach-O indirection stub.  */
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* ELF PIC: load the GOT base into %ecx (free at this point)
	     and jump through the function's GOT slot.  */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp, NULL_RTX);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 17484
/* Implement the file-start hook: emit the common prologue, plus the
   target-conditional .version, __fltused and Intel-syntax
   directives.  */
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    /* Reference __fltused so the runtime pulls in FP support.  */
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
17499
e932b21b 17500int
b96a374d 17501x86_field_alignment (tree field, int computed)
e932b21b
JH
17502{
17503 enum machine_mode mode;
ad9335eb
JJ
17504 tree type = TREE_TYPE (field);
17505
17506 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 17507 return computed;
ad9335eb
JJ
17508 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17509 ? get_inner_array_type (type) : type);
39e3a681
JJ
17510 if (mode == DFmode || mode == DCmode
17511 || GET_MODE_CLASS (mode) == MODE_INT
17512 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
17513 return MIN (32, computed);
17514 return computed;
17515}
17516
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	/* NOTE(review): the "@" immediately before "(%rip)" looks like
	   a stray character — other branches use plain, @GOT or
	   @GOTOFF relocations.  Confirm the assembler accepts this
	   form before changing the string.  */
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      /* 32-bit PIC: the counter address is GOT-relative via %ebx.  */
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
17554
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Returns a conservative lower bound, in bytes, on the
   encoded size of INSN.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* 1 byte for the opcode, plus the address bytes; 2 when no address
     size is known.  */
  if (l)
    return 1+l;
  else
    return 2;
}
17600
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Both branches and calls count; ADDR_VEC/ADDR_DIFF_VEC patterns
	 are jump tables, not branch instructions.  */
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the window from the left until it contains at most 3
	 jumps; ISJUMP records whether the insn dropped most recently
	 was one of them.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  /* Pad so that INSN cannot share a 16 byte window with the
	     three preceding jumps.  */
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
17664
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Examine every block that can reach the exit block, i.e. every block
     ending in a return.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      /* Only plain RETURN patterns in blocks considered hot are worth
	 the larger encoding.  */
      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the nearest preceding active insn or code label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  edge_iterator ei;

	  /* The RET is a branch target: some non-fallthru edge with
	     nonzero frequency jumps straight at it.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* The RET directly follows a conditional jump or a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Substitute the long form of the return insn.  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
17717
17718/* Implement machine specific optimizations. We implement padding of returns
17719 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17720static void
17721ix86_reorg (void)
17722{
d326eaf0 17723 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
be04394b
JH
17724 ix86_pad_returns ();
17725 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17726 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
17727}
17728
4977bab6
ZW
17729/* Return nonzero when QImode register that must be represented via REX prefix
17730 is used. */
17731bool
b96a374d 17732x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
17733{
17734 int i;
17735 extract_insn_cached (insn);
17736 for (i = 0; i < recog_data.n_operands; i++)
17737 if (REG_P (recog_data.operand[i])
17738 && REGNO (recog_data.operand[i]) >= 4)
17739 return true;
17740 return false;
17741}
17742
17743/* Return nonzero when P points to register encoded via REX prefix.
17744 Called via for_each_rtx. */
17745static int
b96a374d 17746extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
17747{
17748 unsigned int regno;
17749 if (!REG_P (*p))
17750 return 0;
17751 regno = REGNO (*p);
17752 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17753}
17754
17755/* Return true when INSN mentions register that must be encoded using REX
17756 prefix. */
17757bool
b96a374d 17758x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
17759{
17760 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17761}
17762
1d6ba901 17763/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
17764 optabs would emit if we didn't have TFmode patterns. */
17765
17766void
b96a374d 17767x86_emit_floatuns (rtx operands[2])
8d705469
JH
17768{
17769 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
17770 enum machine_mode mode, inmode;
17771
17772 inmode = GET_MODE (operands[1]);
d0396b79 17773 gcc_assert (inmode == SImode || inmode == DImode);
8d705469
JH
17774
17775 out = operands[0];
1d6ba901 17776 in = force_reg (inmode, operands[1]);
8d705469
JH
17777 mode = GET_MODE (out);
17778 neglab = gen_label_rtx ();
17779 donelab = gen_label_rtx ();
17780 i1 = gen_reg_rtx (Pmode);
17781 f0 = gen_reg_rtx (mode);
17782
17783 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17784
17785 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17786 emit_jump_insn (gen_jump (donelab));
17787 emit_barrier ();
17788
17789 emit_label (neglab);
17790
17791 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17792 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17793 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17794 expand_float (f0, i0, 0);
17795 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17796
17797 emit_label (donelab);
17798}
eb701deb
RH
17799\f
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      /* MMX modes require either caller permission (MMX_OK) or SSE.  */
      if (!mmx_ok && !TARGET_SSE)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a direct VEC_DUPLICATE pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  /* Duplicate the low HImode part of VAL directly.  */
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse:
	 val = (val << bits) | val doubles the element, halving the
	 number of vector elements to fill.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
17881
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* Two element vectors: concatenate VAR with a zero element.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* Merge element 0 of the VAR broadcast (mask const1_rtx) with
	 the zero vector.  */
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
17941
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the constant image of VALS with the variable slot zeroed.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  /* Odd index: the variable byte is the high half of the
	     HImode pair.  */
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  /* Even index: the variable byte is the low half.  */
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  /* Load the constant image, then overwrite the variable element.  */
  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
18016
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.
	 HALF_MODE here is the element mode.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtvec v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      /* Built below out of word-sized integer pieces.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      /* Assemble each machine word with shift-and-or, starting from the
	 highest-numbered element so it lands in the high bits.  */
      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      /* Move the assembled words into the vector register.  */
      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  /* The CLOBBER tells the optimizers the full register is
	     rewritten before the partial stores below.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
18145
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the initializer: count non-constant elements (remembering
     the index of the last one seen), and track whether all elements
     are equal, or all constants zero.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      /* Special case: variable low element, everything else zero.  */
      if (all_const_zero && one_var == 0
	  && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, 0)))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  /* Fall back to the fully general expansion.  */
  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
18199
/* Store VAL into element ELT of vector register TARGET, leaving the
   other elements unchanged.  Suppress the use of MMX instructions
   unless MMX_OK is true.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  /* Extract the other element and re-CONCAT it with VAL in the
	     proper order.  */
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D: elements 1 and 0 of TARGET, then
	     elements 2 and 3 of TMP.  */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D: elements 0 and 1 of TARGET, then
	     elements 0 and 3 of TMP.  */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X: elements 0 and 1 of TARGET, then
	     elements 2 and 0 of TMP.  (The original comment here said
	     "A B X D", copied from the ELT == 2 case.)  */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  /* Exchanging 0 and ELT is its own inverse, so the same ORDER
	     restores the original layout.  */
	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      /* (vec_merge (vec_duplicate VAL) TARGET (1 << ELT)) replaces
	 exactly element ELT.  */
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to a stack slot, store the element
	 into memory, and reload the whole vector.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
18361
/* Extract element ELT of vector VEC into scalar register TARGET.
   Suppress the use of MMX instructions unless MMX_OK is true.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      /* Move the wanted element into position 0 first, then perform a
	 position 0 vec_select below.  */
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  /* unpckhps brings the high pair down to the low half.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  /* Same strategy as V4SF, using integer shuffles.  */
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to a stack slot and reload just the
	 requested element.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
2ab1754e 18487
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  /* First step: movhlps brings the high pair of IN into the low half
     of TMP1, then FN combines them pairwise into TMP2.  */
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  /* Second step: broadcast element 1 of the partial result and apply
     FN once more to finish the reduction.  */
  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
eb701deb 18508\f
a81083b2
BE
18509/* Target hook for scalar_mode_supported_p. */
18510static bool
18511ix86_scalar_mode_supported_p (enum machine_mode mode)
18512{
18513 if (DECIMAL_FLOAT_MODE_P (mode))
18514 return true;
18515 else
18516 return default_scalar_mode_supported_p (mode);
18517}
18518
f676971a
EC
18519/* Implements target hook vector_mode_supported_p. */
18520static bool
18521ix86_vector_mode_supported_p (enum machine_mode mode)
18522{
dcbca208 18523 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
f676971a 18524 return true;
dcbca208 18525 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
f676971a 18526 return true;
dcbca208 18527 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
f676971a 18528 return true;
dcbca208
RH
18529 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18530 return true;
18531 return false;
f676971a
EC
18532}
18533
67dfe110
KH
18534/* Worker function for TARGET_MD_ASM_CLOBBERS.
18535
18536 We do this in the new i386 backend to maintain source compatibility
18537 with the old cc0-based compiler. */
18538
18539static tree
61158923
HPN
18540ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18541 tree inputs ATTRIBUTE_UNUSED,
18542 tree clobbers)
67dfe110 18543{
f676971a
EC
18544 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18545 clobbers);
18546 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18547 clobbers);
18548 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18549 clobbers);
67dfe110
KH
18550 return clobbers;
18551}
18552
7dcbf659
JH
18553/* Return true if this goes in small data/bss. */
18554
18555static bool
18556ix86_in_large_data_p (tree exp)
18557{
18558 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18559 return false;
18560
18561 /* Functions are never large data. */
18562 if (TREE_CODE (exp) == FUNCTION_DECL)
18563 return false;
18564
18565 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18566 {
18567 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18568 if (strcmp (section, ".ldata") == 0
18569 || strcmp (section, ".lbss") == 0)
18570 return true;
18571 return false;
18572 }
18573 else
18574 {
18575 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18576
18577 /* If this is an incomplete type with size 0, then we can't put it
18578 in data because it might be too big when completed. */
18579 if (!size || size > ix86_section_threshold)
18580 return true;
18581 }
18582
18583 return false;
18584}
18585static void
18586ix86_encode_section_info (tree decl, rtx rtl, int first)
18587{
18588 default_encode_section_info (decl, rtl, first);
18589
18590 if (TREE_CODE (decl) == VAR_DECL
18591 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18592 && ix86_in_large_data_p (decl))
18593 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18594}
18595
3c5cb3e4
KH
18596/* Worker function for REVERSE_CONDITION. */
18597
18598enum rtx_code
18599ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18600{
18601 return (mode != CCFPmode && mode != CCFPUmode
18602 ? reverse_condition (code)
18603 : reverse_condition_maybe_unordered (code));
18604}
18605
5ea9cb6e
RS
18606/* Output code to perform an x87 FP register move, from OPERANDS[1]
18607 to OPERANDS[0]. */
18608
18609const char *
18610output_387_reg_move (rtx insn, rtx *operands)
18611{
18612 if (REG_P (operands[1])
18613 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18614 {
18615 if (REGNO (operands[0]) == FIRST_STACK_REG
18616 && TARGET_USE_FFREEP)
18617 return "ffreep\t%y0";
18618 return "fstp\t%y0";
18619 }
18620 if (STACK_TOP_P (operands[0]))
18621 return "fld%z1\t%y1";
18622 return "fst\t%y0";
18623}
18624
5ae27cfa
UB
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Fetch the x87 status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* Copy the status flags into EFLAGS and test for unordered.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Test the C2 bit (mask 0x04) of the status word directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  /* Emit the conditional branch to LABEL on the condition built
     above.  */
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
18657
c2fcfa4f
UB
/* Output code to perform a log1p XFmode calculation.

   When |OP1| is below 0.29289... (= 1 - sqrt(2)/2) the fyl2xp1
   instruction is used; otherwise we compute fyl2x on 1 + OP1.  Both
   are scaled by ln(2) (the fldln2 constant) to convert log2 into a
   natural logarithm.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* Branch to label1 when |op1| >= the threshold constant.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* Small |op1|: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  /* Large |op1|: op0 = ln2 * log2(1 + op1) via explicit add and fyl2x.  */
  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
f676971a 18687
d6b5193b 18688/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
a8e68029
DJ
18689
18690static void
18691i386_solaris_elf_named_section (const char *name, unsigned int flags,
18692 tree decl)
18693{
18694 /* With Binutils 2.15, the "@unwind" marker must be specified on
18695 every occurrence of the ".eh_frame" section, not just the first
18696 one. */
18697 if (TARGET_64BIT
18698 && strcmp (name, ".eh_frame") == 0)
18699 {
18700 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18701 flags & SECTION_WRITE ? "aw" : "a");
18702 return;
18703 }
18704 default_elf_asm_named_section (name, flags, decl);
18705}
18706
cac24f06
JM
18707/* Return the mangling of TYPE if it is an extended fundamental type. */
18708
18709static const char *
18710ix86_mangle_fundamental_type (tree type)
18711{
18712 switch (TYPE_MODE (type))
18713 {
18714 case TFmode:
18715 /* __float128 is "g". */
18716 return "g";
18717 case XFmode:
18718 /* "long double" or __float80 is "e". */
18719 return "e";
18720 default:
18721 return NULL;
18722 }
18723}
18724
7ce918c5
JJ
18725/* For 32-bit code we can save PIC register setup by using
18726 __stack_chk_fail_local hidden function instead of calling
18727 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18728 register, so it is better to call __stack_chk_fail directly. */
18729
18730static tree
18731ix86_stack_protect_fail (void)
18732{
18733 return TARGET_64BIT
18734 ? default_external_stack_protect_fail ()
18735 : default_hidden_stack_protect_fail ();
18736}
18737
72ce3d4a
JH
18738/* Select a format to encode pointers in exception handling data. CODE
18739 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18740 true if the symbol may be affected by dynamic relocations.
18741
18742 ??? All x86 object file formats are capable of representing this.
18743 After all, the relocation needed is the same as for the call insn.
18744 Whether or not a particular assembler allows us to enter such, I
18745 guess we'll have to see. */
18746int
18747asm_preferred_eh_data_format (int code, int global)
18748{
18749 if (flag_pic)
18750 {
18751int type = DW_EH_PE_sdata8;
18752 if (!TARGET_64BIT
18753 || ix86_cmodel == CM_SMALL_PIC
18754 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18755 type = DW_EH_PE_sdata4;
18756 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18757 }
18758 if (ix86_cmodel == CM_SMALL
18759 || (ix86_cmodel == CM_MEDIUM && code))
18760 return DW_EH_PE_udata4;
18761 return DW_EH_PE_absptr;
18762}
18763
e2500fed 18764#include "gt-i386.h"
This page took 6.039986 seconds and 5 git commands to generate.