/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit; -1 means "no limit known", targets may
   override this before inclusion.  The #endif was lost in extraction.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  Index 4
   ("other") is the fallback for any mode that is not QI/HI/SI/DImode;
   the final ": 4)" arm was lost in extraction and is restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of l1 cache */
116 0, /* size of l2 cache */
117 0, /* size of prefetch block */
118 0, /* number of parallel prefetches */
120 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
121 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
122 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
123 COSTS_N_BYTES (2), /* cost of FABS instruction. */
124 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
125 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
128 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
129 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
132 /* Processor costs (relative to an add) */
134 struct processor_costs i386_cost
= { /* 386 specific costs */
135 COSTS_N_INSNS (1), /* cost of an add instruction */
136 COSTS_N_INSNS (1), /* cost of a lea instruction */
137 COSTS_N_INSNS (3), /* variable shift costs */
138 COSTS_N_INSNS (2), /* constant shift costs */
139 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
140 COSTS_N_INSNS (6), /* HI */
141 COSTS_N_INSNS (6), /* SI */
142 COSTS_N_INSNS (6), /* DI */
143 COSTS_N_INSNS (6)}, /* other */
144 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
145 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
146 COSTS_N_INSNS (23), /* HI */
147 COSTS_N_INSNS (23), /* SI */
148 COSTS_N_INSNS (23), /* DI */
149 COSTS_N_INSNS (23)}, /* other */
150 COSTS_N_INSNS (3), /* cost of movsx */
151 COSTS_N_INSNS (2), /* cost of movzx */
152 15, /* "large" insn */
154 4, /* cost for loading QImode using movzbl */
155 {2, 4, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 4, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {8, 8, 8}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {8, 8, 8}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 2, /* cost of moving MMX register */
165 {4, 8}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {4, 8}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {4, 8, 16}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {4, 8, 16}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
180 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
181 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
182 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
183 COSTS_N_INSNS (22), /* cost of FABS instruction. */
184 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
185 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
186 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
187 DUMMY_STRINGOP_ALGS
},
188 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
189 DUMMY_STRINGOP_ALGS
},
193 struct processor_costs i486_cost
= { /* 486 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (12), /* HI */
200 COSTS_N_INSNS (12), /* SI */
201 COSTS_N_INSNS (12), /* DI */
202 COSTS_N_INSNS (12)}, /* other */
203 1, /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (40), /* HI */
206 COSTS_N_INSNS (40), /* SI */
207 COSTS_N_INSNS (40), /* DI */
208 COSTS_N_INSNS (40)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 4, /* size of l1 cache. 486 has 8kB cache
235 shared for code and data, so 4kB is
236 not really precise. */
237 4, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
241 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (3), /* cost of FABS instruction. */
245 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
247 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
248 DUMMY_STRINGOP_ALGS
},
249 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
254 struct processor_costs pentium_cost
= {
255 COSTS_N_INSNS (1), /* cost of an add instruction */
256 COSTS_N_INSNS (1), /* cost of a lea instruction */
257 COSTS_N_INSNS (4), /* variable shift costs */
258 COSTS_N_INSNS (1), /* constant shift costs */
259 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
260 COSTS_N_INSNS (11), /* HI */
261 COSTS_N_INSNS (11), /* SI */
262 COSTS_N_INSNS (11), /* DI */
263 COSTS_N_INSNS (11)}, /* other */
264 0, /* cost of multiply per each bit set */
265 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
266 COSTS_N_INSNS (25), /* HI */
267 COSTS_N_INSNS (25), /* SI */
268 COSTS_N_INSNS (25), /* DI */
269 COSTS_N_INSNS (25)}, /* other */
270 COSTS_N_INSNS (3), /* cost of movsx */
271 COSTS_N_INSNS (2), /* cost of movzx */
272 8, /* "large" insn */
274 6, /* cost for loading QImode using movzbl */
275 {2, 4, 2}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 4, 2}, /* cost of storing integer registers */
279 2, /* cost of reg,reg fld/fst */
280 {2, 2, 6}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 6}, /* cost of storing fp registers
283 in SFmode, DFmode and XFmode */
284 8, /* cost of moving MMX register */
285 {8, 8}, /* cost of loading MMX registers
286 in SImode and DImode */
287 {8, 8}, /* cost of storing MMX registers
288 in SImode and DImode */
289 2, /* cost of moving SSE register */
290 {4, 8, 16}, /* cost of loading SSE registers
291 in SImode, DImode and TImode */
292 {4, 8, 16}, /* cost of storing SSE registers
293 in SImode, DImode and TImode */
294 3, /* MMX or SSE register to integer */
295 8, /* size of l1 cache. */
296 8, /* size of l2 cache */
297 0, /* size of prefetch block */
298 0, /* number of parallel prefetches */
300 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
301 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
302 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
303 COSTS_N_INSNS (1), /* cost of FABS instruction. */
304 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
305 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
306 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
307 DUMMY_STRINGOP_ALGS
},
308 {{libcall
, {{-1, rep_prefix_4_byte
}}},
313 struct processor_costs pentiumpro_cost
= {
314 COSTS_N_INSNS (1), /* cost of an add instruction */
315 COSTS_N_INSNS (1), /* cost of a lea instruction */
316 COSTS_N_INSNS (1), /* variable shift costs */
317 COSTS_N_INSNS (1), /* constant shift costs */
318 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
319 COSTS_N_INSNS (4), /* HI */
320 COSTS_N_INSNS (4), /* SI */
321 COSTS_N_INSNS (4), /* DI */
322 COSTS_N_INSNS (4)}, /* other */
323 0, /* cost of multiply per each bit set */
324 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
325 COSTS_N_INSNS (17), /* HI */
326 COSTS_N_INSNS (17), /* SI */
327 COSTS_N_INSNS (17), /* DI */
328 COSTS_N_INSNS (17)}, /* other */
329 COSTS_N_INSNS (1), /* cost of movsx */
330 COSTS_N_INSNS (1), /* cost of movzx */
331 8, /* "large" insn */
333 2, /* cost for loading QImode using movzbl */
334 {4, 4, 4}, /* cost of loading integer registers
335 in QImode, HImode and SImode.
336 Relative to reg-reg move (2). */
337 {2, 2, 2}, /* cost of storing integer registers */
338 2, /* cost of reg,reg fld/fst */
339 {2, 2, 6}, /* cost of loading fp registers
340 in SFmode, DFmode and XFmode */
341 {4, 4, 6}, /* cost of storing fp registers
342 in SFmode, DFmode and XFmode */
343 2, /* cost of moving MMX register */
344 {2, 2}, /* cost of loading MMX registers
345 in SImode and DImode */
346 {2, 2}, /* cost of storing MMX registers
347 in SImode and DImode */
348 2, /* cost of moving SSE register */
349 {2, 2, 8}, /* cost of loading SSE registers
350 in SImode, DImode and TImode */
351 {2, 2, 8}, /* cost of storing SSE registers
352 in SImode, DImode and TImode */
353 3, /* MMX or SSE register to integer */
354 8, /* size of l1 cache. */
355 256, /* size of l2 cache */
356 32, /* size of prefetch block */
357 6, /* number of parallel prefetches */
359 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
360 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
361 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
362 COSTS_N_INSNS (2), /* cost of FABS instruction. */
363 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
364 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
365 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
366 the alignment). For small blocks inline loop is still a noticeable win, for bigger
367 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
368 more expensive startup time in CPU, but after 4K the difference is down in the noise.
370 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
371 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
372 DUMMY_STRINGOP_ALGS
},
373 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
374 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
379 struct processor_costs geode_cost
= {
380 COSTS_N_INSNS (1), /* cost of an add instruction */
381 COSTS_N_INSNS (1), /* cost of a lea instruction */
382 COSTS_N_INSNS (2), /* variable shift costs */
383 COSTS_N_INSNS (1), /* constant shift costs */
384 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
385 COSTS_N_INSNS (4), /* HI */
386 COSTS_N_INSNS (7), /* SI */
387 COSTS_N_INSNS (7), /* DI */
388 COSTS_N_INSNS (7)}, /* other */
389 0, /* cost of multiply per each bit set */
390 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
391 COSTS_N_INSNS (23), /* HI */
392 COSTS_N_INSNS (39), /* SI */
393 COSTS_N_INSNS (39), /* DI */
394 COSTS_N_INSNS (39)}, /* other */
395 COSTS_N_INSNS (1), /* cost of movsx */
396 COSTS_N_INSNS (1), /* cost of movzx */
397 8, /* "large" insn */
399 1, /* cost for loading QImode using movzbl */
400 {1, 1, 1}, /* cost of loading integer registers
401 in QImode, HImode and SImode.
402 Relative to reg-reg move (2). */
403 {1, 1, 1}, /* cost of storing integer registers */
404 1, /* cost of reg,reg fld/fst */
405 {1, 1, 1}, /* cost of loading fp registers
406 in SFmode, DFmode and XFmode */
407 {4, 6, 6}, /* cost of storing fp registers
408 in SFmode, DFmode and XFmode */
410 1, /* cost of moving MMX register */
411 {1, 1}, /* cost of loading MMX registers
412 in SImode and DImode */
413 {1, 1}, /* cost of storing MMX registers
414 in SImode and DImode */
415 1, /* cost of moving SSE register */
416 {1, 1, 1}, /* cost of loading SSE registers
417 in SImode, DImode and TImode */
418 {1, 1, 1}, /* cost of storing SSE registers
419 in SImode, DImode and TImode */
420 1, /* MMX or SSE register to integer */
421 64, /* size of l1 cache. */
422 128, /* size of l2 cache. */
423 32, /* size of prefetch block */
424 1, /* number of parallel prefetches */
426 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (1), /* cost of FABS instruction. */
430 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
432 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
433 DUMMY_STRINGOP_ALGS
},
434 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
439 struct processor_costs k6_cost
= {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (2), /* cost of a lea instruction */
442 COSTS_N_INSNS (1), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (3), /* HI */
446 COSTS_N_INSNS (3), /* SI */
447 COSTS_N_INSNS (3), /* DI */
448 COSTS_N_INSNS (3)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (18), /* HI */
452 COSTS_N_INSNS (18), /* SI */
453 COSTS_N_INSNS (18), /* DI */
454 COSTS_N_INSNS (18)}, /* other */
455 COSTS_N_INSNS (2), /* cost of movsx */
456 COSTS_N_INSNS (2), /* cost of movzx */
457 8, /* "large" insn */
459 3, /* cost for loading QImode using movzbl */
460 {4, 5, 4}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {2, 3, 2}, /* cost of storing integer registers */
464 4, /* cost of reg,reg fld/fst */
465 {6, 6, 6}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 4, 4}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469 2, /* cost of moving MMX register */
470 {2, 2}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {2, 2}, /* cost of storing MMX registers
473 in SImode and DImode */
474 2, /* cost of moving SSE register */
475 {2, 2, 8}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {2, 2, 8}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 6, /* MMX or SSE register to integer */
480 32, /* size of l1 cache. */
481 32, /* size of l2 cache. Some models
482 have integrated l2 cache, but
483 optimizing for k6 is not important
484 enough to worry about that. */
485 32, /* size of prefetch block */
486 1, /* number of parallel prefetches */
488 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
494 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
495 DUMMY_STRINGOP_ALGS
},
496 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
501 struct processor_costs athlon_cost
= {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (2), /* cost of a lea instruction */
504 COSTS_N_INSNS (1), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (5), /* HI */
508 COSTS_N_INSNS (5), /* SI */
509 COSTS_N_INSNS (5), /* DI */
510 COSTS_N_INSNS (5)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (26), /* HI */
514 COSTS_N_INSNS (42), /* SI */
515 COSTS_N_INSNS (74), /* DI */
516 COSTS_N_INSNS (74)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
521 4, /* cost for loading QImode using movzbl */
522 {3, 4, 3}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {3, 4, 3}, /* cost of storing integer registers */
526 4, /* cost of reg,reg fld/fst */
527 {4, 4, 12}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {6, 6, 8}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531 2, /* cost of moving MMX register */
532 {4, 4}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {4, 4}, /* cost of storing MMX registers
535 in SImode and DImode */
536 2, /* cost of moving SSE register */
537 {4, 4, 6}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {4, 4, 5}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 5, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 256, /* size of l2 cache. */
544 64, /* size of prefetch block */
545 6, /* number of parallel prefetches */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 /* For some reason, Athlon deals better with REP prefix (relative to loops)
554 compared to K8. Alignment becomes important after 8 bytes for memcpy and
555 128 bytes for memset. */
556 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
557 DUMMY_STRINGOP_ALGS
},
558 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
563 struct processor_costs k8_cost
= {
564 COSTS_N_INSNS (1), /* cost of an add instruction */
565 COSTS_N_INSNS (2), /* cost of a lea instruction */
566 COSTS_N_INSNS (1), /* variable shift costs */
567 COSTS_N_INSNS (1), /* constant shift costs */
568 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
569 COSTS_N_INSNS (4), /* HI */
570 COSTS_N_INSNS (3), /* SI */
571 COSTS_N_INSNS (4), /* DI */
572 COSTS_N_INSNS (5)}, /* other */
573 0, /* cost of multiply per each bit set */
574 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
575 COSTS_N_INSNS (26), /* HI */
576 COSTS_N_INSNS (42), /* SI */
577 COSTS_N_INSNS (74), /* DI */
578 COSTS_N_INSNS (74)}, /* other */
579 COSTS_N_INSNS (1), /* cost of movsx */
580 COSTS_N_INSNS (1), /* cost of movzx */
581 8, /* "large" insn */
583 4, /* cost for loading QImode using movzbl */
584 {3, 4, 3}, /* cost of loading integer registers
585 in QImode, HImode and SImode.
586 Relative to reg-reg move (2). */
587 {3, 4, 3}, /* cost of storing integer registers */
588 4, /* cost of reg,reg fld/fst */
589 {4, 4, 12}, /* cost of loading fp registers
590 in SFmode, DFmode and XFmode */
591 {6, 6, 8}, /* cost of storing fp registers
592 in SFmode, DFmode and XFmode */
593 2, /* cost of moving MMX register */
594 {3, 3}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {4, 4}, /* cost of storing MMX registers
597 in SImode and DImode */
598 2, /* cost of moving SSE register */
599 {4, 3, 6}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {4, 4, 5}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 5, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 512, /* size of l2 cache. */
606 64, /* size of prefetch block */
607 /* New AMD processors never drop prefetches; if they cannot be performed
608 immediately, they are queued. We set number of simultaneous prefetches
609 to a large constant to reflect this (it probably is not a good idea not
610 to limit number of prefetches at all, as their execution also takes some
612 100, /* number of parallel prefetches */
614 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
620 /* K8 has optimized REP instruction for medium sized blocks, but for very small
621 blocks it is better to use loop. For large blocks, libcall can do
622 nontemporary accesses and beat inline considerably. */
623 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
624 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
625 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
626 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
627 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
630 struct processor_costs amdfam10_cost
= {
631 COSTS_N_INSNS (1), /* cost of an add instruction */
632 COSTS_N_INSNS (2), /* cost of a lea instruction */
633 COSTS_N_INSNS (1), /* variable shift costs */
634 COSTS_N_INSNS (1), /* constant shift costs */
635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
636 COSTS_N_INSNS (4), /* HI */
637 COSTS_N_INSNS (3), /* SI */
638 COSTS_N_INSNS (4), /* DI */
639 COSTS_N_INSNS (5)}, /* other */
640 0, /* cost of multiply per each bit set */
641 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
642 COSTS_N_INSNS (35), /* HI */
643 COSTS_N_INSNS (51), /* SI */
644 COSTS_N_INSNS (83), /* DI */
645 COSTS_N_INSNS (83)}, /* other */
646 COSTS_N_INSNS (1), /* cost of movsx */
647 COSTS_N_INSNS (1), /* cost of movzx */
648 8, /* "large" insn */
650 4, /* cost for loading QImode using movzbl */
651 {3, 4, 3}, /* cost of loading integer registers
652 in QImode, HImode and SImode.
653 Relative to reg-reg move (2). */
654 {3, 4, 3}, /* cost of storing integer registers */
655 4, /* cost of reg,reg fld/fst */
656 {4, 4, 12}, /* cost of loading fp registers
657 in SFmode, DFmode and XFmode */
658 {6, 6, 8}, /* cost of storing fp registers
659 in SFmode, DFmode and XFmode */
660 2, /* cost of moving MMX register */
661 {3, 3}, /* cost of loading MMX registers
662 in SImode and DImode */
663 {4, 4}, /* cost of storing MMX registers
664 in SImode and DImode */
665 2, /* cost of moving SSE register */
666 {4, 4, 3}, /* cost of loading SSE registers
667 in SImode, DImode and TImode */
668 {4, 4, 5}, /* cost of storing SSE registers
669 in SImode, DImode and TImode */
670 3, /* MMX or SSE register to integer */
672 MOVD reg64, xmmreg Double FSTORE 4
673 MOVD reg32, xmmreg Double FSTORE 4
675 MOVD reg64, xmmreg Double FADD 3
677 MOVD reg32, xmmreg Double FADD 3
679 64, /* size of l1 cache. */
680 512, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 /* New AMD processors never drop prefetches; if they cannot be performed
683 immediately, they are queued. We set number of simultaneous prefetches
684 to a large constant to reflect this (it probably is not a good idea not
685 to limit number of prefetches at all, as their execution also takes some
687 100, /* number of parallel prefetches */
689 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
690 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
691 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
692 COSTS_N_INSNS (2), /* cost of FABS instruction. */
693 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
694 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
696 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
697 very small blocks it is better to use loop. For large blocks, libcall can
698 do nontemporary accesses and beat inline considerably. */
699 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
700 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
701 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
702 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
703 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
707 struct processor_costs pentium4_cost
= {
708 COSTS_N_INSNS (1), /* cost of an add instruction */
709 COSTS_N_INSNS (3), /* cost of a lea instruction */
710 COSTS_N_INSNS (4), /* variable shift costs */
711 COSTS_N_INSNS (4), /* constant shift costs */
712 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
713 COSTS_N_INSNS (15), /* HI */
714 COSTS_N_INSNS (15), /* SI */
715 COSTS_N_INSNS (15), /* DI */
716 COSTS_N_INSNS (15)}, /* other */
717 0, /* cost of multiply per each bit set */
718 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
719 COSTS_N_INSNS (56), /* HI */
720 COSTS_N_INSNS (56), /* SI */
721 COSTS_N_INSNS (56), /* DI */
722 COSTS_N_INSNS (56)}, /* other */
723 COSTS_N_INSNS (1), /* cost of movsx */
724 COSTS_N_INSNS (1), /* cost of movzx */
725 16, /* "large" insn */
727 2, /* cost for loading QImode using movzbl */
728 {4, 5, 4}, /* cost of loading integer registers
729 in QImode, HImode and SImode.
730 Relative to reg-reg move (2). */
731 {2, 3, 2}, /* cost of storing integer registers */
732 2, /* cost of reg,reg fld/fst */
733 {2, 2, 6}, /* cost of loading fp registers
734 in SFmode, DFmode and XFmode */
735 {4, 4, 6}, /* cost of storing fp registers
736 in SFmode, DFmode and XFmode */
737 2, /* cost of moving MMX register */
738 {2, 2}, /* cost of loading MMX registers
739 in SImode and DImode */
740 {2, 2}, /* cost of storing MMX registers
741 in SImode and DImode */
742 12, /* cost of moving SSE register */
743 {12, 12, 12}, /* cost of loading SSE registers
744 in SImode, DImode and TImode */
745 {2, 2, 8}, /* cost of storing SSE registers
746 in SImode, DImode and TImode */
747 10, /* MMX or SSE register to integer */
748 8, /* size of l1 cache. */
749 256, /* size of l2 cache. */
750 64, /* size of prefetch block */
751 6, /* number of parallel prefetches */
753 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
754 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
755 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
756 COSTS_N_INSNS (2), /* cost of FABS instruction. */
757 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
758 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
759 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
760 DUMMY_STRINGOP_ALGS
},
761 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
763 DUMMY_STRINGOP_ALGS
},
767 struct processor_costs nocona_cost
= {
768 COSTS_N_INSNS (1), /* cost of an add instruction */
769 COSTS_N_INSNS (1), /* cost of a lea instruction */
770 COSTS_N_INSNS (1), /* variable shift costs */
771 COSTS_N_INSNS (1), /* constant shift costs */
772 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
773 COSTS_N_INSNS (10), /* HI */
774 COSTS_N_INSNS (10), /* SI */
775 COSTS_N_INSNS (10), /* DI */
776 COSTS_N_INSNS (10)}, /* other */
777 0, /* cost of multiply per each bit set */
778 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
779 COSTS_N_INSNS (66), /* HI */
780 COSTS_N_INSNS (66), /* SI */
781 COSTS_N_INSNS (66), /* DI */
782 COSTS_N_INSNS (66)}, /* other */
783 COSTS_N_INSNS (1), /* cost of movsx */
784 COSTS_N_INSNS (1), /* cost of movzx */
785 16, /* "large" insn */
787 4, /* cost for loading QImode using movzbl */
788 {4, 4, 4}, /* cost of loading integer registers
789 in QImode, HImode and SImode.
790 Relative to reg-reg move (2). */
791 {4, 4, 4}, /* cost of storing integer registers */
792 3, /* cost of reg,reg fld/fst */
793 {12, 12, 12}, /* cost of loading fp registers
794 in SFmode, DFmode and XFmode */
795 {4, 4, 4}, /* cost of storing fp registers
796 in SFmode, DFmode and XFmode */
797 6, /* cost of moving MMX register */
798 {12, 12}, /* cost of loading MMX registers
799 in SImode and DImode */
800 {12, 12}, /* cost of storing MMX registers
801 in SImode and DImode */
802 6, /* cost of moving SSE register */
803 {12, 12, 12}, /* cost of loading SSE registers
804 in SImode, DImode and TImode */
805 {12, 12, 12}, /* cost of storing SSE registers
806 in SImode, DImode and TImode */
807 8, /* MMX or SSE register to integer */
808 8, /* size of l1 cache. */
809 1024, /* size of l2 cache. */
810 128, /* size of prefetch block */
811 8, /* number of parallel prefetches */
813 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
815 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
816 COSTS_N_INSNS (3), /* cost of FABS instruction. */
817 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
818 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
819 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
820 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
821 {100000, unrolled_loop
}, {-1, libcall
}}}},
822 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
824 {libcall
, {{24, loop
}, {64, unrolled_loop
},
825 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
829 struct processor_costs core2_cost
= {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (3), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (3), /* DI */
838 COSTS_N_INSNS (3)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (22), /* HI */
842 COSTS_N_INSNS (22), /* SI */
843 COSTS_N_INSNS (22), /* DI */
844 COSTS_N_INSNS (22)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
849 2, /* cost for loading QImode using movzbl */
850 {6, 6, 6}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {4, 4, 4}, /* cost of storing integer registers */
854 2, /* cost of reg,reg fld/fst */
855 {6, 6, 6}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {4, 4, 4}, /* cost of loading integer registers */
858 2, /* cost of moving MMX register */
859 {6, 6}, /* cost of loading MMX registers
860 in SImode and DImode */
861 {4, 4}, /* cost of storing MMX registers
862 in SImode and DImode */
863 2, /* cost of moving SSE register */
864 {6, 6, 6}, /* cost of loading SSE registers
865 in SImode, DImode and TImode */
866 {4, 4, 4}, /* cost of storing SSE registers
867 in SImode, DImode and TImode */
868 2, /* MMX or SSE register to integer */
869 32, /* size of l1 cache. */
870 2048, /* size of l2 cache. */
871 128, /* size of prefetch block */
872 8, /* number of parallel prefetches */
874 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
875 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
876 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
877 COSTS_N_INSNS (1), /* cost of FABS instruction. */
878 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
879 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
880 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
881 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
882 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
883 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
884 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
885 {libcall
, {{24, loop
}, {32, unrolled_loop
},
886 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
889 /* Generic64 should produce code tuned for Nocona and K8. */
891 struct processor_costs generic64_cost
= {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 /* On all chips taken into consideration lea is 2 cycles and more. With
894 this cost however our current implementation of synth_mult results in
895 use of unnecessary temporary registers causing regression on several
896 SPECfp benchmarks. */
897 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
898 COSTS_N_INSNS (1), /* variable shift costs */
899 COSTS_N_INSNS (1), /* constant shift costs */
900 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
901 COSTS_N_INSNS (4), /* HI */
902 COSTS_N_INSNS (3), /* SI */
903 COSTS_N_INSNS (4), /* DI */
904 COSTS_N_INSNS (2)}, /* other */
905 0, /* cost of multiply per each bit set */
906 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
907 COSTS_N_INSNS (26), /* HI */
908 COSTS_N_INSNS (42), /* SI */
909 COSTS_N_INSNS (74), /* DI */
910 COSTS_N_INSNS (74)}, /* other */
911 COSTS_N_INSNS (1), /* cost of movsx */
912 COSTS_N_INSNS (1), /* cost of movzx */
913 8, /* "large" insn */
915 4, /* cost for loading QImode using movzbl */
916 {4, 4, 4}, /* cost of loading integer registers
917 in QImode, HImode and SImode.
918 Relative to reg-reg move (2). */
919 {4, 4, 4}, /* cost of storing integer registers */
920 4, /* cost of reg,reg fld/fst */
921 {12, 12, 12}, /* cost of loading fp registers
922 in SFmode, DFmode and XFmode */
923 {6, 6, 8}, /* cost of storing fp registers
924 in SFmode, DFmode and XFmode */
925 2, /* cost of moving MMX register */
926 {8, 8}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {8, 8}, /* cost of storing MMX registers
929 in SImode and DImode */
930 2, /* cost of moving SSE register */
931 {8, 8, 8}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {8, 8, 8}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 5, /* MMX or SSE register to integer */
936 32, /* size of l1 cache. */
937 512, /* size of l2 cache. */
938 64, /* size of prefetch block */
939 6, /* number of parallel prefetches */
940 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
941 is increased to perhaps more appropriate value of 5. */
943 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (8), /* cost of FABS instruction. */
947 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
949 {DUMMY_STRINGOP_ALGS
,
950 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
951 {DUMMY_STRINGOP_ALGS
,
952 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
955 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
957 struct processor_costs generic32_cost
= {
958 COSTS_N_INSNS (1), /* cost of an add instruction */
959 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
960 COSTS_N_INSNS (1), /* variable shift costs */
961 COSTS_N_INSNS (1), /* constant shift costs */
962 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
963 COSTS_N_INSNS (4), /* HI */
964 COSTS_N_INSNS (3), /* SI */
965 COSTS_N_INSNS (4), /* DI */
966 COSTS_N_INSNS (2)}, /* other */
967 0, /* cost of multiply per each bit set */
968 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
969 COSTS_N_INSNS (26), /* HI */
970 COSTS_N_INSNS (42), /* SI */
971 COSTS_N_INSNS (74), /* DI */
972 COSTS_N_INSNS (74)}, /* other */
973 COSTS_N_INSNS (1), /* cost of movsx */
974 COSTS_N_INSNS (1), /* cost of movzx */
975 8, /* "large" insn */
977 4, /* cost for loading QImode using movzbl */
978 {4, 4, 4}, /* cost of loading integer registers
979 in QImode, HImode and SImode.
980 Relative to reg-reg move (2). */
981 {4, 4, 4}, /* cost of storing integer registers */
982 4, /* cost of reg,reg fld/fst */
983 {12, 12, 12}, /* cost of loading fp registers
984 in SFmode, DFmode and XFmode */
985 {6, 6, 8}, /* cost of storing fp registers
986 in SFmode, DFmode and XFmode */
987 2, /* cost of moving MMX register */
988 {8, 8}, /* cost of loading MMX registers
989 in SImode and DImode */
990 {8, 8}, /* cost of storing MMX registers
991 in SImode and DImode */
992 2, /* cost of moving SSE register */
993 {8, 8, 8}, /* cost of loading SSE registers
994 in SImode, DImode and TImode */
995 {8, 8, 8}, /* cost of storing SSE registers
996 in SImode, DImode and TImode */
997 5, /* MMX or SSE register to integer */
998 32, /* size of l1 cache. */
999 256, /* size of l2 cache. */
1000 64, /* size of prefetch block */
1001 6, /* number of parallel prefetches */
1002 3, /* Branch cost */
1003 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1009 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1010 DUMMY_STRINGOP_ALGS
},
1011 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1012 DUMMY_STRINGOP_ALGS
},
1015 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1017 /* Processor feature/optimization bitmasks. */
1018 #define m_386 (1<<PROCESSOR_I386)
1019 #define m_486 (1<<PROCESSOR_I486)
1020 #define m_PENT (1<<PROCESSOR_PENTIUM)
1021 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1022 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1023 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1024 #define m_CORE2 (1<<PROCESSOR_CORE2)
1026 #define m_GEODE (1<<PROCESSOR_GEODE)
1027 #define m_K6 (1<<PROCESSOR_K6)
1028 #define m_K6_GEODE (m_K6 | m_GEODE)
1029 #define m_K8 (1<<PROCESSOR_K8)
1030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1033 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1035 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1036 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1038 /* Generic instruction choice should be common subset of supported CPUs
1039 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1040 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1042 /* Feature tests against the various tunings. */
1043 unsigned int ix86_tune_features
[X86_TUNE_LAST
] = {
1044 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1045 negatively, so enabling for Generic64 seems like good code size
1046 tradeoff. We can't enable it for 32bit generic because it does not
1047 work well with PPro base chips. */
1048 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC64
,
1050 /* X86_TUNE_PUSH_MEMORY */
1051 m_386
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1052 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1054 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1057 /* X86_TUNE_USE_BIT_TEST */
1060 /* X86_TUNE_UNROLL_STRLEN */
1061 m_486
| m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6
| m_CORE2
| m_GENERIC
,
1063 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1064 m_PPRO
| m_K6_GEODE
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_GENERIC
,
1066 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1067 on simulation result. But after P4 was made, no performance benefit
1068 was observed with branch hints. It also increases the code size.
1069 As a result, icc never generates branch hints. */
1072 /* X86_TUNE_DOUBLE_WITH_ADD */
1075 /* X86_TUNE_USE_SAHF */
1076 m_PPRO
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_PENT4
1077 | m_NOCONA
| m_CORE2
| m_GENERIC
,
1079 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1080 partial dependencies. */
1081 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
1082 | m_CORE2
| m_GENERIC
| m_GEODE
/* m_386 | m_K6 */,
1084 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1085 register stalls on Generic32 compilation setting as well. However
1086 in current implementation the partial register stalls are not eliminated
1087 very well - they can be introduced via subregs synthesized by combine
1088 and can happen in caller/callee saving sequences. Because this option
1089 pays back little on PPro based chips and is in conflict with partial reg
1090 dependencies used by Athlon/P4 based chips, it is better to leave it off
1091 for generic32 for now. */
1094 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1095 m_CORE2
| m_GENERIC
,
1097 /* X86_TUNE_USE_HIMODE_FIOP */
1098 m_386
| m_486
| m_K6_GEODE
,
1100 /* X86_TUNE_USE_SIMODE_FIOP */
1101 ~(m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT
| m_CORE2
| m_GENERIC
),
1103 /* X86_TUNE_USE_MOV0 */
1106 /* X86_TUNE_USE_CLTD */
1107 ~(m_PENT
| m_K6
| m_CORE2
| m_GENERIC
),
1109 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1112 /* X86_TUNE_SPLIT_LONG_MOVES */
1115 /* X86_TUNE_READ_MODIFY_WRITE */
1118 /* X86_TUNE_READ_MODIFY */
1121 /* X86_TUNE_PROMOTE_QIMODE */
1122 m_K6_GEODE
| m_PENT
| m_386
| m_486
| m_ATHLON_K8_AMDFAM10
| m_CORE2
1123 | m_GENERIC
/* | m_PENT4 ? */,
1125 /* X86_TUNE_FAST_PREFIX */
1126 ~(m_PENT
| m_486
| m_386
),
1128 /* X86_TUNE_SINGLE_STRINGOP */
1129 m_386
| m_PENT4
| m_NOCONA
,
1131 /* X86_TUNE_QIMODE_MATH */
1134 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1135 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1136 might be considered for Generic32 if our scheme for avoiding partial
1137 stalls was more effective. */
1140 /* X86_TUNE_PROMOTE_QI_REGS */
1143 /* X86_TUNE_PROMOTE_HI_REGS */
1146 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1147 m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1149 /* X86_TUNE_ADD_ESP_8 */
1150 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_K6_GEODE
| m_386
1151 | m_486
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1153 /* X86_TUNE_SUB_ESP_4 */
1154 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1156 /* X86_TUNE_SUB_ESP_8 */
1157 m_ATHLON_K8_AMDFAM10
| m_PPRO
| m_386
| m_486
1158 | m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1160 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1161 for DFmode copies */
1162 ~(m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
1163 | m_GENERIC
| m_GEODE
),
1165 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1166 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1168 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1169 conflict here in between PPro/Pentium4 based chips that treat 128bit
1170 SSE registers as single units versus K8 based chips that divide SSE
1171 registers to two 64bit halves. This knob promotes all store destinations
1172 to be 128bit to allow register renaming on 128bit SSE units, but usually
1173 results in one extra microop on 64bit SSE units. Experimental results
1174 shows that disabling this option on P4 brings over 20% SPECfp regression,
1175 while enabling it on K8 brings roughly 2.4% regression that can be partly
1176 masked by careful scheduling of moves. */
1177 m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
| m_AMDFAM10
,
1179 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1182 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1183 are resolved on SSE register parts instead of whole registers, so we may
1184 maintain just lower part of scalar values in proper format leaving the
1185 upper part undefined. */
1188 /* X86_TUNE_SSE_TYPELESS_STORES */
1189 m_ATHLON_K8_AMDFAM10
,
1191 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1192 m_PPRO
| m_PENT4
| m_NOCONA
,
1194 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1195 m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1197 /* X86_TUNE_PROLOGUE_USING_MOVE */
1198 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1200 /* X86_TUNE_EPILOGUE_USING_MOVE */
1201 m_ATHLON_K8
| m_PPRO
| m_CORE2
| m_GENERIC
,
1203 /* X86_TUNE_SHIFT1 */
1206 /* X86_TUNE_USE_FFREEP */
1207 m_ATHLON_K8_AMDFAM10
,
1209 /* X86_TUNE_INTER_UNIT_MOVES */
1210 ~(m_ATHLON_K8_AMDFAM10
| m_GENERIC
),
1212 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1213 than 4 branch instructions in the 16 byte window. */
1214 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_CORE2
| m_GENERIC
,
1216 /* X86_TUNE_SCHEDULE */
1217 m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_K6_GEODE
| m_PENT
| m_CORE2
| m_GENERIC
,
1219 /* X86_TUNE_USE_BT */
1220 m_ATHLON_K8_AMDFAM10
,
1222 /* X86_TUNE_USE_INCDEC */
1223 ~(m_PENT4
| m_NOCONA
| m_GENERIC
),
1225 /* X86_TUNE_PAD_RETURNS */
1226 m_ATHLON_K8_AMDFAM10
| m_CORE2
| m_GENERIC
,
1228 /* X86_TUNE_EXT_80387_CONSTANTS */
1229 m_K6_GEODE
| m_ATHLON_K8
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
,
1231 /* X86_TUNE_SHORTEN_X87_SSE */
1234 /* X86_TUNE_AVOID_VECTOR_DECODE */
1237 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1238 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1242 vector path on AMD machines. */
1243 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1245 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1247 m_K8
| m_GENERIC64
| m_AMDFAM10
,
1249 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1253 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1254 but one byte longer. */
1257 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1258 operand that cannot be represented using a modRM byte. The XOR
1259 replacement is long decoded, so this split helps here as well. */
1263 /* Feature tests against the various architecture variations. */
1264 unsigned int ix86_arch_features
[X86_ARCH_LAST
] = {
1265 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1266 ~(m_386
| m_486
| m_PENT
| m_K6
),
1268 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1271 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1274 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1277 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Mask of m_* processor bits for which accumulating outgoing arguments is
   presumably turned on by default — TODO confirm at the use site (the
   consumer of this mask is not visible in this chunk).  */
1281 static const unsigned int x86_accumulate_outgoing_args
1282 = m_ATHLON_K8_AMDFAM10
| m_PENT4
| m_NOCONA
| m_PPRO
| m_CORE2
| m_GENERIC
;
/* Mask of m_* processor bits; by its name this selects architectures where
   the 387 "fancy math" builtins (fsin, fcos, fsqrt, ...) are always usable —
   NOTE(review): the consumer is outside this chunk, confirm there.  */
1284 static const unsigned int x86_arch_always_fancy_math_387
1285 = m_PENT
| m_PPRO
| m_ATHLON_K8_AMDFAM10
| m_PENT4
1286 | m_NOCONA
| m_CORE2
| m_GENERIC
;
/* String-operation algorithm forced for this compilation; no_stringop means
   no override is in effect.  Presumably set from a command-line option —
   TODO confirm (option handling is not visible in this chunk).  */
1288 static enum stringop_alg stringop_alg
= no_stringop
;
1290 /* In case the average insn count for single function invocation is
1291 lower than this constant, emit fast (but longer) prologue and
1293 #define FAST_PROLOGUE_INSN_COUNT 20
1295 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Names of the low 8-bit registers (al, bl, ...); macro presumably expands
   in i386.h — confirm.  */
1296 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
/* Names of the high 8-bit registers (ah, bh, ...).  */
1297 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
/* Names of the 16-bit registers (ax, bx, ...).  */
1298 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1300 /* Array of the smallest class containing reg number REGNO, indexed by
1301 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1303 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1305 /* ax, dx, cx, bx */
1306 AREG
, DREG
, CREG
, BREG
,
1307 /* si, di, bp, sp */
1308 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1310 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1311 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1314 /* flags, fpsr, fpcr, frame */
1315 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1317 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1320 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1323 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1324 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1325 /* SSE REX registers */
1326 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1330 /* The "default" register map used in 32bit mode. */
1332 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1334 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1335 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1336 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1337 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1338 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1339 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1340 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1343 static int const x86_64_int_parameter_registers
[6] =
1345 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1346 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1349 static int const x86_64_ms_abi_int_parameter_registers
[4] =
1351 2 /*RCX*/, 1 /*RDX*/,
1352 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1355 static int const x86_64_int_return_registers
[4] =
1357 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1360 /* The "default" register map used in 64bit mode. */
1361 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1363 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1364 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1365 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1366 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1367 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1368 8,9,10,11,12,13,14,15, /* extended integer registers */
1369 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1372 /* Define the register numbers to be used in Dwarf debugging information.
1373 The SVR4 reference port C compiler uses the following register numbers
1374 in its Dwarf output code:
1375 0 for %eax (gcc regno = 0)
1376 1 for %ecx (gcc regno = 2)
1377 2 for %edx (gcc regno = 1)
1378 3 for %ebx (gcc regno = 3)
1379 4 for %esp (gcc regno = 7)
1380 5 for %ebp (gcc regno = 6)
1381 6 for %esi (gcc regno = 4)
1382 7 for %edi (gcc regno = 5)
1383 The following three DWARF register numbers are never generated by
1384 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1385 believes these numbers have these meanings.
1386 8 for %eip (no gcc equivalent)
1387 9 for %eflags (gcc regno = 17)
1388 10 for %trapno (no gcc equivalent)
1389 It is not at all clear how we should number the FP stack registers
1390 for the x86 architecture. If the version of SDB on x86/svr4 were
1391 a bit less brain dead with respect to floating-point then we would
1392 have a precedent to follow with respect to DWARF register numbers
1393 for x86 FP registers, but the SDB on x86/svr4 is so completely
1394 broken with respect to FP registers that it is hardly worth thinking
1395 of it as something to strive for compatibility with.
1396 The version of x86/svr4 SDB I have at the moment does (partially)
1397 seem to believe that DWARF register number 11 is associated with
1398 the x86 register %st(0), but that's about all. Higher DWARF
1399 register numbers don't seem to be associated with anything in
1400 particular, and even for DWARF regno 11, SDB only seems to under-
1401 stand that it should say that a variable lives in %st(0) (when
1402 asked via an `=' command) if we said it was in DWARF regno 11,
1403 but SDB still prints garbage when asked for the value of the
1404 variable in question (via a `/' command).
1405 (Also note that the labels SDB prints for various FP stack regs
1406 when doing an `x' command are all wrong.)
1407 Note that these problems generally don't affect the native SVR4
1408 C compiler because it doesn't allow the use of -O with -g and
1409 because when it is *not* optimizing, it allocates a memory
1410 location for each floating-point variable, and the memory
1411 location is what gets described in the DWARF AT_location
1412 attribute for the variable in question.
1413 Regardless of the severe mental illness of the x86/svr4 SDB, we
1414 do something sensible here and we use the following DWARF
1415 register numbers. Note that these are all stack-top-relative
1417 11 for %st(0) (gcc regno = 8)
1418 12 for %st(1) (gcc regno = 9)
1419 13 for %st(2) (gcc regno = 10)
1420 14 for %st(3) (gcc regno = 11)
1421 15 for %st(4) (gcc regno = 12)
1422 16 for %st(5) (gcc regno = 13)
1423 17 for %st(6) (gcc regno = 14)
1424 18 for %st(7) (gcc regno = 15)
1426 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1428 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1429 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1430 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1431 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1432 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1433 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1434 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1437 /* Test and compare insns in i386.md store the information needed to
1438 generate branch and scc insns here. */
/* First operand of the pending comparison (see the comment above: filled in
   by test/compare insns in i386.md).  */
1440 rtx ix86_compare_op0
= NULL_RTX
;
/* Second operand of the pending comparison.  */
1441 rtx ix86_compare_op1
= NULL_RTX
;
/* If non-NULL, the flags result already emitted for the comparison —
   NOTE(review): exact contract lives at the use sites, not in this chunk.  */
1442 rtx ix86_compare_emitted
= NULL_RTX
;
1444 /* Size of the register save area. */
1445 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1447 /* Define the structure for the machine field in struct function. */
1449 struct stack_local_entry
GTY(())
1451 unsigned short mode
;
1454 struct stack_local_entry
*next
;
1457 /* Structure describing stack frame layout.
1458 Stack grows downward:
1464 saved frame pointer if frame_pointer_needed
1465 <- HARD_FRAME_POINTER
1470 [va_arg registers] (
1471 > to_allocate <- FRAME_POINTER
1481 HOST_WIDE_INT frame
;
1483 int outgoing_arguments_size
;
1486 HOST_WIDE_INT to_allocate
;
1487 /* The offsets relative to ARG_POINTER. */
1488 HOST_WIDE_INT frame_pointer_offset
;
1489 HOST_WIDE_INT hard_frame_pointer_offset
;
1490 HOST_WIDE_INT stack_pointer_offset
;
1492 /* When save_regs_using_mov is set, emit prologue using
1493 move instead of push instructions. */
1494 bool save_regs_using_mov
;
1497 /* Code model option. */
1498 enum cmodel ix86_cmodel
;
1500 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
1502 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
1504 /* Which unit we are generating floating point math for. */
1505 enum fpmath_unit ix86_fpmath
;
1507 /* Which cpu are we scheduling for. */
1508 enum processor_type ix86_tune
;
1510 /* Which instruction set architecture to use. */
1511 enum processor_type ix86_arch
;
1513 /* true if sse prefetch instruction is not NOOP. */
1514 int x86_prefetch_sse
;
1516 /* ix86_regparm_string as a number */
1517 static int ix86_regparm
;
1519 /* -mstackrealign option */
1520 extern int ix86_force_align_arg_pointer
;
1521 static const char ix86_force_align_arg_pointer_string
[] = "force_align_arg_pointer";
1523 /* Preferred alignment for stack boundary in bits. */
1524 unsigned int ix86_preferred_stack_boundary
;
1526 /* Values 1-5: see jump.c */
1527 int ix86_branch_cost
;
1529 /* Variables which are this size or smaller are put in the data/bss
1530 or ldata/lbss sections. */
1532 int ix86_section_threshold
= 65536;
1534 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1535 char internal_label_prefix
[16];
1536 int internal_label_prefix_len
;
1538 /* Fence to use after loop using movnt. */
1541 /* Register class used for passing given 64bit part of the argument.
1542 These represent classes as documented by the PS ABI, with the exception
1543 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1544 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1546 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1547 whenever possible (upper half does contain padding). */
1548 enum x86_64_reg_class
1551 X86_64_INTEGER_CLASS
,
1552 X86_64_INTEGERSI_CLASS
,
1559 X86_64_COMPLEX_X87_CLASS
,
1562 static const char * const x86_64_reg_class_name
[] =
1564 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1565 "sseup", "x87", "x87up", "cplx87", "no"
1568 #define MAX_CLASSES 4
1570 /* Table of constants used by fldpi, fldln2, etc.... */
1571 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
1572 static bool ext_80387_constants_init
= 0;
1575 static struct machine_function
* ix86_init_machine_status (void);
1576 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
1577 static int ix86_function_regparm (const_tree
, const_tree
);
1578 static void ix86_compute_frame_layout (struct ix86_frame
*);
1579 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
1583 /* The svr4 ABI for the i386 says that records and unions are returned
1585 #ifndef DEFAULT_PCC_STRUCT_RETURN
1586 #define DEFAULT_PCC_STRUCT_RETURN 1
1589 /* Bit flags that specify the ISA we are compiling for. */
1590 int ix86_isa_flags
= TARGET_64BIT_DEFAULT
| TARGET_SUBTARGET_ISA_DEFAULT
;
1592 /* A mask of ix86_isa_flags that includes bit X if X
1593 was set or cleared on the command line.
   Updated by ix86_handle_option below as each -m<isa> option is seen.  */
1594 static int ix86_isa_flags_explicit
;
1596 /* Define a set of ISAs which aren't available for a given ISA. MMX
1597 and SSE ISAs are handled separately. */
1599 #define OPTION_MASK_ISA_MMX_UNSET \
1600 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1601 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1603 #define OPTION_MASK_ISA_SSE_UNSET \
1604 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1605 #define OPTION_MASK_ISA_SSE2_UNSET \
1606 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1607 #define OPTION_MASK_ISA_SSE3_UNSET \
1608 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1609 #define OPTION_MASK_ISA_SSSE3_UNSET \
1610 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1611 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1612 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1613 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1615 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1616 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1617 #define OPTION_MASK_ISA_SSE4 \
1618 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1619 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1621 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
1623 /* Vectorization library interface and handlers. */
/* Hook mapping a scalar builtin to its vectorized library equivalent;
   NULL when no vectorization library was selected.  */
1624 tree (*ix86_veclib_handler
)(enum built_in_function
, tree
, tree
) = NULL
;
/* Handler for the ACML (AMD Core Math Library) ABI — presumably installed
   into ix86_veclib_handler by option processing; confirm at the set site.  */
1625 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
1627 /* Implement TARGET_HANDLE_OPTION. */
1630 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
1635 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX
;
1638 ix86_isa_flags
&= ~OPTION_MASK_ISA_MMX_UNSET
;
1639 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_MMX_UNSET
;
1644 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW
;
1647 ix86_isa_flags
&= ~OPTION_MASK_ISA_3DNOW_UNSET
;
1648 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_3DNOW_UNSET
;
1656 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE
;
1659 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE_UNSET
;
1660 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE_UNSET
;
1665 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2
;
1668 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE2_UNSET
;
1669 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE2_UNSET
;
1674 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3
;
1677 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE3_UNSET
;
1678 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE3_UNSET
;
1683 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3
;
1686 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSSE3_UNSET
;
1687 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSSE3_UNSET
;
1692 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1
;
1695 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_1_UNSET
;
1696 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_1_UNSET
;
1701 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2
;
1704 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_2_UNSET
;
1705 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_2_UNSET
;
1710 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4
;
1711 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4
;
1715 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4_UNSET
;
1716 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4_UNSET
;
1720 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A
;
1723 ix86_isa_flags
&= ~OPTION_MASK_ISA_SSE4A_UNSET
;
1724 ix86_isa_flags_explicit
|= OPTION_MASK_ISA_SSE4A_UNSET
;
1733 /* Sometimes certain combinations of command options do not make
1734 sense on a particular target machine. You can define a macro
1735 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1736 defined, is executed once just after all the command options have
1739 Don't use this macro to turn on various extra optimizations for
1740 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1743 override_options (void)
1746 int ix86_tune_defaulted
= 0;
1747 int ix86_arch_specified
= 0;
1748 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1750 /* Comes from final.c -- no real reason to change it. */
1751 #define MAX_CODE_ALIGN 16
1755 const struct processor_costs
*cost
; /* Processor costs */
1756 const int align_loop
; /* Default alignments. */
1757 const int align_loop_max_skip
;
1758 const int align_jump
;
1759 const int align_jump_max_skip
;
1760 const int align_func
;
1762 const processor_target_table
[PROCESSOR_max
] =
1764 {&i386_cost
, 4, 3, 4, 3, 4},
1765 {&i486_cost
, 16, 15, 16, 15, 16},
1766 {&pentium_cost
, 16, 7, 16, 7, 16},
1767 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
1768 {&geode_cost
, 0, 0, 0, 0, 0},
1769 {&k6_cost
, 32, 7, 32, 7, 32},
1770 {&athlon_cost
, 16, 7, 16, 7, 16},
1771 {&pentium4_cost
, 0, 0, 0, 0, 0},
1772 {&k8_cost
, 16, 7, 16, 7, 16},
1773 {&nocona_cost
, 0, 0, 0, 0, 0},
1774 {&core2_cost
, 16, 10, 16, 10, 16},
1775 {&generic32_cost
, 16, 7, 16, 7, 16},
1776 {&generic64_cost
, 16, 10, 16, 10, 16},
1777 {&amdfam10_cost
, 32, 24, 32, 7, 32}
1780 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1787 PTA_PREFETCH_SSE
= 1 << 4,
1789 PTA_3DNOW_A
= 1 << 6,
1793 PTA_POPCNT
= 1 << 10,
1795 PTA_SSE4A
= 1 << 12,
1796 PTA_NO_SAHF
= 1 << 13,
1797 PTA_SSE4_1
= 1 << 14,
1798 PTA_SSE4_2
= 1 << 15
1803 const char *const name
; /* processor name or nickname. */
1804 const enum processor_type processor
;
1805 const unsigned /*enum pta_flags*/ flags
;
1807 const processor_alias_table
[] =
1809 {"i386", PROCESSOR_I386
, 0},
1810 {"i486", PROCESSOR_I486
, 0},
1811 {"i586", PROCESSOR_PENTIUM
, 0},
1812 {"pentium", PROCESSOR_PENTIUM
, 0},
1813 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1814 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1815 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1816 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1817 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1818 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1819 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1820 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1821 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1822 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
1823 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
1824 {"pentium4", PROCESSOR_PENTIUM4
, PTA_MMX
|PTA_SSE
| PTA_SSE2
},
1825 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_MMX
| PTA_SSE
| PTA_SSE2
},
1826 {"prescott", PROCESSOR_NOCONA
, PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
1827 {"nocona", PROCESSOR_NOCONA
, (PTA_64BIT
1828 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
1829 | PTA_CX16
| PTA_NO_SAHF
)},
1830 {"core2", PROCESSOR_CORE2
, (PTA_64BIT
1831 | PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
1834 {"geode", PROCESSOR_GEODE
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1835 |PTA_PREFETCH_SSE
)},
1836 {"k6", PROCESSOR_K6
, PTA_MMX
},
1837 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1838 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1839 {"athlon", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1840 | PTA_PREFETCH_SSE
)},
1841 {"athlon-tbird", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1842 | PTA_PREFETCH_SSE
)},
1843 {"athlon-4", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1845 {"athlon-xp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1847 {"athlon-mp", PROCESSOR_ATHLON
, (PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1849 {"x86-64", PROCESSOR_K8
, (PTA_64BIT
1850 | PTA_MMX
| PTA_SSE
| PTA_SSE2
1852 {"k8", PROCESSOR_K8
, (PTA_64BIT
1853 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1854 | PTA_SSE
| PTA_SSE2
1856 {"k8-sse3", PROCESSOR_K8
, (PTA_64BIT
1857 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1858 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1860 {"opteron", PROCESSOR_K8
, (PTA_64BIT
1861 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1862 | PTA_SSE
| PTA_SSE2
1864 {"opteron-sse3", PROCESSOR_K8
, (PTA_64BIT
1865 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1866 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1868 {"athlon64", PROCESSOR_K8
, (PTA_64BIT
1869 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1870 | PTA_SSE
| PTA_SSE2
1872 {"athlon64-sse3", PROCESSOR_K8
, (PTA_64BIT
1873 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1874 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1876 {"athlon-fx", PROCESSOR_K8
, (PTA_64BIT
1877 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1878 | PTA_SSE
| PTA_SSE2
1880 {"amdfam10", PROCESSOR_AMDFAM10
, (PTA_64BIT
1881 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1882 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1884 | PTA_CX16
| PTA_ABM
)},
1885 {"barcelona", PROCESSOR_AMDFAM10
, (PTA_64BIT
1886 | PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
1887 | PTA_SSE
| PTA_SSE2
| PTA_SSE3
1889 | PTA_CX16
| PTA_ABM
)},
1890 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1891 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1894 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1896 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1897 SUBTARGET_OVERRIDE_OPTIONS
;
1900 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1901 SUBSUBTARGET_OVERRIDE_OPTIONS
;
1904 /* -fPIC is the default for x86_64. */
1905 if (TARGET_MACHO
&& TARGET_64BIT
)
1908 /* Set the default values for switches whose default depends on TARGET_64BIT
1909 in case they weren't overwritten by command line options. */
1912 /* Mach-O doesn't support omitting the frame pointer for now. */
1913 if (flag_omit_frame_pointer
== 2)
1914 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
1915 if (flag_asynchronous_unwind_tables
== 2)
1916 flag_asynchronous_unwind_tables
= 1;
1917 if (flag_pcc_struct_return
== 2)
1918 flag_pcc_struct_return
= 0;
1922 if (flag_omit_frame_pointer
== 2)
1923 flag_omit_frame_pointer
= 0;
1924 if (flag_asynchronous_unwind_tables
== 2)
1925 flag_asynchronous_unwind_tables
= 0;
1926 if (flag_pcc_struct_return
== 2)
1927 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1930 /* Need to check -mtune=generic first. */
1931 if (ix86_tune_string
)
1933 if (!strcmp (ix86_tune_string
, "generic")
1934 || !strcmp (ix86_tune_string
, "i686")
1935 /* As special support for cross compilers we read -mtune=native
1936 as -mtune=generic. With native compilers we won't see the
1937 -mtune=native, as it was changed by the driver. */
1938 || !strcmp (ix86_tune_string
, "native"))
1941 ix86_tune_string
= "generic64";
1943 ix86_tune_string
= "generic32";
1945 else if (!strncmp (ix86_tune_string
, "generic", 7))
1946 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
1950 if (ix86_arch_string
)
1951 ix86_tune_string
= ix86_arch_string
;
1952 if (!ix86_tune_string
)
1954 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1955 ix86_tune_defaulted
= 1;
1958 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1959 need to use a sensible tune option. */
1960 if (!strcmp (ix86_tune_string
, "generic")
1961 || !strcmp (ix86_tune_string
, "x86-64")
1962 || !strcmp (ix86_tune_string
, "i686"))
1965 ix86_tune_string
= "generic64";
1967 ix86_tune_string
= "generic32";
1970 if (ix86_stringop_string
)
1972 if (!strcmp (ix86_stringop_string
, "rep_byte"))
1973 stringop_alg
= rep_prefix_1_byte
;
1974 else if (!strcmp (ix86_stringop_string
, "libcall"))
1975 stringop_alg
= libcall
;
1976 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
1977 stringop_alg
= rep_prefix_4_byte
;
1978 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
1979 stringop_alg
= rep_prefix_8_byte
;
1980 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
1981 stringop_alg
= loop_1_byte
;
1982 else if (!strcmp (ix86_stringop_string
, "loop"))
1983 stringop_alg
= loop
;
1984 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
1985 stringop_alg
= unrolled_loop
;
1987 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
1989 if (!strcmp (ix86_tune_string
, "x86-64"))
1990 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1991 "-mtune=generic instead as appropriate.");
1993 if (!ix86_arch_string
)
1994 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
1996 ix86_arch_specified
= 1;
1998 if (!strcmp (ix86_arch_string
, "generic"))
1999 error ("generic CPU can be used only for -mtune= switch");
2000 if (!strncmp (ix86_arch_string
, "generic", 7))
2001 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2003 if (ix86_cmodel_string
!= 0)
2005 if (!strcmp (ix86_cmodel_string
, "small"))
2006 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2007 else if (!strcmp (ix86_cmodel_string
, "medium"))
2008 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2009 else if (!strcmp (ix86_cmodel_string
, "large"))
2010 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2012 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2013 else if (!strcmp (ix86_cmodel_string
, "32"))
2014 ix86_cmodel
= CM_32
;
2015 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2016 ix86_cmodel
= CM_KERNEL
;
2018 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2022 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2023 use of rip-relative addressing. This eliminates fixups that
2024 would otherwise be needed if this object is to be placed in a
2025 DLL, and is essentially just as efficient as direct addressing. */
2026 if (TARGET_64BIT_MS_ABI
)
2027 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
2028 else if (TARGET_64BIT
)
2029 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2031 ix86_cmodel
= CM_32
;
2033 if (ix86_asm_string
!= 0)
2036 && !strcmp (ix86_asm_string
, "intel"))
2037 ix86_asm_dialect
= ASM_INTEL
;
2038 else if (!strcmp (ix86_asm_string
, "att"))
2039 ix86_asm_dialect
= ASM_ATT
;
2041 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2043 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2044 error ("code model %qs not supported in the %s bit mode",
2045 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2046 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
2047 sorry ("%i-bit mode not compiled in",
2048 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
2050 for (i
= 0; i
< pta_size
; i
++)
2051 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2053 ix86_arch
= processor_alias_table
[i
].processor
;
2054 /* Default cpu tuning to the architecture. */
2055 ix86_tune
= ix86_arch
;
2057 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2058 error ("CPU you selected does not support x86-64 "
2061 if (processor_alias_table
[i
].flags
& PTA_MMX
2062 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
2063 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2064 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2065 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
2066 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
2067 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2068 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
2069 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
2070 if (processor_alias_table
[i
].flags
& PTA_SSE
2071 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
2072 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2073 if (processor_alias_table
[i
].flags
& PTA_SSE2
2074 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
2075 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2076 if (processor_alias_table
[i
].flags
& PTA_SSE3
2077 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
2078 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2079 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2080 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
2081 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2082 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
2083 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
2084 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2085 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
2086 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
2087 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
2088 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2089 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
2090 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
2092 if (processor_alias_table
[i
].flags
& PTA_ABM
)
2094 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2095 x86_cmpxchg16b
= true;
2096 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
))
2098 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
2099 x86_prefetch_sse
= true;
2100 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2107 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2109 ix86_arch_mask
= 1u << ix86_arch
;
2110 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2111 ix86_arch_features
[i
] &= ix86_arch_mask
;
2113 for (i
= 0; i
< pta_size
; i
++)
2114 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2116 ix86_tune
= processor_alias_table
[i
].processor
;
2117 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2119 if (ix86_tune_defaulted
)
2121 ix86_tune_string
= "x86-64";
2122 for (i
= 0; i
< pta_size
; i
++)
2123 if (! strcmp (ix86_tune_string
,
2124 processor_alias_table
[i
].name
))
2126 ix86_tune
= processor_alias_table
[i
].processor
;
2129 error ("CPU you selected does not support x86-64 "
2132 /* Intel CPUs have always interpreted SSE prefetch instructions as
2133 NOPs; so, we can enable SSE prefetch instructions even when
2134 -mtune (rather than -march) points us to a processor that has them.
2135 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2136 higher processors. */
2138 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
2139 x86_prefetch_sse
= true;
2143 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2145 ix86_tune_mask
= 1u << ix86_tune
;
2146 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2147 ix86_tune_features
[i
] &= ix86_tune_mask
;
2150 ix86_cost
= &size_cost
;
2152 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2154 /* Arrange to set up i386_stack_locals for all functions. */
2155 init_machine_status
= ix86_init_machine_status
;
2157 /* Validate -mregparm= value. */
2158 if (ix86_regparm_string
)
2161 warning (0, "-mregparm is ignored in 64-bit mode");
2162 i
= atoi (ix86_regparm_string
);
2163 if (i
< 0 || i
> REGPARM_MAX
)
2164 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2169 ix86_regparm
= REGPARM_MAX
;
2171 /* If the user has provided any of the -malign-* options,
2172 warn and use that value only if -falign-* is not set.
2173 Remove this code in GCC 3.2 or later. */
2174 if (ix86_align_loops_string
)
2176 warning (0, "-malign-loops is obsolete, use -falign-loops");
2177 if (align_loops
== 0)
2179 i
= atoi (ix86_align_loops_string
);
2180 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2181 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2183 align_loops
= 1 << i
;
2187 if (ix86_align_jumps_string
)
2189 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2190 if (align_jumps
== 0)
2192 i
= atoi (ix86_align_jumps_string
);
2193 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2194 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2196 align_jumps
= 1 << i
;
2200 if (ix86_align_funcs_string
)
2202 warning (0, "-malign-functions is obsolete, use -falign-functions");
2203 if (align_functions
== 0)
2205 i
= atoi (ix86_align_funcs_string
);
2206 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2207 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2209 align_functions
= 1 << i
;
2213 /* Default align_* from the processor table. */
2214 if (align_loops
== 0)
2216 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2217 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2219 if (align_jumps
== 0)
2221 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2222 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2224 if (align_functions
== 0)
2226 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2229 /* Validate -mbranch-cost= value, or provide default. */
2230 ix86_branch_cost
= ix86_cost
->branch_cost
;
2231 if (ix86_branch_cost_string
)
2233 i
= atoi (ix86_branch_cost_string
);
2235 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2237 ix86_branch_cost
= i
;
2239 if (ix86_section_threshold_string
)
2241 i
= atoi (ix86_section_threshold_string
);
2243 error ("-mlarge-data-threshold=%d is negative", i
);
2245 ix86_section_threshold
= i
;
2248 if (ix86_tls_dialect_string
)
2250 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2251 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2252 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2253 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2254 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2255 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2257 error ("bad value (%s) for -mtls-dialect= switch",
2258 ix86_tls_dialect_string
);
2261 if (ix87_precision_string
)
2263 i
= atoi (ix87_precision_string
);
2264 if (i
!= 32 && i
!= 64 && i
!= 80)
2265 error ("pc%d is not valid precision setting (32, 64 or 80)", i
);
2270 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
2272 /* Enable by default the SSE and MMX builtins. Do allow the user to
2273 explicitly disable any of these. In particular, disabling SSE and
2274 MMX for kernel code is extremely useful. */
2275 if (!ix86_arch_specified
)
2277 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
2278 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
2281 warning (0, "-mrtd is ignored in 64bit mode");
2285 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
2287 if (!ix86_arch_specified
)
2289 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
2291 /* i386 ABI does not specify red zone. It still makes sense to use it
2292 when programmer takes care to stack from being destroyed. */
2293 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2294 target_flags
|= MASK_NO_RED_ZONE
;
2297 /* Keep nonleaf frame pointers. */
2298 if (flag_omit_frame_pointer
)
2299 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2300 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2301 flag_omit_frame_pointer
= 1;
2303 /* If we're doing fast math, we don't care about comparison order
2304 wrt NaNs. This lets us use a shorter comparison sequence. */
2305 if (flag_finite_math_only
)
2306 target_flags
&= ~MASK_IEEE_FP
;
2308 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2309 since the insns won't need emulation. */
2310 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2311 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2313 /* Likewise, if the target doesn't have a 387, or we've specified
2314 software floating point, don't use 387 inline intrinsics. */
2316 target_flags
|= MASK_NO_FANCY_MATH_387
;
2318 /* Turn on SSE4.1 builtins for -msse4.2. */
2320 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
2322 /* Turn on SSSE3 builtins for -msse4.1. */
2324 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
2326 /* Turn on SSE3 builtins for -mssse3. */
2328 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2330 /* Turn on SSE3 builtins for -msse4a. */
2332 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
2334 /* Turn on SSE2 builtins for -msse3. */
2336 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
2338 /* Turn on SSE builtins for -msse2. */
2340 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
2342 /* Turn on MMX builtins for -msse. */
2345 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
2346 x86_prefetch_sse
= true;
2349 /* Turn on MMX builtins for 3Dnow. */
2351 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
2353 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2354 if (TARGET_SSE4_2
|| TARGET_ABM
)
2357 /* Validate -mpreferred-stack-boundary= value, or provide default.
2358 The default of 128 bits is for Pentium III's SSE __m128. We can't
2359 change it because of optimize_size. Otherwise, we can't mix object
2360 files compiled with -Os and -On. */
2361 ix86_preferred_stack_boundary
= 128;
2362 if (ix86_preferred_stack_boundary_string
)
2364 i
= atoi (ix86_preferred_stack_boundary_string
);
2365 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2366 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2367 TARGET_64BIT
? 4 : 2);
2369 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2372 /* Accept -msseregparm only if at least SSE support is enabled. */
2373 if (TARGET_SSEREGPARM
2375 error ("-msseregparm used without SSE enabled");
2377 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2378 if (ix86_fpmath_string
!= 0)
2380 if (! strcmp (ix86_fpmath_string
, "387"))
2381 ix86_fpmath
= FPMATH_387
;
2382 else if (! strcmp (ix86_fpmath_string
, "sse"))
2386 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2387 ix86_fpmath
= FPMATH_387
;
2390 ix86_fpmath
= FPMATH_SSE
;
2392 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2393 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2397 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2398 ix86_fpmath
= FPMATH_387
;
2400 else if (!TARGET_80387
)
2402 warning (0, "387 instruction set disabled, using SSE arithmetics");
2403 ix86_fpmath
= FPMATH_SSE
;
2406 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
2409 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2412 /* If the i387 is disabled, then do not return values in it. */
2414 target_flags
&= ~MASK_FLOAT_RETURNS
;
2416 /* Use external vectorized library in vectorizing intrinsics. */
2417 if (ix86_veclibabi_string
)
2419 if (strcmp (ix86_veclibabi_string
, "acml") == 0)
2420 ix86_veclib_handler
= ix86_veclibabi_acml
;
2422 error ("unknown vectorization library ABI type (%s) for "
2423 "-mveclibabi= switch", ix86_veclibabi_string
);
2426 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2427 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2429 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2431 /* ??? Unwind info is not correct around the CFG unless either a frame
2432 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2433 unwind info generation to be aware of the CFG and propagating states
2435 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2436 || flag_exceptions
|| flag_non_call_exceptions
)
2437 && flag_omit_frame_pointer
2438 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2440 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2441 warning (0, "unwind tables currently require either a frame pointer "
2442 "or -maccumulate-outgoing-args for correctness");
2443 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2446 /* For sane SSE instruction set generation we need fcomi instruction.
2447 It is safe to enable all CMOVE instructions. */
2451 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2454 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2455 p
= strchr (internal_label_prefix
, 'X');
2456 internal_label_prefix_len
= p
- internal_label_prefix
;
2460 /* When scheduling description is not available, disable scheduler pass
2461 so it won't slow down the compilation and make x87 code slower. */
2462 if (!TARGET_SCHEDULE
)
2463 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2465 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2466 set_param_value ("simultaneous-prefetches",
2467 ix86_cost
->simultaneous_prefetches
);
2468 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2469 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2470 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE
))
2471 set_param_value ("l1-cache-size", ix86_cost
->l1_cache_size
);
2472 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE
))
2473 set_param_value ("l2-cache-size", ix86_cost
->l2_cache_size
);
2476 /* Return true if this goes in large data/bss. */
2479 ix86_in_large_data_p (tree exp
)
2481 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
2484 /* Functions are never large data. */
2485 if (TREE_CODE (exp
) == FUNCTION_DECL
)
2488 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
2490 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
2491 if (strcmp (section
, ".ldata") == 0
2492 || strcmp (section
, ".lbss") == 0)
2498 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
2500 /* If this is an incomplete type with size 0, then we can't put it
2501 in data because it might be too big when completed. */
2502 if (!size
|| size
> ix86_section_threshold
)
2509 /* Switch to the appropriate section for output of DECL.
2510 DECL is either a `VAR_DECL' node or a constant of some sort.
2511 RELOC indicates whether forming the initial value of DECL requires
2512 link-time relocations. */
2514 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
2518 x86_64_elf_select_section (tree decl
, int reloc
,
2519 unsigned HOST_WIDE_INT align
)
2521 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2522 && ix86_in_large_data_p (decl
))
2524 const char *sname
= NULL
;
2525 unsigned int flags
= SECTION_WRITE
;
2526 switch (categorize_decl_for_section (decl
, reloc
))
2531 case SECCAT_DATA_REL
:
2532 sname
= ".ldata.rel";
2534 case SECCAT_DATA_REL_LOCAL
:
2535 sname
= ".ldata.rel.local";
2537 case SECCAT_DATA_REL_RO
:
2538 sname
= ".ldata.rel.ro";
2540 case SECCAT_DATA_REL_RO_LOCAL
:
2541 sname
= ".ldata.rel.ro.local";
2545 flags
|= SECTION_BSS
;
2548 case SECCAT_RODATA_MERGE_STR
:
2549 case SECCAT_RODATA_MERGE_STR_INIT
:
2550 case SECCAT_RODATA_MERGE_CONST
:
2554 case SECCAT_SRODATA
:
2561 /* We don't split these for medium model. Place them into
2562 default sections and hope for best. */
2567 /* We might get called with string constants, but get_named_section
2568 doesn't like them as they are not DECLs. Also, we need to set
2569 flags in that case. */
2571 return get_section (sname
, flags
, NULL
);
2572 return get_named_section (decl
, sname
, reloc
);
2575 return default_elf_select_section (decl
, reloc
, align
);
2578 /* Build up a unique section name, expressed as a
2579 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2580 RELOC indicates whether the initial value of EXP requires
2581 link-time relocations. */
2583 static void ATTRIBUTE_UNUSED
2584 x86_64_elf_unique_section (tree decl
, int reloc
)
2586 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2587 && ix86_in_large_data_p (decl
))
2589 const char *prefix
= NULL
;
2590 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2591 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2593 switch (categorize_decl_for_section (decl
, reloc
))
2596 case SECCAT_DATA_REL
:
2597 case SECCAT_DATA_REL_LOCAL
:
2598 case SECCAT_DATA_REL_RO
:
2599 case SECCAT_DATA_REL_RO_LOCAL
:
2600 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2603 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2606 case SECCAT_RODATA_MERGE_STR
:
2607 case SECCAT_RODATA_MERGE_STR_INIT
:
2608 case SECCAT_RODATA_MERGE_CONST
:
2609 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2611 case SECCAT_SRODATA
:
2618 /* We don't split these for medium model. Place them into
2619 default sections and hope for best. */
2627 plen
= strlen (prefix
);
2629 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2630 name
= targetm
.strip_name_encoding (name
);
2631 nlen
= strlen (name
);
2633 string
= (char *) alloca (nlen
+ plen
+ 1);
2634 memcpy (string
, prefix
, plen
);
2635 memcpy (string
+ plen
, name
, nlen
+ 1);
2637 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
2641 default_unique_section (decl
, reloc
);
2644 #ifdef COMMON_ASM_OP
2645 /* This says how to output assembler code to declare an
2646 uninitialized external linkage data object.
2648 For medium model x86-64 we need to use .largecomm opcode for
2651 x86_elf_aligned_common (FILE *file
,
2652 const char *name
, unsigned HOST_WIDE_INT size
,
2655 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2656 && size
> (unsigned int)ix86_section_threshold
)
2657 fprintf (file
, ".largecomm\t");
2659 fprintf (file
, "%s", COMMON_ASM_OP
);
2660 assemble_name (file
, name
);
2661 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2662 size
, align
/ BITS_PER_UNIT
);
2666 /* Utility function for targets to use in implementing
2667 ASM_OUTPUT_ALIGNED_BSS. */
2670 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2671 const char *name
, unsigned HOST_WIDE_INT size
,
2674 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2675 && size
> (unsigned int)ix86_section_threshold
)
2676 switch_to_section (get_named_section (decl
, ".lbss", 0));
2678 switch_to_section (bss_section
);
2679 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2680 #ifdef ASM_DECLARE_OBJECT_NAME
2681 last_assemble_variable_decl
= decl
;
2682 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2684 /* Standard thing is just output label for the object. */
2685 ASM_OUTPUT_LABEL (file
, name
);
2686 #endif /* ASM_DECLARE_OBJECT_NAME */
2687 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2691 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2693 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2694 make the problem with not enough registers even worse. */
2695 #ifdef INSN_SCHEDULING
2697 flag_schedule_insns
= 0;
2701 /* The Darwin libraries never set errno, so we might as well
2702 avoid calling them when that's the only reason we would. */
2703 flag_errno_math
= 0;
2705 /* The default values of these switches depend on the TARGET_64BIT
2706 that is not known at this moment. Mark these values with 2 and
2707 let user the to override these. In case there is no command line option
2708 specifying them, we will set the defaults in override_options. */
2710 flag_omit_frame_pointer
= 2;
2711 flag_pcc_struct_return
= 2;
2712 flag_asynchronous_unwind_tables
= 2;
2713 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2714 SUBTARGET_OPTIMIZATION_OPTIONS
;
2718 /* Decide whether we can make a sibling call to a function. DECL is the
2719 declaration of the function being targeted by the call and EXP is the
2720 CALL_EXPR representing the call. */
2723 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2728 /* If we are generating position-independent code, we cannot sibcall
2729 optimize any indirect call, or a direct call to a global function,
2730 as the PLT requires %ebx be live. */
2731 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2738 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2739 if (POINTER_TYPE_P (func
))
2740 func
= TREE_TYPE (func
);
2743 /* Check that the return value locations are the same. Like
2744 if we are returning floats on the 80387 register stack, we cannot
2745 make a sibcall from a function that doesn't return a float to a
2746 function that does or, conversely, from a function that does return
2747 a float to a function that doesn't; the necessary stack adjustment
2748 would not be executed. This is also the place we notice
2749 differences in the return value ABI. Note that it is ok for one
2750 of the functions to have void return type as long as the return
2751 value of the other is passed in a register. */
2752 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2753 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2755 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2757 if (!rtx_equal_p (a
, b
))
2760 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2762 else if (!rtx_equal_p (a
, b
))
2765 /* If this call is indirect, we'll need to be able to use a call-clobbered
2766 register for the address of the target function. Make sure that all
2767 such registers are not used for passing parameters. */
2768 if (!decl
&& !TARGET_64BIT
)
2772 /* We're looking at the CALL_EXPR, we need the type of the function. */
2773 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2774 type
= TREE_TYPE (type
); /* pointer type */
2775 type
= TREE_TYPE (type
); /* function type */
2777 if (ix86_function_regparm (type
, NULL
) >= 3)
2779 /* ??? Need to count the actual number of registers to be used,
2780 not the possible number of registers. Fix later. */
2785 /* Dllimport'd functions are also called indirectly. */
2786 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2787 && decl
&& DECL_DLLIMPORT_P (decl
)
2788 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2791 /* If we forced aligned the stack, then sibcalling would unalign the
2792 stack, which may break the called function. */
2793 if (cfun
->machine
->force_align_arg_pointer
)
2796 /* Otherwise okay. That also includes certain types of indirect calls. */
2800 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2801 calling convention attributes;
2802 arguments as in struct attribute_spec.handler. */
2805 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2807 int flags ATTRIBUTE_UNUSED
,
2810 if (TREE_CODE (*node
) != FUNCTION_TYPE
2811 && TREE_CODE (*node
) != METHOD_TYPE
2812 && TREE_CODE (*node
) != FIELD_DECL
2813 && TREE_CODE (*node
) != TYPE_DECL
)
2815 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2816 IDENTIFIER_POINTER (name
));
2817 *no_add_attrs
= true;
2821 /* Can combine regparm with all attributes but fastcall. */
2822 if (is_attribute_p ("regparm", name
))
2826 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2828 error ("fastcall and regparm attributes are not compatible");
2831 cst
= TREE_VALUE (args
);
2832 if (TREE_CODE (cst
) != INTEGER_CST
)
2834 warning (OPT_Wattributes
,
2835 "%qs attribute requires an integer constant argument",
2836 IDENTIFIER_POINTER (name
));
2837 *no_add_attrs
= true;
2839 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2841 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2842 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2843 *no_add_attrs
= true;
2847 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2848 TYPE_ATTRIBUTES (*node
))
2849 && compare_tree_int (cst
, REGPARM_MAX
-1))
2851 error ("%s functions limited to %d register parameters",
2852 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2860 /* Do not warn when emulating the MS ABI. */
2861 if (!TARGET_64BIT_MS_ABI
)
2862 warning (OPT_Wattributes
, "%qs attribute ignored",
2863 IDENTIFIER_POINTER (name
));
2864 *no_add_attrs
= true;
2868 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2869 if (is_attribute_p ("fastcall", name
))
2871 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2873 error ("fastcall and cdecl attributes are not compatible");
2875 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2877 error ("fastcall and stdcall attributes are not compatible");
2879 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2881 error ("fastcall and regparm attributes are not compatible");
2885 /* Can combine stdcall with fastcall (redundant), regparm and
2887 else if (is_attribute_p ("stdcall", name
))
2889 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2891 error ("stdcall and cdecl attributes are not compatible");
2893 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2895 error ("stdcall and fastcall attributes are not compatible");
2899 /* Can combine cdecl with regparm and sseregparm. */
2900 else if (is_attribute_p ("cdecl", name
))
2902 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2904 error ("stdcall and cdecl attributes are not compatible");
2906 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2908 error ("fastcall and cdecl attributes are not compatible");
2912 /* Can combine sseregparm with all attributes. */
2917 /* Return 0 if the attributes for two types are incompatible, 1 if they
2918 are compatible, and 2 if they are nearly compatible (which causes a
2919 warning to be generated). */
2922 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
2924 /* Check for mismatch of non-default calling convention. */
2925 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2927 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2930 /* Check for mismatched fastcall/regparm types. */
2931 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2932 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2933 || (ix86_function_regparm (type1
, NULL
)
2934 != ix86_function_regparm (type2
, NULL
)))
2937 /* Check for mismatched sseregparm types. */
2938 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2939 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2942 /* Check for mismatched return types (cdecl vs stdcall). */
2943 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2944 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2950 /* Return the regparm value for a function with the indicated TYPE and DECL.
2951 DECL may be NULL when calling function indirectly
2952 or considering a libcall. */
2955 ix86_function_regparm (const_tree type
, const_tree decl
)
2958 int regparm
= ix86_regparm
;
2963 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
2965 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
2967 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
2970 /* Use register calling convention for local functions when possible. */
2971 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
2972 && flag_unit_at_a_time
&& !profile_flag
)
2974 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
2975 struct cgraph_local_info
*i
= cgraph_local_info ((tree
)CONST_CAST(decl
));
2978 int local_regparm
, globals
= 0, regno
;
2981 /* Make sure no regparm register is taken by a
2982 global register variable. */
2983 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
2984 if (global_regs
[local_regparm
])
2987 /* We can't use regparm(3) for nested functions as these use
2988 static chain pointer in third argument. */
2989 if (local_regparm
== 3
2990 && (decl_function_context (decl
)
2991 || ix86_force_align_arg_pointer
)
2992 && !DECL_NO_STATIC_CHAIN (decl
))
2995 /* If the function realigns its stackpointer, the prologue will
2996 clobber %ecx. If we've already generated code for the callee,
2997 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2998 scanning the attributes for the self-realigning property. */
2999 f
= DECL_STRUCT_FUNCTION (decl
);
3000 if (local_regparm
== 3
3001 && (f
? !!f
->machine
->force_align_arg_pointer
3002 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3003 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3006 /* Each global register variable increases register preassure,
3007 so the more global reg vars there are, the smaller regparm
3008 optimization use, unless requested by the user explicitly. */
3009 for (regno
= 0; regno
< 6; regno
++)
3010 if (global_regs
[regno
])
3013 = globals
< local_regparm
? local_regparm
- globals
: 0;
3015 if (local_regparm
> regparm
)
3016 regparm
= local_regparm
;
3023 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3024 DFmode (2) arguments in SSE registers for a function with the
3025 indicated TYPE and DECL. DECL may be NULL when calling function
3026 indirectly or considering a libcall. Otherwise return 0. */
3029 ix86_function_sseregparm (const_tree type
, const_tree decl
)
3031 gcc_assert (!TARGET_64BIT
);
3033 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3034 by the sseregparm attribute. */
3035 if (TARGET_SSEREGPARM
3036 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3041 error ("Calling %qD with attribute sseregparm without "
3042 "SSE/SSE2 enabled", decl
);
3044 error ("Calling %qT with attribute sseregparm without "
3045 "SSE/SSE2 enabled", type
);
3052 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3053 (and DFmode for SSE2) arguments in SSE registers. */
3054 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3056 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3057 struct cgraph_local_info
*i
= cgraph_local_info ((tree
)CONST_CAST(decl
));
3059 return TARGET_SSE2
? 2 : 1;
3065 /* Return true if EAX is live at the start of the function. Used by
3066 ix86_expand_prologue to determine if we need special help before
3067 calling allocate_stack_worker. */
3070 ix86_eax_live_at_start_p (void)
3072 /* Cheat. Don't bother working forward from ix86_function_regparm
3073 to the function type to whether an actual argument is located in
3074 eax. Instead just look at cfg info, which is still close enough
3075 to correct at this point. This gives false positives for broken
3076 functions that might use uninitialized data that happens to be
3077 allocated in eax, but who cares? */
3078 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
3081 /* Return true if TYPE has a variable argument list. */
3084 type_has_variadic_args_p (tree type
)
3086 tree n
, t
= TYPE_ARG_TYPES (type
);
3091 while ((n
= TREE_CHAIN (t
)) != NULL
)
3094 return TREE_VALUE (t
) != void_type_node
;
3097 /* Value is the number of bytes of arguments automatically
3098 popped when returning from a subroutine call.
3099 FUNDECL is the declaration node of the function (as a tree),
3100 FUNTYPE is the data type of the function (as a tree),
3101 or for a library call it is an identifier node for the subroutine name.
3102 SIZE is the number of bytes of arguments passed on the stack.
3104 On the 80386, the RTD insn may be used to pop them if the number
3105 of args is fixed, but if the number is variable then the caller
3106 must pop them all. RTD can't be used for library calls now
3107 because the library is compiled with the Unix compiler.
3108 Use of RTD is a selectable option, since it is incompatible with
3109 standard Unix calling sequences. If the option is not selected,
3110 the caller must always pop the args.
3112 The attribute stdcall is equivalent to RTD on a per module basis. */
3115 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3119 /* None of the 64-bit ABIs pop arguments. */
3123 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3125 /* Cdecl functions override -mrtd, and never pop the stack. */
3126 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3128 /* Stdcall and fastcall functions will pop the stack if not
3130 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3131 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3134 if (rtd
&& ! type_has_variadic_args_p (funtype
))
3138 /* Lose any fake structure return argument if it is passed on the stack. */
3139 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3140 && !KEEP_AGGREGATE_RETURN_POINTER
)
3142 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3144 return GET_MODE_SIZE (Pmode
);
3150 /* Argument support functions. */
3152 /* Return true when register may be used to pass function parameters. */
3154 ix86_function_arg_regno_p (int regno
)
3157 const int *parm_regs
;
3162 return (regno
< REGPARM_MAX
3163 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3165 return (regno
< REGPARM_MAX
3166 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3167 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3168 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3169 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3174 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3179 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3180 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3184 /* RAX is used as hidden argument to va_arg functions. */
3185 if (!TARGET_64BIT_MS_ABI
&& regno
== 0)
3188 if (TARGET_64BIT_MS_ABI
)
3189 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
3191 parm_regs
= x86_64_int_parameter_registers
;
3192 for (i
= 0; i
< REGPARM_MAX
; i
++)
3193 if (regno
== parm_regs
[i
])
3198 /* Return if we do not know how to pass TYPE solely in registers. */
3201 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
3203 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3206 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3207 The layout_type routine is crafty and tries to trick us into passing
3208 currently unsupported vector types on the stack by using TImode. */
3209 return (!TARGET_64BIT
&& mode
== TImode
3210 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3213 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3214 for a call to a function whose data type is FNTYPE.
3215 For a library call, FNTYPE is 0. */
3218 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3219 tree fntype
, /* tree ptr for function decl */
3220 rtx libname
, /* SYMBOL_REF of library name or 0 */
3223 memset (cum
, 0, sizeof (*cum
));
3225 /* Set up the number of registers to use for passing arguments. */
3226 cum
->nregs
= ix86_regparm
;
3228 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3230 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3231 cum
->warn_sse
= true;
3232 cum
->warn_mmx
= true;
3233 cum
->maybe_vaarg
= (fntype
3234 ? (!TYPE_ARG_TYPES (fntype
)
3235 || type_has_variadic_args_p (fntype
))
3240 /* If there are variable arguments, then we won't pass anything
3241 in registers in 32-bit mode. */
3242 if (cum
->maybe_vaarg
)
3252 /* Use ecx and edx registers if function has fastcall attribute,
3253 else look for regparm information. */
3256 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3262 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3265 /* Set up the number of SSE registers used for passing SFmode
3266 and DFmode arguments. Warn for mismatching ABI. */
3267 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3271 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3272 But in the case of vector types, it is some vector mode.
3274 When we have only some of our vector isa extensions enabled, then there
3275 are some modes for which vector_mode_supported_p is false. For these
3276 modes, the generic vector support in gcc will choose some non-vector mode
3277 in order to implement the type. By computing the natural mode, we'll
3278 select the proper ABI location for the operand and not depend on whatever
3279 the middle-end decides to do with these vector types. */
3281 static enum machine_mode
3282 type_natural_mode (const_tree type
)
3284 enum machine_mode mode
= TYPE_MODE (type
);
3286 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3288 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3289 if ((size
== 8 || size
== 16)
3290 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3291 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3293 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3295 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3296 mode
= MIN_MODE_VECTOR_FLOAT
;
3298 mode
= MIN_MODE_VECTOR_INT
;
3300 /* Get the mode which has this inner mode and number of units. */
3301 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3302 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3303 && GET_MODE_INNER (mode
) == innermode
)
3313 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3314 this may not agree with the mode that the type system has chosen for the
3315 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3316 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3319 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3324 if (orig_mode
!= BLKmode
)
3325 tmp
= gen_rtx_REG (orig_mode
, regno
);
3328 tmp
= gen_rtx_REG (mode
, regno
);
3329 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3330 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3336 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3337 of this code is to classify each 8bytes of incoming argument by the register
3338 class and assign registers accordingly. */
3340 /* Return the union class of CLASS1 and CLASS2.
3341 See the x86-64 PS ABI for details. */
3343 static enum x86_64_reg_class
3344 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3346 /* Rule #1: If both classes are equal, this is the resulting class. */
3347 if (class1
== class2
)
3350 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3352 if (class1
== X86_64_NO_CLASS
)
3354 if (class2
== X86_64_NO_CLASS
)
3357 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3358 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3359 return X86_64_MEMORY_CLASS
;
3361 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3362 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3363 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3364 return X86_64_INTEGERSI_CLASS
;
3365 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3366 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3367 return X86_64_INTEGER_CLASS
;
3369 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3371 if (class1
== X86_64_X87_CLASS
3372 || class1
== X86_64_X87UP_CLASS
3373 || class1
== X86_64_COMPLEX_X87_CLASS
3374 || class2
== X86_64_X87_CLASS
3375 || class2
== X86_64_X87UP_CLASS
3376 || class2
== X86_64_COMPLEX_X87_CLASS
)
3377 return X86_64_MEMORY_CLASS
;
3379 /* Rule #6: Otherwise class SSE is used. */
3380 return X86_64_SSE_CLASS
;
3383 /* Classify the argument of type TYPE and mode MODE.
3384 CLASSES will be filled by the register class used to pass each word
3385 of the operand. The number of words is returned. In case the parameter
3386 should be passed in memory, 0 is returned. As a special case for zero
3387 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3389 BIT_OFFSET is used internally for handling records and specifies offset
3390 of the offset in bits modulo 256 to avoid overflow cases.
3392 See the x86-64 PS ABI for details.
3396 classify_argument (enum machine_mode mode
, const_tree type
,
3397 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3399 HOST_WIDE_INT bytes
=
3400 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3401 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3403 /* Variable sized entities are always passed/returned in memory. */
3407 if (mode
!= VOIDmode
3408 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3411 if (type
&& AGGREGATE_TYPE_P (type
))
3415 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3417 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3421 for (i
= 0; i
< words
; i
++)
3422 classes
[i
] = X86_64_NO_CLASS
;
3424 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3425 signalize memory class, so handle it as special case. */
3428 classes
[0] = X86_64_NO_CLASS
;
3432 /* Classify each field of record and merge classes. */
3433 switch (TREE_CODE (type
))
3436 /* And now merge the fields of structure. */
3437 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3439 if (TREE_CODE (field
) == FIELD_DECL
)
3443 if (TREE_TYPE (field
) == error_mark_node
)
3446 /* Bitfields are always classified as integer. Handle them
3447 early, since later code would consider them to be
3448 misaligned integers. */
3449 if (DECL_BIT_FIELD (field
))
3451 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3452 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3453 + tree_low_cst (DECL_SIZE (field
), 0)
3456 merge_classes (X86_64_INTEGER_CLASS
,
3461 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3462 TREE_TYPE (field
), subclasses
,
3463 (int_bit_position (field
)
3464 + bit_offset
) % 256);
3467 for (i
= 0; i
< num
; i
++)
3470 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3472 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3480 /* Arrays are handled as small records. */
3483 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3484 TREE_TYPE (type
), subclasses
, bit_offset
);
3488 /* The partial classes are now full classes. */
3489 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3490 subclasses
[0] = X86_64_SSE_CLASS
;
3491 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3492 subclasses
[0] = X86_64_INTEGER_CLASS
;
3494 for (i
= 0; i
< words
; i
++)
3495 classes
[i
] = subclasses
[i
% num
];
3500 case QUAL_UNION_TYPE
:
3501 /* Unions are similar to RECORD_TYPE but offset is always 0.
3503 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3505 if (TREE_CODE (field
) == FIELD_DECL
)
3509 if (TREE_TYPE (field
) == error_mark_node
)
3512 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3513 TREE_TYPE (field
), subclasses
,
3517 for (i
= 0; i
< num
; i
++)
3518 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3527 /* Final merger cleanup. */
3528 for (i
= 0; i
< words
; i
++)
3530 /* If one class is MEMORY, everything should be passed in
3532 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3535 /* The X86_64_SSEUP_CLASS should be always preceded by
3536 X86_64_SSE_CLASS. */
3537 if (classes
[i
] == X86_64_SSEUP_CLASS
3538 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3539 classes
[i
] = X86_64_SSE_CLASS
;
3541 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3542 if (classes
[i
] == X86_64_X87UP_CLASS
3543 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3544 classes
[i
] = X86_64_SSE_CLASS
;
3549 /* Compute alignment needed. We align all types to natural boundaries with
3550 exception of XFmode that is aligned to 64bits. */
3551 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3553 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3556 mode_alignment
= 128;
3557 else if (mode
== XCmode
)
3558 mode_alignment
= 256;
3559 if (COMPLEX_MODE_P (mode
))
3560 mode_alignment
/= 2;
3561 /* Misaligned fields are always returned in memory. */
3562 if (bit_offset
% mode_alignment
)
3566 /* for V1xx modes, just use the base mode */
3567 if (VECTOR_MODE_P (mode
)
3568 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3569 mode
= GET_MODE_INNER (mode
);
3571 /* Classification of atomic types. */
3576 classes
[0] = X86_64_SSE_CLASS
;
3579 classes
[0] = X86_64_SSE_CLASS
;
3580 classes
[1] = X86_64_SSEUP_CLASS
;
3589 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3590 classes
[0] = X86_64_INTEGERSI_CLASS
;
3592 classes
[0] = X86_64_INTEGER_CLASS
;
3596 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3601 if (!(bit_offset
% 64))
3602 classes
[0] = X86_64_SSESF_CLASS
;
3604 classes
[0] = X86_64_SSE_CLASS
;
3607 classes
[0] = X86_64_SSEDF_CLASS
;
3610 classes
[0] = X86_64_X87_CLASS
;
3611 classes
[1] = X86_64_X87UP_CLASS
;
3614 classes
[0] = X86_64_SSE_CLASS
;
3615 classes
[1] = X86_64_SSEUP_CLASS
;
3618 classes
[0] = X86_64_SSE_CLASS
;
3621 classes
[0] = X86_64_SSEDF_CLASS
;
3622 classes
[1] = X86_64_SSEDF_CLASS
;
3625 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3628 /* This modes is larger than 16 bytes. */
3636 classes
[0] = X86_64_SSE_CLASS
;
3637 classes
[1] = X86_64_SSEUP_CLASS
;
3643 classes
[0] = X86_64_SSE_CLASS
;
3649 gcc_assert (VECTOR_MODE_P (mode
));
3654 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3656 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3657 classes
[0] = X86_64_INTEGERSI_CLASS
;
3659 classes
[0] = X86_64_INTEGER_CLASS
;
3660 classes
[1] = X86_64_INTEGER_CLASS
;
3661 return 1 + (bytes
> 8);
3665 /* Examine the argument and return set number of register required in each
3666 class. Return 0 iff parameter should be passed in memory. */
3668 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
3669 int *int_nregs
, int *sse_nregs
)
3671 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3672 int n
= classify_argument (mode
, type
, regclass
, 0);
3678 for (n
--; n
>= 0; n
--)
3679 switch (regclass
[n
])
3681 case X86_64_INTEGER_CLASS
:
3682 case X86_64_INTEGERSI_CLASS
:
3685 case X86_64_SSE_CLASS
:
3686 case X86_64_SSESF_CLASS
:
3687 case X86_64_SSEDF_CLASS
:
3690 case X86_64_NO_CLASS
:
3691 case X86_64_SSEUP_CLASS
:
3693 case X86_64_X87_CLASS
:
3694 case X86_64_X87UP_CLASS
:
3698 case X86_64_COMPLEX_X87_CLASS
:
3699 return in_return
? 2 : 0;
3700 case X86_64_MEMORY_CLASS
:
3706 /* Construct container for the argument used by GCC interface. See
3707 FUNCTION_ARG for the detailed description. */
3710 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3711 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
3712 const int *intreg
, int sse_regno
)
3714 /* The following variables hold the static issued_error state. */
3715 static bool issued_sse_arg_error
;
3716 static bool issued_sse_ret_error
;
3717 static bool issued_x87_ret_error
;
3719 enum machine_mode tmpmode
;
3721 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3722 enum x86_64_reg_class regclass
[MAX_CLASSES
];
3726 int needed_sseregs
, needed_intregs
;
3727 rtx exp
[MAX_CLASSES
];
3730 n
= classify_argument (mode
, type
, regclass
, 0);
3733 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3736 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3739 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3740 some less clueful developer tries to use floating-point anyway. */
3741 if (needed_sseregs
&& !TARGET_SSE
)
3745 if (!issued_sse_ret_error
)
3747 error ("SSE register return with SSE disabled");
3748 issued_sse_ret_error
= true;
3751 else if (!issued_sse_arg_error
)
3753 error ("SSE register argument with SSE disabled");
3754 issued_sse_arg_error
= true;
3759 /* Likewise, error if the ABI requires us to return values in the
3760 x87 registers and the user specified -mno-80387. */
3761 if (!TARGET_80387
&& in_return
)
3762 for (i
= 0; i
< n
; i
++)
3763 if (regclass
[i
] == X86_64_X87_CLASS
3764 || regclass
[i
] == X86_64_X87UP_CLASS
3765 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
3767 if (!issued_x87_ret_error
)
3769 error ("x87 register return with x87 disabled");
3770 issued_x87_ret_error
= true;
3775 /* First construct simple cases. Avoid SCmode, since we want to use
3776 single register to pass this type. */
3777 if (n
== 1 && mode
!= SCmode
)
3778 switch (regclass
[0])
3780 case X86_64_INTEGER_CLASS
:
3781 case X86_64_INTEGERSI_CLASS
:
3782 return gen_rtx_REG (mode
, intreg
[0]);
3783 case X86_64_SSE_CLASS
:
3784 case X86_64_SSESF_CLASS
:
3785 case X86_64_SSEDF_CLASS
:
3786 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3787 case X86_64_X87_CLASS
:
3788 case X86_64_COMPLEX_X87_CLASS
:
3789 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3790 case X86_64_NO_CLASS
:
3791 /* Zero sized array, struct or class. */
3796 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
3797 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
3798 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3801 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
3802 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3803 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
3804 && regclass
[1] == X86_64_INTEGER_CLASS
3805 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3806 && intreg
[0] + 1 == intreg
[1])
3807 return gen_rtx_REG (mode
, intreg
[0]);
3809 /* Otherwise figure out the entries of the PARALLEL. */
3810 for (i
= 0; i
< n
; i
++)
3812 switch (regclass
[i
])
3814 case X86_64_NO_CLASS
:
3816 case X86_64_INTEGER_CLASS
:
3817 case X86_64_INTEGERSI_CLASS
:
3818 /* Merge TImodes on aligned occasions here too. */
3819 if (i
* 8 + 8 > bytes
)
3820 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3821 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
3825 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3826 if (tmpmode
== BLKmode
)
3828 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3829 gen_rtx_REG (tmpmode
, *intreg
),
3833 case X86_64_SSESF_CLASS
:
3834 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3835 gen_rtx_REG (SFmode
,
3836 SSE_REGNO (sse_regno
)),
3840 case X86_64_SSEDF_CLASS
:
3841 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3842 gen_rtx_REG (DFmode
,
3843 SSE_REGNO (sse_regno
)),
3847 case X86_64_SSE_CLASS
:
3848 if (i
< n
- 1 && regclass
[i
+ 1] == X86_64_SSEUP_CLASS
)
3852 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3853 gen_rtx_REG (tmpmode
,
3854 SSE_REGNO (sse_regno
)),
3856 if (tmpmode
== TImode
)
3865 /* Empty aligned struct, union or class. */
3869 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3870 for (i
= 0; i
< nexps
; i
++)
3871 XVECEXP (ret
, 0, i
) = exp
[i
];
3875 /* Update the data in CUM to advance over an argument of mode MODE
3876 and data type TYPE. (TYPE is null for libcalls where that information
3877 may not be available.) */
3880 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3881 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3897 cum
->words
+= words
;
3898 cum
->nregs
-= words
;
3899 cum
->regno
+= words
;
3901 if (cum
->nregs
<= 0)
3909 if (cum
->float_in_sse
< 2)
3912 if (cum
->float_in_sse
< 1)
3923 if (!type
|| !AGGREGATE_TYPE_P (type
))
3925 cum
->sse_words
+= words
;
3926 cum
->sse_nregs
-= 1;
3927 cum
->sse_regno
+= 1;
3928 if (cum
->sse_nregs
<= 0)
3940 if (!type
|| !AGGREGATE_TYPE_P (type
))
3942 cum
->mmx_words
+= words
;
3943 cum
->mmx_nregs
-= 1;
3944 cum
->mmx_regno
+= 1;
3945 if (cum
->mmx_nregs
<= 0)
3956 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3957 tree type
, HOST_WIDE_INT words
)
3959 int int_nregs
, sse_nregs
;
3961 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3962 cum
->words
+= words
;
3963 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3965 cum
->nregs
-= int_nregs
;
3966 cum
->sse_nregs
-= sse_nregs
;
3967 cum
->regno
+= int_nregs
;
3968 cum
->sse_regno
+= sse_nregs
;
3971 cum
->words
+= words
;
3975 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
3976 HOST_WIDE_INT words
)
3978 /* Otherwise, this should be passed indirect. */
3979 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
3981 cum
->words
+= words
;
3990 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3991 tree type
, int named ATTRIBUTE_UNUSED
)
3993 HOST_WIDE_INT bytes
, words
;
3995 if (mode
== BLKmode
)
3996 bytes
= int_size_in_bytes (type
);
3998 bytes
= GET_MODE_SIZE (mode
);
3999 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4002 mode
= type_natural_mode (type
);
4004 if (TARGET_64BIT_MS_ABI
)
4005 function_arg_advance_ms_64 (cum
, bytes
, words
);
4006 else if (TARGET_64BIT
)
4007 function_arg_advance_64 (cum
, mode
, type
, words
);
4009 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
4012 /* Define where to put the arguments to a function.
4013 Value is zero to push the argument on the stack,
4014 or a hard register in which to store the argument.
4016 MODE is the argument's machine mode.
4017 TYPE is the data type of the argument (as a tree).
4018 This is null for libcalls where that information may
4020 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4021 the preceding args and about the function being called.
4022 NAMED is nonzero if this argument is a named parameter
4023 (otherwise it is an extra parameter matching an ellipsis). */
4026 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4027 enum machine_mode orig_mode
, tree type
,
4028 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4030 static bool warnedsse
, warnedmmx
;
4032 /* Avoid the AL settings for the Unix64 ABI. */
4033 if (mode
== VOIDmode
)
4049 if (words
<= cum
->nregs
)
4051 int regno
= cum
->regno
;
4053 /* Fastcall allocates the first two DWORD (SImode) or
4054 smaller arguments to ECX and EDX. */
4057 if (mode
== BLKmode
|| mode
== DImode
)
4060 /* ECX not EAX is the first allocated register. */
4064 return gen_rtx_REG (mode
, regno
);
4069 if (cum
->float_in_sse
< 2)
4072 if (cum
->float_in_sse
< 1)
4082 if (!type
|| !AGGREGATE_TYPE_P (type
))
4084 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4087 warning (0, "SSE vector argument without SSE enabled "
4091 return gen_reg_or_parallel (mode
, orig_mode
,
4092 cum
->sse_regno
+ FIRST_SSE_REG
);
4100 if (!type
|| !AGGREGATE_TYPE_P (type
))
4102 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4105 warning (0, "MMX vector argument without MMX enabled "
4109 return gen_reg_or_parallel (mode
, orig_mode
,
4110 cum
->mmx_regno
+ FIRST_MMX_REG
);
4119 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4120 enum machine_mode orig_mode
, tree type
)
4122 /* Handle a hidden AL argument containing number of registers
4123 for varargs x86-64 functions. */
4124 if (mode
== VOIDmode
)
4125 return GEN_INT (cum
->maybe_vaarg
4126 ? (cum
->sse_nregs
< 0
4131 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4133 &x86_64_int_parameter_registers
[cum
->regno
],
4138 function_arg_ms_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4139 enum machine_mode orig_mode
, int named
)
4143 /* Avoid the AL settings for the Unix64 ABI. */
4144 if (mode
== VOIDmode
)
4147 /* If we've run out of registers, it goes on the stack. */
4148 if (cum
->nregs
== 0)
4151 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
4153 /* Only floating point modes are passed in anything but integer regs. */
4154 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
4157 regno
= cum
->regno
+ FIRST_SSE_REG
;
4162 /* Unnamed floating parameters are passed in both the
4163 SSE and integer registers. */
4164 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
4165 t2
= gen_rtx_REG (mode
, regno
);
4166 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
4167 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
4168 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
4172 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* NOTE(review): this extraction is fragmentary (interior lines and the
   return type / braces are missing); comments annotate only the logic
   that is visible here.  */
/* FUNCTION_ARG dispatcher: compute the argument's size in bytes and in
   words, normalize vector types to a vector mode, then hand off to the
   ABI-specific worker (Win64, SysV x86-64, or 32-bit).  */
4176 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4177 tree type
, int named
)
/* MODE is a working copy; OMODE is preserved and passed through so the
   workers can see the original mode.  */
4179 enum machine_mode mode
= omode
;
4180 HOST_WIDE_INT bytes
, words
;
/* BLKmode carries no size of its own, so query the type instead.  */
4182 if (mode
== BLKmode
)
4183 bytes
= int_size_in_bytes (type
);
4185 bytes
= GET_MODE_SIZE (mode
);
/* Round the byte count up to whole words.  */
4186 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4188 /* To simplify the code below, represent vector types with a vector mode
4189 even if MMX/SSE are not active. */
4190 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4191 mode
= type_natural_mode (type
);
/* ABI dispatch: Win64 first, then generic 64-bit, else 32-bit.  Only the
   32-bit worker needs the precomputed BYTES/WORDS.  */
4193 if (TARGET_64BIT_MS_ABI
)
4194 return function_arg_ms_64 (cum
, mode
, omode
, named
);
4195 else if (TARGET_64BIT
)
4196 return function_arg_64 (cum
, mode
, omode
, type
);
4198 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4201 /* A C expression that indicates when an argument must be passed by
4202 reference. If nonzero for an argument, a copy of that argument is
4203 made in memory and a pointer to the argument is passed instead of
4204 the argument itself. The pointer is passed in whatever way is
4205 appropriate for passing a pointer to that type. */
/* NOTE(review): several interior lines (early returns, braces, and the
   final default return) are missing from this extraction; annotations
   cover only the visible tests.  */
4208 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4209 enum machine_mode mode ATTRIBUTE_UNUSED
,
4210 const_tree type
, bool named ATTRIBUTE_UNUSED
)
/* The Win64 ABI has its own pass-by-reference rules, checked first.  */
4212 if (TARGET_64BIT_MS_ABI
)
4216 /* Arrays are passed by reference. */
4217 if (TREE_CODE (type
) == ARRAY_TYPE
)
4220 if (AGGREGATE_TYPE_P (type
))
4222 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4223 are passed by reference. */
/* exact_log2 returns 0..3 exactly for sizes of 1, 2, 4 or 8 bytes, and
   a negative value for non-powers of two.  */
4224 int el2
= exact_log2 (int_size_in_bytes (type
));
4225 return !(el2
>= 0 && el2
<= 3);
4229 /* __m128 is passed by reference. */
4230 /* ??? How to handle complex? For now treat them as structs,
4231 and pass them by reference if they're too large. */
4232 if (GET_MODE_SIZE (mode
) > 8)
/* On 64-bit SysV, variable-sized types (int_size_in_bytes == -1) are
   passed by reference.  */
4235 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4241 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4242 ABI. Only called if TARGET_SSE. */
4244 contains_128bit_aligned_vector_p (tree type
)
4246 enum machine_mode mode
= TYPE_MODE (type
);
4247 if (SSE_REG_MODE_P (mode
)
4248 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4250 if (TYPE_ALIGN (type
) < 128)
4253 if (AGGREGATE_TYPE_P (type
))
4255 /* Walk the aggregates recursively. */
4256 switch (TREE_CODE (type
))
4260 case QUAL_UNION_TYPE
:
4264 /* Walk all the structure fields. */
4265 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4267 if (TREE_CODE (field
) == FIELD_DECL
4268 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4275 /* Just for use if some languages passes arrays by value. */
4276 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4287 /* Gives the alignment boundary, in bits, of an argument with the
4288 specified mode and type. */
4291 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4295 align
= TYPE_ALIGN (type
);
4297 align
= GET_MODE_ALIGNMENT (mode
);
4298 if (align
< PARM_BOUNDARY
)
4299 align
= PARM_BOUNDARY
;
4302 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4303 make an exception for SSE modes since these require 128bit
4306 The handling here differs from field_alignment. ICC aligns MMX
4307 arguments to 4 byte boundaries, while structure fields are aligned
4308 to 8 byte boundaries. */
4310 align
= PARM_BOUNDARY
;
4313 if (!SSE_REG_MODE_P (mode
))
4314 align
= PARM_BOUNDARY
;
4318 if (!contains_128bit_aligned_vector_p (type
))
4319 align
= PARM_BOUNDARY
;
4327 /* Return true if N is a possible register number of function value. */
4330 ix86_function_value_regno_p (int regno
)
4337 case FIRST_FLOAT_REG
:
4338 if (TARGET_64BIT_MS_ABI
)
4340 return TARGET_FLOAT_RETURNS_IN_80387
;
4346 if (TARGET_MACHO
|| TARGET_64BIT
)
4354 /* Define how to find the value returned by a function.
4355 VALTYPE is the data type of the value (as a tree).
4356 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4357 otherwise, FUNC is 0. */
4360 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4361 const_tree fntype
, const_tree fn
)
4365 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4366 we normally prevent this case when mmx is not available. However
4367 some ABIs may require the result to be returned like DImode. */
4368 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4369 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4371 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4372 we prevent this case when sse is not available. However some ABIs
4373 may require the result to be returned like integer TImode. */
4374 else if (mode
== TImode
4375 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4376 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4378 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4379 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4380 regno
= FIRST_FLOAT_REG
;
4382 /* Most things go in %eax. */
4385 /* Override FP return register with %xmm0 for local functions when
4386 SSE math is enabled or for functions with sseregparm attribute. */
4387 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4389 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4390 if ((sse_level
>= 1 && mode
== SFmode
)
4391 || (sse_level
== 2 && mode
== DFmode
))
4392 regno
= FIRST_SSE_REG
;
4395 return gen_rtx_REG (orig_mode
, regno
);
4399 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4404 /* Handle libcalls, which don't provide a type node. */
4405 if (valtype
== NULL
)
4417 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4420 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4424 return gen_rtx_REG (mode
, 0);
4428 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4429 REGPARM_MAX
, SSE_REGPARM_MAX
,
4430 x86_64_int_return_registers
, 0);
4432 /* For zero sized structures, construct_container returns NULL, but we
4433 need to keep rest of compiler happy by returning meaningful value. */
4435 ret
= gen_rtx_REG (orig_mode
, 0);
/* Select the return register under the Win64 ABI: scalar SFmode/DFmode
   and vector/16-byte values go in the first SSE register, everything
   else defaults to register 0 (%rax).
   NOTE(review): an enclosing guard (lines 4444-4446 of the original)
   appears to be missing from this extraction -- presumably a TARGET_SSE
   check; confirm against the full source.  */
4441 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
/* Default: integer return register 0.  */
4443 unsigned int regno
= 0;
/* Scalar float/double come back in the first SSE register.  */
4447 if (mode
== SFmode
|| mode
== DFmode
)
4448 regno
= FIRST_SSE_REG
;
/* So do vector modes and any 16-byte value.  */
4449 else if (VECTOR_MODE_P (mode
) || GET_MODE_SIZE (mode
) == 16)
4450 regno
= FIRST_SSE_REG
;
/* Return the REG in the caller-visible (original) mode.  */
4453 return gen_rtx_REG (orig_mode
, regno
);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalize FNTYPE_OR_DECL into a decl (FN) and a type (FNTYPE), then
   dispatch to the ABI-specific return-value routine.
   NOTE(review): the initializer of FN (original line 4462, presumably
   setting it to NULL) is missing from this extraction.  */
4457 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4458 enum machine_mode orig_mode
, enum machine_mode mode
)
4460 const_tree fn
, fntype
;
/* If we were handed a FUNCTION_DECL, remember it as FN ...  */
4463 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4464 fn
= fntype_or_decl
;
/* ... and derive FNTYPE either from the decl or from the raw type.  */
4465 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
/* ABI dispatch mirrors function_arg: Win64, SysV x86-64, 32-bit.  */
4467 if (TARGET_64BIT_MS_ABI
)
4468 return function_value_ms_64 (orig_mode
, mode
);
4469 else if (TARGET_64BIT
)
4470 return function_value_64 (orig_mode
, mode
, valtype
);
4472 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* TARGET_FUNCTION_VALUE hook: compute both the original TYPE_MODE and
   the "natural" (vector-normalized) mode of VALTYPE and delegate to the
   common worker.  OUTGOING is unused on this target.  */
4476 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
4477 bool outgoing ATTRIBUTE_UNUSED
)
4479 enum machine_mode mode
, orig_mode
;
/* The mode the front end gave the type ...  */
4481 orig_mode
= TYPE_MODE (valtype
);
/* ... and the mode after vector-type normalization.  */
4482 mode
= type_natural_mode (valtype
);
4483 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* LIBCALL_VALUE worker: libcalls have no tree type, so pass NULL for
   both the value type and the function, and use MODE unchanged as both
   the original and the natural mode.  */
4487 ix86_libcall_value (enum machine_mode mode
)
4489 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
)
;
4492 /* Return true iff type is returned in memory. */
4495 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
4499 if (mode
== BLKmode
)
4502 size
= int_size_in_bytes (type
);
4504 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4507 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4509 /* User-created vectors small enough to fit in EAX. */
4513 /* MMX/3dNow values are returned in MM0,
4514 except when it doesn't exits. */
4516 return (TARGET_MMX
? 0 : 1);
4518 /* SSE values are returned in XMM0, except when it doesn't exist. */
4520 return (TARGET_SSE
? 0 : 1);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument reports it cannot be passed in registers (the
   register counts themselves are discarded here).  */
4535 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
4537 int needed_intregs
, needed_sseregs
;
4538 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
/* Win64: decide whether TYPE is returned in memory.  16-byte vectors
   (__m128 etc.) are returned in a register; otherwise only sizes
   1, 2, 4, 8 are returned in registers.
   NOTE(review): the early-return statement after the vector test
   (original lines 4548-4549) is missing from this extraction.  */
4542 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
4544 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4546 /* __m128 and friends are returned in xmm0. */
4547 if (size
== 16 && VECTOR_MODE_P (mode
))
4550 /* Otherwise, the size must be exactly in [1248]. */
4551 return (size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8);
/* RETURN_IN_MEMORY hook: normalize TYPE to its natural mode, then
   dispatch to the ABI-specific predicate (Win64 / SysV 64 / 32-bit).  */
4555 ix86_return_in_memory (const_tree type
)
4557 const enum machine_mode mode
= type_natural_mode (type
);
4559 if (TARGET_64BIT_MS_ABI
)
4560 return return_in_memory_ms_64 (type
, mode
);
4561 else if (TARGET_64BIT
)
4562 return return_in_memory_64 (type
, mode
);
4564 return return_in_memory_32 (type
, mode
);
4567 /* Return false iff TYPE is returned in memory. This version is used
4568 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4569 but differs notably in that when MMX is available, 8-byte vectors
4570 are returned in memory, rather than in MMX registers. */
4573 ix86_sol10_return_in_memory (const_tree type
)
4576 enum machine_mode mode
= type_natural_mode (type
);
4579 return return_in_memory_64 (type
, mode
);
4581 if (mode
== BLKmode
)
4584 size
= int_size_in_bytes (type
);
4586 if (VECTOR_MODE_P (mode
))
4588 /* Return in memory only if MMX registers *are* available. This
4589 seems backwards, but it is consistent with the existing
4596 else if (mode
== TImode
)
4598 else if (mode
== XFmode
)
4604 /* When returning SSE vector types, we have a choice of either
4605 (1) being abi incompatible with a -march switch, or
4606 (2) generating an error.
4607 Given no good solution, I think the safest thing is one warning.
4608 The user won't be able to use -Werror, but....
4610 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4611 called in response to actually generating a caller or callee that
4612 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4613 via aggregate_value_p for general type probing from tree-ssa. */
4616 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4618 static bool warnedsse
, warnedmmx
;
4620 if (!TARGET_64BIT
&& type
)
4622 /* Look at the return type of the function, not the function type. */
4623 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4625 if (!TARGET_SSE
&& !warnedsse
)
4628 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4631 warning (0, "SSE vector return without SSE enabled "
4636 if (!TARGET_MMX
&& !warnedmmx
)
4638 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4641 warning (0, "MMX vector return without MMX enabled "
4651 /* Create the va_list data type. */
4654 ix86_build_builtin_va_list (void)
4656 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4658 /* For i386 we use plain pointer to argument area. */
4659 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4660 return build_pointer_type (char_type_node
);
4662 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4663 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4665 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4666 unsigned_type_node
);
4667 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4668 unsigned_type_node
);
4669 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4671 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4674 va_list_gpr_counter_field
= f_gpr
;
4675 va_list_fpr_counter_field
= f_fpr
;
4677 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4678 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4679 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4680 DECL_FIELD_CONTEXT (f_sav
) = record
;
4682 TREE_CHAIN (record
) = type_decl
;
4683 TYPE_NAME (record
) = type_decl
;
4684 TYPE_FIELDS (record
) = f_gpr
;
4685 TREE_CHAIN (f_gpr
) = f_fpr
;
4686 TREE_CHAIN (f_fpr
) = f_ovf
;
4687 TREE_CHAIN (f_ovf
) = f_sav
;
4689 layout_type (record
);
4691 /* The correct type is an array type of one element. */
4692 return build_array_type (record
, build_index_type (size_zero_node
));
4695 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4698 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4708 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4711 /* Indicate to allocate space on the stack for varargs save area. */
4712 ix86_save_varrargs_registers
= 1;
4713 /* We need 16-byte stack alignment to save SSE registers. If user
4714 asked for lower preferred_stack_boundary, lets just hope that he knows
4715 what he is doing and won't varargs SSE values.
4717 We also may end up assuming that only 64bit values are stored in SSE
4718 register let some floating point program work. */
4719 if (ix86_preferred_stack_boundary
>= 128)
4720 cfun
->stack_alignment_needed
= 128;
4722 save_area
= frame_pointer_rtx
;
4723 set
= get_varargs_alias_set ();
4725 for (i
= cum
->regno
;
4727 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4730 mem
= gen_rtx_MEM (Pmode
,
4731 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4732 MEM_NOTRAP_P (mem
) = 1;
4733 set_mem_alias_set (mem
, set
);
4734 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4735 x86_64_int_parameter_registers
[i
]));
4738 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4740 /* Now emit code to save SSE registers. The AX parameter contains number
4741 of SSE parameter registers used to call this function. We use
4742 sse_prologue_save insn template that produces computed jump across
4743 SSE saves. We need some preparation work to get this working. */
4745 label
= gen_label_rtx ();
4746 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4748 /* Compute address to jump to :
4749 label - 5*eax + nnamed_sse_arguments*5 */
4750 tmp_reg
= gen_reg_rtx (Pmode
);
4751 nsse_reg
= gen_reg_rtx (Pmode
);
4752 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4753 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4754 gen_rtx_MULT (Pmode
, nsse_reg
,
4759 gen_rtx_CONST (DImode
,
4760 gen_rtx_PLUS (DImode
,
4762 GEN_INT (cum
->sse_regno
* 4))));
4764 emit_move_insn (nsse_reg
, label_ref
);
4765 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4767 /* Compute address of memory block we save into. We always use pointer
4768 pointing 127 bytes after first byte to store - this is needed to keep
4769 instruction size limited by 4 bytes. */
4770 tmp_reg
= gen_reg_rtx (Pmode
);
4771 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4772 plus_constant (save_area
,
4773 8 * REGPARM_MAX
+ 127)));
4774 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4775 MEM_NOTRAP_P (mem
) = 1;
4776 set_mem_alias_set (mem
, set
);
4777 set_mem_align (mem
, BITS_PER_WORD
);
4779 /* And finally do the dirty job! */
4780 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4781 GEN_INT (cum
->sse_regno
), label
));
4786 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4788 alias_set_type set
= get_varargs_alias_set ();
4791 for (i
= cum
->regno
; i
< REGPARM_MAX
; i
++)
4795 mem
= gen_rtx_MEM (Pmode
,
4796 plus_constant (virtual_incoming_args_rtx
,
4797 i
* UNITS_PER_WORD
));
4798 MEM_NOTRAP_P (mem
) = 1;
4799 set_mem_alias_set (mem
, set
);
4801 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4802 emit_move_insn (mem
, reg
);
4807 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4808 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4811 CUMULATIVE_ARGS next_cum
;
4815 /* This argument doesn't appear to be used anymore. Which is good,
4816 because the old code here didn't suppress rtl generation. */
4817 gcc_assert (!no_rtl
);
4822 fntype
= TREE_TYPE (current_function_decl
);
4823 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4824 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4825 != void_type_node
));
4827 /* For varargs, we do not want to skip the dummy va_dcl argument.
4828 For stdargs, we do want to skip the last named argument. */
4831 function_arg_advance (&next_cum
, mode
, type
, 1);
4833 if (TARGET_64BIT_MS_ABI
)
4834 setup_incoming_varargs_ms_64 (&next_cum
);
4836 setup_incoming_varargs_64 (&next_cum
);
4839 /* Implement va_start. */
4842 ix86_va_start (tree valist
, rtx nextarg
)
4844 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4845 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4846 tree gpr
, fpr
, ovf
, sav
, t
;
4849 /* Only 64bit target needs something special. */
4850 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4852 std_expand_builtin_va_start (valist
, nextarg
);
4856 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4857 f_fpr
= TREE_CHAIN (f_gpr
);
4858 f_ovf
= TREE_CHAIN (f_fpr
);
4859 f_sav
= TREE_CHAIN (f_ovf
);
4861 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4862 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4863 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4864 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4865 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4867 /* Count number of gp and fp argument registers used. */
4868 words
= current_function_args_info
.words
;
4869 n_gpr
= current_function_args_info
.regno
;
4870 n_fpr
= current_function_args_info
.sse_regno
;
4872 if (cfun
->va_list_gpr_size
)
4874 type
= TREE_TYPE (gpr
);
4875 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4876 build_int_cst (type
, n_gpr
* 8));
4877 TREE_SIDE_EFFECTS (t
) = 1;
4878 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4881 if (cfun
->va_list_fpr_size
)
4883 type
= TREE_TYPE (fpr
);
4884 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4885 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4886 TREE_SIDE_EFFECTS (t
) = 1;
4887 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4890 /* Find the overflow area. */
4891 type
= TREE_TYPE (ovf
);
4892 t
= make_tree (type
, virtual_incoming_args_rtx
);
4894 t
= build2 (POINTER_PLUS_EXPR
, type
, t
,
4895 size_int (words
* UNITS_PER_WORD
));
4896 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4897 TREE_SIDE_EFFECTS (t
) = 1;
4898 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4900 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4902 /* Find the register save area.
4903 Prologue of the function save it right above stack frame. */
4904 type
= TREE_TYPE (sav
);
4905 t
= make_tree (type
, frame_pointer_rtx
);
4906 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4907 TREE_SIDE_EFFECTS (t
) = 1;
4908 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4912 /* Implement va_arg. */
4915 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4917 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4918 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4919 tree gpr
, fpr
, ovf
, sav
, t
;
4921 tree lab_false
, lab_over
= NULL_TREE
;
4926 enum machine_mode nat_mode
;
4928 /* Only 64bit target needs something special. */
4929 if (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
)
4930 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4932 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4933 f_fpr
= TREE_CHAIN (f_gpr
);
4934 f_ovf
= TREE_CHAIN (f_fpr
);
4935 f_sav
= TREE_CHAIN (f_ovf
);
4937 valist
= build_va_arg_indirect_ref (valist
);
4938 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4939 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4940 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4941 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4943 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4945 type
= build_pointer_type (type
);
4946 size
= int_size_in_bytes (type
);
4947 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4949 nat_mode
= type_natural_mode (type
);
4950 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4951 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4953 /* Pull the value out of the saved registers. */
4955 addr
= create_tmp_var (ptr_type_node
, "addr");
4956 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4960 int needed_intregs
, needed_sseregs
;
4962 tree int_addr
, sse_addr
;
4964 lab_false
= create_artificial_label ();
4965 lab_over
= create_artificial_label ();
4967 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4969 need_temp
= (!REG_P (container
)
4970 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4971 || TYPE_ALIGN (type
) > 128));
4973 /* In case we are passing structure, verify that it is consecutive block
4974 on the register save area. If not we need to do moves. */
4975 if (!need_temp
&& !REG_P (container
))
4977 /* Verify that all registers are strictly consecutive */
4978 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4982 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4984 rtx slot
= XVECEXP (container
, 0, i
);
4985 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4986 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4994 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4996 rtx slot
= XVECEXP (container
, 0, i
);
4997 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4998 || INTVAL (XEXP (slot
, 1)) != i
* 8)
5010 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
5011 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
5012 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
5013 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
5016 /* First ensure that we fit completely in registers. */
5019 t
= build_int_cst (TREE_TYPE (gpr
),
5020 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
5021 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
5022 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5023 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5024 gimplify_and_add (t
, pre_p
);
5028 t
= build_int_cst (TREE_TYPE (fpr
),
5029 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
5031 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
5032 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
5033 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
5034 gimplify_and_add (t
, pre_p
);
5037 /* Compute index to start of area used for integer regs. */
5040 /* int_addr = gpr + sav; */
5041 t
= fold_convert (sizetype
, gpr
);
5042 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5043 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
5044 gimplify_and_add (t
, pre_p
);
5048 /* sse_addr = fpr + sav; */
5049 t
= fold_convert (sizetype
, fpr
);
5050 t
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, sav
, t
);
5051 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
5052 gimplify_and_add (t
, pre_p
);
5057 tree temp
= create_tmp_var (type
, "va_arg_tmp");
5060 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
5061 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5062 gimplify_and_add (t
, pre_p
);
5064 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
5066 rtx slot
= XVECEXP (container
, 0, i
);
5067 rtx reg
= XEXP (slot
, 0);
5068 enum machine_mode mode
= GET_MODE (reg
);
5069 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
5070 tree addr_type
= build_pointer_type (piece_type
);
5073 tree dest_addr
, dest
;
5075 if (SSE_REGNO_P (REGNO (reg
)))
5077 src_addr
= sse_addr
;
5078 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
5082 src_addr
= int_addr
;
5083 src_offset
= REGNO (reg
) * 8;
5085 src_addr
= fold_convert (addr_type
, src_addr
);
5086 src_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, src_addr
,
5087 size_int (src_offset
));
5088 src
= build_va_arg_indirect_ref (src_addr
);
5090 dest_addr
= fold_convert (addr_type
, addr
);
5091 dest_addr
= fold_build2 (POINTER_PLUS_EXPR
, addr_type
, dest_addr
,
5092 size_int (INTVAL (XEXP (slot
, 1))));
5093 dest
= build_va_arg_indirect_ref (dest_addr
);
5095 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
5096 gimplify_and_add (t
, pre_p
);
5102 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5103 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5104 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
5105 gimplify_and_add (t
, pre_p
);
5109 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5110 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5111 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
5112 gimplify_and_add (t
, pre_p
);
5115 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
5116 gimplify_and_add (t
, pre_p
);
5118 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
5119 append_to_statement_list (t
, pre_p
);
5122 /* ... otherwise out of the overflow area. */
5124 /* Care for on-stack alignment if needed. */
5125 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
5126 || integer_zerop (TYPE_SIZE (type
)))
5130 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
5131 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
5132 size_int (align
- 1));
5133 t
= fold_convert (sizetype
, t
);
5134 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5136 t
= fold_convert (TREE_TYPE (ovf
), t
);
5138 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5140 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
5141 gimplify_and_add (t2
, pre_p
);
5143 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
,
5144 size_int (rsize
* UNITS_PER_WORD
));
5145 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
5146 gimplify_and_add (t
, pre_p
);
5150 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
5151 append_to_statement_list (t
, pre_p
);
5154 ptrtype
= build_pointer_type (type
);
5155 addr
= fold_convert (ptrtype
, addr
);
5158 addr
= build_va_arg_indirect_ref (addr
);
5159 return build_va_arg_indirect_ref (addr
);
5162 /* Return nonzero if OPNUM's MEM should be matched
5163 in movabs* patterns. */
/* NOTE(review): the local declarations of SET and MEM (presumably rtx)
   are missing from this extraction.  */
5166 ix86_check_movabs (rtx insn
, int opnum
)
/* Start from the insn pattern; for a PARALLEL, the SET of interest is
   its first element.  */
5170 set
= PATTERN (insn
);
5171 if (GET_CODE (set
) == PARALLEL
)
5172 set
= XVECEXP (set
, 0, 0);
5173 gcc_assert (GET_CODE (set
) == SET
);
/* OPNUM selects source (1) or destination (0) of the SET.  */
5174 mem
= XEXP (set
, opnum
);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
5175 while (GET_CODE (mem
) == SUBREG
)
5176 mem
= SUBREG_REG (mem
);
5177 gcc_assert (MEM_P (mem
));
/* Volatile MEMs are only acceptable when volatile_ok is set.  */
5178 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5181 /* Initialize the table of extra 80387 mathematical constants. */
/* Parse the decimal strings for log10(2), ln(2), log2(e), log2(10) and
   pi, convert each to XFmode precision, and mark the table initialized.
   NOTE(review): the declaration of loop index I is missing from this
   extraction.  */
5184 init_ext_80387_constants (void)
5186 static const char * cst
[5] =
5188 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5189 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5190 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5191 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5192 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5196 for (i
= 0; i
< 5; i
++)
5198 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5199 /* Ensure each constant is rounded to XFmode precision. */
5200 real_convert (&ext_80387_constants_table
[i
],
5201 XFmode
, &ext_80387_constants_table
[i
]);
/* Flag so callers (e.g. standard_80387_constant_p) init only once.  */
5204 ext_80387_constants_init
= 1;
5207 /* Return true if the constant is something that can be loaded with
5208 a special instruction. */
5211 standard_80387_constant_p (rtx x
)
5213 enum machine_mode mode
= GET_MODE (x
);
5217 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
5220 if (x
== CONST0_RTX (mode
))
5222 if (x
== CONST1_RTX (mode
))
5225 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5227 /* For XFmode constants, try to find a special 80387 instruction when
5228 optimizing for size or on those CPUs that benefit from them. */
5230 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5234 if (! ext_80387_constants_init
)
5235 init_ext_80387_constants ();
5237 for (i
= 0; i
< 5; i
++)
5238 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5242 /* Load of the constant -0.0 or -1.0 will be split as
5243 fldz;fchs or fld1;fchs sequence. */
5244 if (real_isnegzero (&r
))
5246 if (real_identical (&r
, &dconstm1
))
5252 /* Return the opcode of the special instruction to be used to load
5256 standard_80387_constant_opcode (rtx x
)
5258 switch (standard_80387_constant_p (x
))
5282 /* Return the CONST_DOUBLE representing the 80387 constant that is
5283 loaded by the specified special instruction. The argument IDX
5284 matches the return value from standard_80387_constant_p. */
5287 standard_80387_constant_rtx (int idx
)
5291 if (! ext_80387_constants_init
)
5292 init_ext_80387_constants ();
5308 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5312 /* Return 1 if mode is a valid mode for sse. */
5314 standard_sse_mode_p (enum machine_mode mode
)
5331 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5334 standard_sse_constant_p (rtx x
)
5336 enum machine_mode mode
= GET_MODE (x
);
5338 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5340 if (vector_all_ones_operand (x
, mode
)
5341 && standard_sse_mode_p (mode
))
5342 return TARGET_SSE2
? 2 : -1;
5347 /* Return the opcode of the special instruction to be used to load
5351 standard_sse_constant_opcode (rtx insn
, rtx x
)
5353 switch (standard_sse_constant_p (x
))
5356 if (get_attr_mode (insn
) == MODE_V4SF
)
5357 return "xorps\t%0, %0";
5358 else if (get_attr_mode (insn
) == MODE_V2DF
)
5359 return "xorpd\t%0, %0";
5361 return "pxor\t%0, %0";
5363 return "pcmpeqd\t%0, %0";
5368 /* Returns 1 if OP contains a symbol reference */
5371 symbolic_reference_mentioned_p (rtx op
)
5376 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5379 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5380 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5386 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5387 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5391 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5398 /* Return 1 if it is appropriate to emit `ret' instructions in the
5399 body of a function. Do this only if the epilogue is simple, needing a
5400 couple of insns. Prior to reloading, we can't tell how many registers
5401 must be saved, so return 0 then. Return 0 if there is no frame
5402 marker to de-allocate. */
5405 ix86_can_use_return_insn_p (void)
5407 struct ix86_frame frame
;
5409 if (! reload_completed
|| frame_pointer_needed
)
5412 /* Don't allow more than 32 pop, since that's all we can do
5413 with one instruction. */
5414 if (current_function_pops_args
5415 && current_function_args_size
>= 32768)
5418 ix86_compute_frame_layout (&frame
);
5419 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5422 /* Value should be nonzero if functions must have frame pointers.
5423 Zero means the frame pointer need not be set up (and parms may
5424 be accessed via the stack pointer) in functions that seem suitable. */
5427 ix86_frame_pointer_required (void)
5429 /* If we accessed previous frames, then the generated code expects
5430 to be able to access the saved ebp value in our frame. */
5431 if (cfun
->machine
->accesses_prev_frame
)
5434 /* Several x86 os'es need a frame pointer for other reasons,
5435 usually pertaining to setjmp. */
5436 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5439 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5440 the frame pointer by default. Turn it back on now if we've not
5441 got a leaf function. */
5442 if (TARGET_OMIT_LEAF_FRAME_POINTER
5443 && (!current_function_is_leaf
5444 || ix86_current_function_calls_tls_descriptor
))
5447 if (current_function_profile
)
5453 /* Record that the current function accesses previous call frames. */
5456 ix86_setup_frame_addresses (void)
5458 cfun
->machine
->accesses_prev_frame
= 1;
5461 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5462 # define USE_HIDDEN_LINKONCE 1
5464 # define USE_HIDDEN_LINKONCE 0
5467 static int pic_labels_used
;
5469 /* Fills in the label name that should be used for a pc thunk for
5470 the given register. */
5473 get_pc_thunk_name (char name
[32], unsigned int regno
)
5475 gcc_assert (!TARGET_64BIT
);
5477 if (USE_HIDDEN_LINKONCE
)
5478 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5480 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5484 /* This function generates code for -fpic that loads %ebx with
5485 the return address of the caller and then returns. */
5488 ix86_file_end (void)
5493 for (regno
= 0; regno
< 8; ++regno
)
5497 if (! ((pic_labels_used
>> regno
) & 1))
5500 get_pc_thunk_name (name
, regno
);
5505 switch_to_section (darwin_sections
[text_coal_section
]);
5506 fputs ("\t.weak_definition\t", asm_out_file
);
5507 assemble_name (asm_out_file
, name
);
5508 fputs ("\n\t.private_extern\t", asm_out_file
);
5509 assemble_name (asm_out_file
, name
);
5510 fputs ("\n", asm_out_file
);
5511 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5515 if (USE_HIDDEN_LINKONCE
)
5519 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5521 TREE_PUBLIC (decl
) = 1;
5522 TREE_STATIC (decl
) = 1;
5523 DECL_ONE_ONLY (decl
) = 1;
5525 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5526 switch_to_section (get_named_section (decl
, NULL
, 0));
5528 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5529 fputs ("\t.hidden\t", asm_out_file
);
5530 assemble_name (asm_out_file
, name
);
5531 fputc ('\n', asm_out_file
);
5532 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5536 switch_to_section (text_section
);
5537 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5540 xops
[0] = gen_rtx_REG (SImode
, regno
);
5541 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5542 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5543 output_asm_insn ("ret", xops
);
5546 if (NEED_INDICATE_EXEC_STACK
)
5547 file_end_indicate_exec_stack ();
5550 /* Emit code for the SET_GOT patterns. */
5553 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5559 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5561 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5562 xops
[2] = gen_rtx_MEM (Pmode
,
5563 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5564 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5566 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5567 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5568 an unadorned address. */
5569 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5570 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5571 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5575 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5577 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5579 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5582 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5584 output_asm_insn ("call\t%a2", xops
);
5587 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5588 is what will be referenced by the Mach-O PIC subsystem. */
5590 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5593 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5594 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5597 output_asm_insn ("pop{l}\t%0", xops
);
5602 get_pc_thunk_name (name
, REGNO (dest
));
5603 pic_labels_used
|= 1 << REGNO (dest
);
5605 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5606 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5607 output_asm_insn ("call\t%X2", xops
);
5608 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5609 is what will be referenced by the Mach-O PIC subsystem. */
5612 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5614 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5615 CODE_LABEL_NUMBER (label
));
5622 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5623 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5625 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5630 /* Generate an "push" pattern for input ARG. */
5635 return gen_rtx_SET (VOIDmode
,
5637 gen_rtx_PRE_DEC (Pmode
,
5638 stack_pointer_rtx
)),
5642 /* Return >= 0 if there is an unused call-clobbered register available
5643 for the entire function. */
5646 ix86_select_alt_pic_regnum (void)
5648 if (current_function_is_leaf
&& !current_function_profile
5649 && !ix86_current_function_calls_tls_descriptor
)
5652 for (i
= 2; i
>= 0; --i
)
5653 if (!df_regs_ever_live_p (i
))
5657 return INVALID_REGNUM
;
5660 /* Return 1 if we need to save REGNO. */
5662 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5664 if (pic_offset_table_rtx
5665 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5666 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5667 || current_function_profile
5668 || current_function_calls_eh_return
5669 || current_function_uses_const_pool
))
5671 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5676 if (current_function_calls_eh_return
&& maybe_eh_return
)
5681 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5682 if (test
== INVALID_REGNUM
)
5689 if (cfun
->machine
->force_align_arg_pointer
5690 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5693 return (df_regs_ever_live_p (regno
)
5694 && !call_used_regs
[regno
]
5695 && !fixed_regs
[regno
]
5696 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5699 /* Return number of registers to be saved on the stack. */
5702 ix86_nsaved_regs (void)
5707 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5708 if (ix86_save_reg (regno
, true))
5713 /* Return the offset between two registers, one to be eliminated, and the other
5714 its replacement, at the start of a routine. */
5717 ix86_initial_elimination_offset (int from
, int to
)
5719 struct ix86_frame frame
;
5720 ix86_compute_frame_layout (&frame
);
5722 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5723 return frame
.hard_frame_pointer_offset
;
5724 else if (from
== FRAME_POINTER_REGNUM
5725 && to
== HARD_FRAME_POINTER_REGNUM
)
5726 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5729 gcc_assert (to
== STACK_POINTER_REGNUM
);
5731 if (from
== ARG_POINTER_REGNUM
)
5732 return frame
.stack_pointer_offset
;
5734 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5735 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5739 /* Fill structure ix86_frame about frame of currently computed function. */
5742 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5744 HOST_WIDE_INT total_size
;
5745 unsigned int stack_alignment_needed
;
5746 HOST_WIDE_INT offset
;
5747 unsigned int preferred_alignment
;
5748 HOST_WIDE_INT size
= get_frame_size ();
5750 frame
->nregs
= ix86_nsaved_regs ();
5753 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5754 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5756 /* During reload iteration the amount of registers saved can change.
5757 Recompute the value as needed. Do not recompute when amount of registers
5758 didn't change as reload does multiple calls to the function and does not
5759 expect the decision to change within single iteration. */
5761 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5763 int count
= frame
->nregs
;
5765 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5766 /* The fast prologue uses move instead of push to save registers. This
5767 is significantly longer, but also executes faster as modern hardware
5768 can execute the moves in parallel, but can't do that for push/pop.
5770 Be careful about choosing what prologue to emit: When function takes
5771 many instructions to execute we may use slow version as well as in
5772 case function is known to be outside hot spot (this is known with
5773 feedback only). Weight the size of function by number of registers
5774 to save as it is cheap to use one or two push instructions but very
5775 slow to use many of them. */
5777 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5778 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5779 || (flag_branch_probabilities
5780 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5781 cfun
->machine
->use_fast_prologue_epilogue
= false;
5783 cfun
->machine
->use_fast_prologue_epilogue
5784 = !expensive_function_p (count
);
5786 if (TARGET_PROLOGUE_USING_MOVE
5787 && cfun
->machine
->use_fast_prologue_epilogue
)
5788 frame
->save_regs_using_mov
= true;
5790 frame
->save_regs_using_mov
= false;
5793 /* Skip return address and saved base pointer. */
5794 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5796 frame
->hard_frame_pointer_offset
= offset
;
5798 /* Do some sanity checking of stack_alignment_needed and
5799 preferred_alignment, since i386 port is the only using those features
5800 that may break easily. */
5802 gcc_assert (!size
|| stack_alignment_needed
);
5803 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5804 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5805 gcc_assert (stack_alignment_needed
5806 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5808 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5809 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5811 /* Register save area */
5812 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5815 if (ix86_save_varrargs_registers
)
5817 offset
+= X86_64_VARARGS_SIZE
;
5818 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5821 frame
->va_arg_size
= 0;
5823 /* Align start of frame for local function. */
5824 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5825 & -stack_alignment_needed
) - offset
;
5827 offset
+= frame
->padding1
;
5829 /* Frame pointer points here. */
5830 frame
->frame_pointer_offset
= offset
;
5834 /* Add outgoing arguments area. Can be skipped if we eliminated
5835 all the function calls as dead code.
5836 Skipping is however impossible when function calls alloca. Alloca
5837 expander assumes that last current_function_outgoing_args_size
5838 of stack frame are unused. */
5839 if (ACCUMULATE_OUTGOING_ARGS
5840 && (!current_function_is_leaf
|| current_function_calls_alloca
5841 || ix86_current_function_calls_tls_descriptor
))
5843 offset
+= current_function_outgoing_args_size
;
5844 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5847 frame
->outgoing_arguments_size
= 0;
5849 /* Align stack boundary. Only needed if we're calling another function
5851 if (!current_function_is_leaf
|| current_function_calls_alloca
5852 || ix86_current_function_calls_tls_descriptor
)
5853 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5854 & -preferred_alignment
) - offset
;
5856 frame
->padding2
= 0;
5858 offset
+= frame
->padding2
;
5860 /* We've reached end of stack frame. */
5861 frame
->stack_pointer_offset
= offset
;
5863 /* Size prologue needs to allocate. */
5864 frame
->to_allocate
=
5865 (size
+ frame
->padding1
+ frame
->padding2
5866 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5868 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5869 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5870 frame
->save_regs_using_mov
= false;
5872 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5873 && current_function_is_leaf
5874 && !ix86_current_function_calls_tls_descriptor
)
5876 frame
->red_zone_size
= frame
->to_allocate
;
5877 if (frame
->save_regs_using_mov
)
5878 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5879 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5880 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5883 frame
->red_zone_size
= 0;
5884 frame
->to_allocate
-= frame
->red_zone_size
;
5885 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5887 fprintf (stderr
, "\n");
5888 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5889 fprintf (stderr
, "size: %ld\n", (long)size
);
5890 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5891 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5892 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5893 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5894 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5895 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5896 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5897 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5898 (long)frame
->hard_frame_pointer_offset
);
5899 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5900 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5901 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5902 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5906 /* Emit code to save registers in the prologue. */
5909 ix86_emit_save_regs (void)
5914 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5915 if (ix86_save_reg (regno
, true))
5917 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5918 RTX_FRAME_RELATED_P (insn
) = 1;
5922 /* Emit code to save registers using MOV insns. First register
5923 is restored from POINTER + OFFSET. */
5925 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5930 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5931 if (ix86_save_reg (regno
, true))
5933 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5935 gen_rtx_REG (Pmode
, regno
));
5936 RTX_FRAME_RELATED_P (insn
) = 1;
5937 offset
+= UNITS_PER_WORD
;
5941 /* Expand prologue or epilogue stack adjustment.
5942 The pattern exist to put a dependency on all ebp-based memory accesses.
5943 STYLE should be negative if instructions should be marked as frame related,
5944 zero if %r11 register is live and cannot be freely used and positive
5948 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5953 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5954 else if (x86_64_immediate_operand (offset
, DImode
))
5955 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5959 /* r11 is used by indirect sibcall return as well, set before the
5960 epilogue and used after the epilogue. ATM indirect sibcall
5961 shouldn't be used together with huge frame sizes in one
5962 function because of the frame_size check in sibcall.c. */
5964 r11
= gen_rtx_REG (DImode
, R11_REG
);
5965 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5967 RTX_FRAME_RELATED_P (insn
) = 1;
5968 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5972 RTX_FRAME_RELATED_P (insn
) = 1;
5975 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5978 ix86_internal_arg_pointer (void)
5980 bool has_force_align_arg_pointer
=
5981 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5982 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5983 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5984 && DECL_NAME (current_function_decl
)
5985 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5986 && DECL_FILE_SCOPE_P (current_function_decl
))
5987 || ix86_force_align_arg_pointer
5988 || has_force_align_arg_pointer
)
5990 /* Nested functions can't realign the stack due to a register
5992 if (DECL_CONTEXT (current_function_decl
)
5993 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5995 if (ix86_force_align_arg_pointer
)
5996 warning (0, "-mstackrealign ignored for nested functions");
5997 if (has_force_align_arg_pointer
)
5998 error ("%s not supported for nested functions",
5999 ix86_force_align_arg_pointer_string
);
6000 return virtual_incoming_args_rtx
;
6002 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
6003 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
6006 return virtual_incoming_args_rtx
;
6009 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6010 This is called from dwarf2out.c to emit call frame instructions
6011 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6013 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
6015 rtx unspec
= SET_SRC (pattern
);
6016 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
6020 case UNSPEC_REG_SAVE
:
6021 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
6022 SET_DEST (pattern
));
6024 case UNSPEC_DEF_CFA
:
6025 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
6026 INTVAL (XVECEXP (unspec
, 0, 0)));
6033 /* Expand the prologue into a bunch of separate insns. */
6036 ix86_expand_prologue (void)
6040 struct ix86_frame frame
;
6041 HOST_WIDE_INT allocate
;
6043 ix86_compute_frame_layout (&frame
);
6045 if (cfun
->machine
->force_align_arg_pointer
)
6049 /* Grab the argument pointer. */
6050 x
= plus_constant (stack_pointer_rtx
, 4);
6051 y
= cfun
->machine
->force_align_arg_pointer
;
6052 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
6053 RTX_FRAME_RELATED_P (insn
) = 1;
6055 /* The unwind info consists of two parts: install the fafp as the cfa,
6056 and record the fafp as the "save register" of the stack pointer.
6057 The later is there in order that the unwinder can see where it
6058 should restore the stack pointer across the and insn. */
6059 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
6060 x
= gen_rtx_SET (VOIDmode
, y
, x
);
6061 RTX_FRAME_RELATED_P (x
) = 1;
6062 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
6064 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
6065 RTX_FRAME_RELATED_P (y
) = 1;
6066 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
6067 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6068 REG_NOTES (insn
) = x
;
6070 /* Align the stack. */
6071 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
6074 /* And here we cheat like madmen with the unwind info. We force the
6075 cfa register back to sp+4, which is exactly what it was at the
6076 start of the function. Re-pushing the return address results in
6077 the return at the same spot relative to the cfa, and thus is
6078 correct wrt the unwind info. */
6079 x
= cfun
->machine
->force_align_arg_pointer
;
6080 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
6081 insn
= emit_insn (gen_push (x
));
6082 RTX_FRAME_RELATED_P (insn
) = 1;
6085 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
6086 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
6087 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
6088 REG_NOTES (insn
) = x
;
6091 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6092 slower on all targets. Also sdb doesn't like it. */
6094 if (frame_pointer_needed
)
6096 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
6097 RTX_FRAME_RELATED_P (insn
) = 1;
6099 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
6100 RTX_FRAME_RELATED_P (insn
) = 1;
6103 allocate
= frame
.to_allocate
;
6105 if (!frame
.save_regs_using_mov
)
6106 ix86_emit_save_regs ();
6108 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
6110 /* When using red zone we may start register saving before allocating
6111 the stack frame saving one cycle of the prologue. */
6112 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
6113 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
6114 : stack_pointer_rtx
,
6115 -frame
.nregs
* UNITS_PER_WORD
);
6119 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
6120 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6121 GEN_INT (-allocate
), -1);
6124 /* Only valid for Win32. */
6125 rtx eax
= gen_rtx_REG (Pmode
, 0);
6129 gcc_assert (!TARGET_64BIT
|| TARGET_64BIT_MS_ABI
);
6131 if (TARGET_64BIT_MS_ABI
)
6134 eax_live
= ix86_eax_live_at_start_p ();
6138 emit_insn (gen_push (eax
));
6139 allocate
-= UNITS_PER_WORD
;
6142 emit_move_insn (eax
, GEN_INT (allocate
));
6145 insn
= gen_allocate_stack_worker_64 (eax
);
6147 insn
= gen_allocate_stack_worker_32 (eax
);
6148 insn
= emit_insn (insn
);
6149 RTX_FRAME_RELATED_P (insn
) = 1;
6150 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
6151 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
6152 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
6153 t
, REG_NOTES (insn
));
6157 if (frame_pointer_needed
)
6158 t
= plus_constant (hard_frame_pointer_rtx
,
6161 - frame
.nregs
* UNITS_PER_WORD
);
6163 t
= plus_constant (stack_pointer_rtx
, allocate
);
6164 emit_move_insn (eax
, gen_rtx_MEM (Pmode
, t
));
6168 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6170 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6171 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6173 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6174 -frame
.nregs
* UNITS_PER_WORD
);
6177 pic_reg_used
= false;
6178 if (pic_offset_table_rtx
6179 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6180 || current_function_profile
))
6182 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6184 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6185 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
6187 pic_reg_used
= true;
6194 if (ix86_cmodel
== CM_LARGE_PIC
)
6196 rtx tmp_reg
= gen_rtx_REG (DImode
,
6197 FIRST_REX_INT_REG
+ 3 /* R11 */);
6198 rtx label
= gen_label_rtx ();
6200 LABEL_PRESERVE_P (label
) = 1;
6201 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6202 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6203 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6204 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6205 pic_offset_table_rtx
, tmp_reg
));
6208 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6211 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6214 /* Prevent function calls from be scheduled before the call to mcount.
6215 In the pic_reg_used case, make sure that the got load isn't deleted. */
6216 if (current_function_profile
)
6219 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
6220 emit_insn (gen_blockage ());
6224 /* Emit code to restore saved registers using MOV insns. First register
6225 is restored from POINTER + OFFSET. */
6227 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6228 int maybe_eh_return
)
6231 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6233 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6234 if (ix86_save_reg (regno
, maybe_eh_return
))
6236 /* Ensure that adjust_address won't be forced to produce pointer
6237 out of range allowed by x86-64 instruction set. */
6238 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6242 r11
= gen_rtx_REG (DImode
, R11_REG
);
6243 emit_move_insn (r11
, GEN_INT (offset
));
6244 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6245 base_address
= gen_rtx_MEM (Pmode
, r11
);
6248 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6249 adjust_address (base_address
, Pmode
, offset
));
6250 offset
+= UNITS_PER_WORD
;
6254 /* Restore function stack, frame, and registers. */
6257 ix86_expand_epilogue (int style
)
6260 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6261 struct ix86_frame frame
;
6262 HOST_WIDE_INT offset
;
6264 ix86_compute_frame_layout (&frame
);
6266 /* Calculate start of saved registers relative to ebp. Special care
6267 must be taken for the normal return case of a function using
6268 eh_return: the eax and edx registers are marked as saved, but not
6269 restored along this path. */
6270 offset
= frame
.nregs
;
6271 if (current_function_calls_eh_return
&& style
!= 2)
6273 offset
*= -UNITS_PER_WORD
;
6275 /* If we're only restoring one register and sp is not valid then
6276 using a move instruction to restore the register since it's
6277 less work than reloading sp and popping the register.
6279 The default code result in stack adjustment using add/lea instruction,
6280 while this code results in LEAVE instruction (or discrete equivalent),
6281 so it is profitable in some other cases as well. Especially when there
6282 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6283 and there is exactly one register to pop. This heuristic may need some
6284 tuning in future. */
6285 if ((!sp_valid
&& frame
.nregs
<= 1)
6286 || (TARGET_EPILOGUE_USING_MOVE
6287 && cfun
->machine
->use_fast_prologue_epilogue
6288 && (frame
.nregs
> 1 || frame
.to_allocate
))
6289 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6290 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6291 && cfun
->machine
->use_fast_prologue_epilogue
6292 && frame
.nregs
== 1)
6293 || current_function_calls_eh_return
)
6295 /* Restore registers. We can use ebp or esp to address the memory
6296 locations. If both are available, default to ebp, since offsets
6297 are known to be small. Only exception is esp pointing directly to the
6298 end of block of saved registers, where we may simplify addressing
6301 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6302 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6303 frame
.to_allocate
, style
== 2);
6305 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6306 offset
, style
== 2);
6308 /* eh_return epilogues need %ecx added to the stack pointer. */
6311 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6313 if (frame_pointer_needed
)
6315 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6316 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6317 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6319 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6320 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6322 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6327 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6328 tmp
= plus_constant (tmp
, (frame
.to_allocate
6329 + frame
.nregs
* UNITS_PER_WORD
));
6330 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6333 else if (!frame_pointer_needed
)
6334 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6335 GEN_INT (frame
.to_allocate
6336 + frame
.nregs
* UNITS_PER_WORD
),
6338 /* If not an i386, mov & pop is faster than "leave". */
6339 else if (TARGET_USE_LEAVE
|| optimize_size
6340 || !cfun
->machine
->use_fast_prologue_epilogue
)
6341 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6344 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6345 hard_frame_pointer_rtx
,
6348 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6350 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6355 /* First step is to deallocate the stack frame so that we can
6356 pop the registers. */
6359 gcc_assert (frame_pointer_needed
);
6360 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6361 hard_frame_pointer_rtx
,
6362 GEN_INT (offset
), style
);
6364 else if (frame
.to_allocate
)
6365 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6366 GEN_INT (frame
.to_allocate
), style
);
6368 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6369 if (ix86_save_reg (regno
, false))
6372 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6374 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6376 if (frame_pointer_needed
)
6378 /* Leave results in shorter dependency chains on CPUs that are
6379 able to grok it fast. */
6380 if (TARGET_USE_LEAVE
)
6381 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6382 else if (TARGET_64BIT
)
6383 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6385 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6389 if (cfun
->machine
->force_align_arg_pointer
)
6391 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6392 cfun
->machine
->force_align_arg_pointer
,
6396 /* Sibcall epilogues don't want a return instruction. */
6400 if (current_function_pops_args
&& current_function_args_size
)
6402 rtx popc
= GEN_INT (current_function_pops_args
);
6404 /* i386 can only pop 64K bytes. If asked to pop more, pop
6405 return address, do explicit add, and jump indirectly to the
6408 if (current_function_pops_args
>= 65536)
6410 rtx ecx
= gen_rtx_REG (SImode
, 2);
6412 /* There is no "pascal" calling convention in any 64bit ABI. */
6413 gcc_assert (!TARGET_64BIT
);
6415 emit_insn (gen_popsi1 (ecx
));
6416 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6417 emit_jump_insn (gen_return_indirect_internal (ecx
));
6420 emit_jump_insn (gen_return_pop_internal (popc
));
6423 emit_jump_insn (gen_return_internal ());
6426 /* Reset from the function's potential modifications. */
6429 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6430 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6432 if (pic_offset_table_rtx
)
6433 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
6435 /* Mach-O doesn't support labels at the end of objects, so if
6436 it looks like we might want one, insert a NOP. */
6438 rtx insn
= get_last_insn ();
6441 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
6442 insn
= PREV_INSN (insn
);
6446 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
6447 fputs ("\tnop\n", file
);
6453 /* Extract the parts of an RTL expression that is a valid memory address
6454 for an instruction. Return 0 if the structure of the address is
6455 grossly off. Return -1 if the address contains ASHIFT, so it is not
6456 strictly valid, but still used for computing length of lea instruction. */
6459 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6461 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6462 rtx base_reg
, index_reg
;
6463 HOST_WIDE_INT scale
= 1;
6464 rtx scale_rtx
= NULL_RTX
;
6466 enum ix86_address_seg seg
= SEG_DEFAULT
;
6468 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6470 else if (GET_CODE (addr
) == PLUS
)
6480 addends
[n
++] = XEXP (op
, 1);
6483 while (GET_CODE (op
) == PLUS
);
6488 for (i
= n
; i
>= 0; --i
)
6491 switch (GET_CODE (op
))
6496 index
= XEXP (op
, 0);
6497 scale_rtx
= XEXP (op
, 1);
6501 if (XINT (op
, 1) == UNSPEC_TP
6502 && TARGET_TLS_DIRECT_SEG_REFS
6503 && seg
== SEG_DEFAULT
)
6504 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6533 else if (GET_CODE (addr
) == MULT
)
6535 index
= XEXP (addr
, 0); /* index*scale */
6536 scale_rtx
= XEXP (addr
, 1);
6538 else if (GET_CODE (addr
) == ASHIFT
)
6542 /* We're called for lea too, which implements ashift on occasion. */
6543 index
= XEXP (addr
, 0);
6544 tmp
= XEXP (addr
, 1);
6545 if (!CONST_INT_P (tmp
))
6547 scale
= INTVAL (tmp
);
6548 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6554 disp
= addr
; /* displacement */
6556 /* Extract the integral value of scale. */
6559 if (!CONST_INT_P (scale_rtx
))
6561 scale
= INTVAL (scale_rtx
);
6564 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6565 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6567 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6568 if (base_reg
&& index_reg
&& scale
== 1
6569 && (index_reg
== arg_pointer_rtx
6570 || index_reg
== frame_pointer_rtx
6571 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6574 tmp
= base
, base
= index
, index
= tmp
;
6575 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6578 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6579 if ((base_reg
== hard_frame_pointer_rtx
6580 || base_reg
== frame_pointer_rtx
6581 || base_reg
== arg_pointer_rtx
) && !disp
)
6584 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6585 Avoid this by transforming to [%esi+0]. */
6586 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6587 && base_reg
&& !index_reg
&& !disp
6589 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6592 /* Special case: encode reg+reg instead of reg*2. */
6593 if (!base
&& index
&& scale
&& scale
== 2)
6594 base
= index
, base_reg
= index_reg
, scale
= 1;
6596 /* Special case: scaling cannot be encoded without base or displacement. */
6597 if (!base
&& !disp
&& index
&& scale
!= 1)
6609 /* Return cost of the memory address x.
6610 For i386, it is better to use a complex address than let gcc copy
6611 the address into a reg and make a new pseudo. But not if the address
6612 requires to two regs - that would mean more pseudos with longer
6615 ix86_address_cost (rtx x
)
6617 struct ix86_address parts
;
6619 int ok
= ix86_decompose_address (x
, &parts
);
6623 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6624 parts
.base
= SUBREG_REG (parts
.base
);
6625 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6626 parts
.index
= SUBREG_REG (parts
.index
);
6628 /* Attempt to minimize number of registers in the address. */
6630 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6632 && (!REG_P (parts
.index
)
6633 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6637 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6639 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6640 && parts
.base
!= parts
.index
)
6643 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6644 since it's predecode logic can't detect the length of instructions
6645 and it degenerates to vector decoded. Increase cost of such
6646 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6647 to split such addresses or even refuse such addresses at all.
6649 Following addressing modes are affected:
6654 The first and last case may be avoidable by explicitly coding the zero in
6655 memory address, but I don't have AMD-K6 machine handy to check this
6659 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6660 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6661 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6667 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6668 this is used for to form addresses to local data when -fPIC is in
6672 darwin_local_data_pic (rtx disp
)
6674 if (GET_CODE (disp
) == MINUS
)
6676 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6677 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6678 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6680 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6681 if (! strcmp (sym_name
, "<pic base>"))
6689 /* Determine if a given RTX is a valid constant. We already know this
6690 satisfies CONSTANT_P. */
6693 legitimate_constant_p (rtx x
)
6695 switch (GET_CODE (x
))
6700 if (GET_CODE (x
) == PLUS
)
6702 if (!CONST_INT_P (XEXP (x
, 1)))
6707 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6710 /* Only some unspecs are valid as "constants". */
6711 if (GET_CODE (x
) == UNSPEC
)
6712 switch (XINT (x
, 1))
6717 return TARGET_64BIT
;
6720 x
= XVECEXP (x
, 0, 0);
6721 return (GET_CODE (x
) == SYMBOL_REF
6722 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6724 x
= XVECEXP (x
, 0, 0);
6725 return (GET_CODE (x
) == SYMBOL_REF
6726 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6731 /* We must have drilled down to a symbol. */
6732 if (GET_CODE (x
) == LABEL_REF
)
6734 if (GET_CODE (x
) != SYMBOL_REF
)
6739 /* TLS symbols are never valid. */
6740 if (SYMBOL_REF_TLS_MODEL (x
))
6743 /* DLLIMPORT symbols are never valid. */
6744 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6745 && SYMBOL_REF_DLLIMPORT_P (x
))
6750 if (GET_MODE (x
) == TImode
6751 && x
!= CONST0_RTX (TImode
)
6757 if (x
== CONST0_RTX (GET_MODE (x
)))
6765 /* Otherwise we handle everything else in the move patterns. */
6769 /* Determine if it's legal to put X into the constant pool. This
6770 is not possible for the address of thread-local symbols, which
6771 is checked above. */
6774 ix86_cannot_force_const_mem (rtx x
)
6776 /* We can always put integral constants and vectors in memory. */
6777 switch (GET_CODE (x
))
6787 return !legitimate_constant_p (x
);
6790 /* Determine if a given RTX is a valid constant address. */
6793 constant_address_p (rtx x
)
6795 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6798 /* Nonzero if the constant value X is a legitimate general operand
6799 when generating PIC code. It is given that flag_pic is on and
6800 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6803 legitimate_pic_operand_p (rtx x
)
6807 switch (GET_CODE (x
))
6810 inner
= XEXP (x
, 0);
6811 if (GET_CODE (inner
) == PLUS
6812 && CONST_INT_P (XEXP (inner
, 1)))
6813 inner
= XEXP (inner
, 0);
6815 /* Only some unspecs are valid as "constants". */
6816 if (GET_CODE (inner
) == UNSPEC
)
6817 switch (XINT (inner
, 1))
6822 return TARGET_64BIT
;
6824 x
= XVECEXP (inner
, 0, 0);
6825 return (GET_CODE (x
) == SYMBOL_REF
6826 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6834 return legitimate_pic_address_disp_p (x
);
6841 /* Determine if a given CONST RTX is a valid memory displacement
6845 legitimate_pic_address_disp_p (rtx disp
)
6849 /* In 64bit mode we can allow direct addresses of symbols and labels
6850 when they are not dynamic symbols. */
6853 rtx op0
= disp
, op1
;
6855 switch (GET_CODE (disp
))
6861 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6863 op0
= XEXP (XEXP (disp
, 0), 0);
6864 op1
= XEXP (XEXP (disp
, 0), 1);
6865 if (!CONST_INT_P (op1
)
6866 || INTVAL (op1
) >= 16*1024*1024
6867 || INTVAL (op1
) < -16*1024*1024)
6869 if (GET_CODE (op0
) == LABEL_REF
)
6871 if (GET_CODE (op0
) != SYMBOL_REF
)
6876 /* TLS references should always be enclosed in UNSPEC. */
6877 if (SYMBOL_REF_TLS_MODEL (op0
))
6879 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6880 && ix86_cmodel
!= CM_LARGE_PIC
)
6888 if (GET_CODE (disp
) != CONST
)
6890 disp
= XEXP (disp
, 0);
6894 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6895 of GOT tables. We should not need these anyway. */
6896 if (GET_CODE (disp
) != UNSPEC
6897 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6898 && XINT (disp
, 1) != UNSPEC_GOTOFF
6899 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6902 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6903 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6909 if (GET_CODE (disp
) == PLUS
)
6911 if (!CONST_INT_P (XEXP (disp
, 1)))
6913 disp
= XEXP (disp
, 0);
6917 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6920 if (GET_CODE (disp
) != UNSPEC
)
6923 switch (XINT (disp
, 1))
6928 /* We need to check for both symbols and labels because VxWorks loads
6929 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6931 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6932 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6934 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6935 While ABI specify also 32bit relocation but we don't produce it in
6936 small PIC model at all. */
6937 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6938 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6940 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6942 case UNSPEC_GOTTPOFF
:
6943 case UNSPEC_GOTNTPOFF
:
6944 case UNSPEC_INDNTPOFF
:
6947 disp
= XVECEXP (disp
, 0, 0);
6948 return (GET_CODE (disp
) == SYMBOL_REF
6949 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6951 disp
= XVECEXP (disp
, 0, 0);
6952 return (GET_CODE (disp
) == SYMBOL_REF
6953 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6955 disp
= XVECEXP (disp
, 0, 0);
6956 return (GET_CODE (disp
) == SYMBOL_REF
6957 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6963 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6964 memory address for an instruction. The MODE argument is the machine mode
6965 for the MEM expression that wants to use this address.
6967 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6968 convert common non-canonical forms to canonical form so that they will
6972 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6973 rtx addr
, int strict
)
6975 struct ix86_address parts
;
6976 rtx base
, index
, disp
;
6977 HOST_WIDE_INT scale
;
6978 const char *reason
= NULL
;
6979 rtx reason_rtx
= NULL_RTX
;
6981 if (ix86_decompose_address (addr
, &parts
) <= 0)
6983 reason
= "decomposition failed";
6988 index
= parts
.index
;
6990 scale
= parts
.scale
;
6992 /* Validate base register.
6994 Don't allow SUBREG's that span more than a word here. It can lead to spill
6995 failures when the base is one word out of a two word structure, which is
6996 represented internally as a DImode int. */
7005 else if (GET_CODE (base
) == SUBREG
7006 && REG_P (SUBREG_REG (base
))
7007 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
7009 reg
= SUBREG_REG (base
);
7012 reason
= "base is not a register";
7016 if (GET_MODE (base
) != Pmode
)
7018 reason
= "base is not in Pmode";
7022 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
7023 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
7025 reason
= "base is not valid";
7030 /* Validate index register.
7032 Don't allow SUBREG's that span more than a word here -- same as above. */
7041 else if (GET_CODE (index
) == SUBREG
7042 && REG_P (SUBREG_REG (index
))
7043 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
7045 reg
= SUBREG_REG (index
);
7048 reason
= "index is not a register";
7052 if (GET_MODE (index
) != Pmode
)
7054 reason
= "index is not in Pmode";
7058 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
7059 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
7061 reason
= "index is not valid";
7066 /* Validate scale factor. */
7069 reason_rtx
= GEN_INT (scale
);
7072 reason
= "scale without index";
7076 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
7078 reason
= "scale is not a valid multiplier";
7083 /* Validate displacement. */
7088 if (GET_CODE (disp
) == CONST
7089 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
7090 switch (XINT (XEXP (disp
, 0), 1))
7092 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7093 used. While ABI specify also 32bit relocations, we don't produce
7094 them at all and use IP relative instead. */
7097 gcc_assert (flag_pic
);
7099 goto is_legitimate_pic
;
7100 reason
= "64bit address unspec";
7103 case UNSPEC_GOTPCREL
:
7104 gcc_assert (flag_pic
);
7105 goto is_legitimate_pic
;
7107 case UNSPEC_GOTTPOFF
:
7108 case UNSPEC_GOTNTPOFF
:
7109 case UNSPEC_INDNTPOFF
:
7115 reason
= "invalid address unspec";
7119 else if (SYMBOLIC_CONST (disp
)
7123 && MACHOPIC_INDIRECT
7124 && !machopic_operand_p (disp
)
7130 if (TARGET_64BIT
&& (index
|| base
))
7132 /* foo@dtpoff(%rX) is ok. */
7133 if (GET_CODE (disp
) != CONST
7134 || GET_CODE (XEXP (disp
, 0)) != PLUS
7135 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7136 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7137 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7138 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7140 reason
= "non-constant pic memory reference";
7144 else if (! legitimate_pic_address_disp_p (disp
))
7146 reason
= "displacement is an invalid pic construct";
7150 /* This code used to verify that a symbolic pic displacement
7151 includes the pic_offset_table_rtx register.
7153 While this is good idea, unfortunately these constructs may
7154 be created by "adds using lea" optimization for incorrect
7163 This code is nonsensical, but results in addressing
7164 GOT table with pic_offset_table_rtx base. We can't
7165 just refuse it easily, since it gets matched by
7166 "addsi3" pattern, that later gets split to lea in the
7167 case output register differs from input. While this
7168 can be handled by separate addsi pattern for this case
7169 that never results in lea, this seems to be easier and
7170 correct fix for crash to disable this test. */
7172 else if (GET_CODE (disp
) != LABEL_REF
7173 && !CONST_INT_P (disp
)
7174 && (GET_CODE (disp
) != CONST
7175 || !legitimate_constant_p (disp
))
7176 && (GET_CODE (disp
) != SYMBOL_REF
7177 || !legitimate_constant_p (disp
)))
7179 reason
= "displacement is not constant";
7182 else if (TARGET_64BIT
7183 && !x86_64_immediate_operand (disp
, VOIDmode
))
7185 reason
= "displacement is out of range";
7190 /* Everything looks valid. */
7197 /* Return a unique alias set for the GOT. */
7199 static alias_set_type
7200 ix86_GOT_alias_set (void)
7202 static alias_set_type set
= -1;
7204 set
= new_alias_set ();
7208 /* Return a legitimate reference for ORIG (an address) using the
7209 register REG. If REG is 0, a new pseudo is generated.
7211 There are two types of references that must be handled:
7213 1. Global data references must load the address from the GOT, via
7214 the PIC reg. An insn is emitted to do this load, and the reg is
7217 2. Static data references, constant pool addresses, and code labels
7218 compute the address as an offset from the GOT, whose base is in
7219 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7220 differentiate them from global data objects. The returned
7221 address is the PIC reg + an unspec constant.
7223 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7224 reg also appears in the address. */
7227 legitimize_pic_address (rtx orig
, rtx reg
)
7234 if (TARGET_MACHO
&& !TARGET_64BIT
)
7237 reg
= gen_reg_rtx (Pmode
);
7238 /* Use the generic Mach-O PIC machinery. */
7239 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7243 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7245 else if (TARGET_64BIT
7246 && ix86_cmodel
!= CM_SMALL_PIC
7247 && gotoff_operand (addr
, Pmode
))
7250 /* This symbol may be referenced via a displacement from the PIC
7251 base address (@GOTOFF). */
7253 if (reload_in_progress
)
7254 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7255 if (GET_CODE (addr
) == CONST
)
7256 addr
= XEXP (addr
, 0);
7257 if (GET_CODE (addr
) == PLUS
)
7259 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7261 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7264 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7265 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7267 tmpreg
= gen_reg_rtx (Pmode
);
7270 emit_move_insn (tmpreg
, new_rtx
);
7274 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7275 tmpreg
, 1, OPTAB_DIRECT
);
7278 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7280 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7282 /* This symbol may be referenced via a displacement from the PIC
7283 base address (@GOTOFF). */
7285 if (reload_in_progress
)
7286 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7287 if (GET_CODE (addr
) == CONST
)
7288 addr
= XEXP (addr
, 0);
7289 if (GET_CODE (addr
) == PLUS
)
7291 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
7293 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
7296 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7297 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7298 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7302 emit_move_insn (reg
, new_rtx
);
7306 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7307 /* We can't use @GOTOFF for text labels on VxWorks;
7308 see gotoff_operand. */
7309 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7311 /* Given that we've already handled dllimport variables separately
7312 in legitimize_address, and all other variables should satisfy
7313 legitimate_pic_address_disp_p, we should never arrive here. */
7314 gcc_assert (!TARGET_64BIT_MS_ABI
);
7316 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7318 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7319 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7320 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7321 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7324 reg
= gen_reg_rtx (Pmode
);
7325 /* Use directly gen_movsi, otherwise the address is loaded
7326 into register for CSE. We don't want to CSE this addresses,
7327 instead we CSE addresses from the GOT table, so skip this. */
7328 emit_insn (gen_movsi (reg
, new_rtx
));
7333 /* This symbol must be referenced via a load from the
7334 Global Offset Table (@GOT). */
7336 if (reload_in_progress
)
7337 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7338 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7339 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7341 new_rtx
= force_reg (Pmode
, new_rtx
);
7342 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7343 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
7344 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
7347 reg
= gen_reg_rtx (Pmode
);
7348 emit_move_insn (reg
, new_rtx
);
7354 if (CONST_INT_P (addr
)
7355 && !x86_64_immediate_operand (addr
, VOIDmode
))
7359 emit_move_insn (reg
, addr
);
7363 new_rtx
= force_reg (Pmode
, addr
);
7365 else if (GET_CODE (addr
) == CONST
)
7367 addr
= XEXP (addr
, 0);
7369 /* We must match stuff we generate before. Assume the only
7370 unspecs that can get here are ours. Not that we could do
7371 anything with them anyway.... */
7372 if (GET_CODE (addr
) == UNSPEC
7373 || (GET_CODE (addr
) == PLUS
7374 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7376 gcc_assert (GET_CODE (addr
) == PLUS
);
7378 if (GET_CODE (addr
) == PLUS
)
7380 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7382 /* Check first to see if this is a constant offset from a @GOTOFF
7383 symbol reference. */
7384 if (gotoff_operand (op0
, Pmode
)
7385 && CONST_INT_P (op1
))
7389 if (reload_in_progress
)
7390 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7391 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7393 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
7394 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
7395 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
7399 emit_move_insn (reg
, new_rtx
);
7405 if (INTVAL (op1
) < -16*1024*1024
7406 || INTVAL (op1
) >= 16*1024*1024)
7408 if (!x86_64_immediate_operand (op1
, Pmode
))
7409 op1
= force_reg (Pmode
, op1
);
7410 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7416 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7417 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
7418 base
== reg
? NULL_RTX
: reg
);
7420 if (CONST_INT_P (new_rtx
))
7421 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
7424 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
7426 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
7427 new_rtx
= XEXP (new_rtx
, 1);
7429 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
7437 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7440 get_thread_pointer (int to_reg
)
7444 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7448 reg
= gen_reg_rtx (Pmode
);
7449 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7450 insn
= emit_insn (insn
);
7455 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7456 false if we expect this to be used for a memory address and true if
7457 we expect to load the address into a register. */
7460 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7462 rtx dest
, base
, off
, pic
, tp
;
7467 case TLS_MODEL_GLOBAL_DYNAMIC
:
7468 dest
= gen_reg_rtx (Pmode
);
7469 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7471 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7473 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7476 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7477 insns
= get_insns ();
7480 CONST_OR_PURE_CALL_P (insns
) = 1;
7481 emit_libcall_block (insns
, dest
, rax
, x
);
7483 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7484 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7486 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7488 if (TARGET_GNU2_TLS
)
7490 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7492 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7496 case TLS_MODEL_LOCAL_DYNAMIC
:
7497 base
= gen_reg_rtx (Pmode
);
7498 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7500 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7502 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7505 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7506 insns
= get_insns ();
7509 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7510 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7511 CONST_OR_PURE_CALL_P (insns
) = 1;
7512 emit_libcall_block (insns
, base
, rax
, note
);
7514 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7515 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7517 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7519 if (TARGET_GNU2_TLS
)
7521 rtx x
= ix86_tls_module_base ();
7523 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7524 gen_rtx_MINUS (Pmode
, x
, tp
));
7527 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7528 off
= gen_rtx_CONST (Pmode
, off
);
7530 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7532 if (TARGET_GNU2_TLS
)
7534 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7536 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7541 case TLS_MODEL_INITIAL_EXEC
:
7545 type
= UNSPEC_GOTNTPOFF
;
7549 if (reload_in_progress
)
7550 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
7551 pic
= pic_offset_table_rtx
;
7552 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7554 else if (!TARGET_ANY_GNU_TLS
)
7556 pic
= gen_reg_rtx (Pmode
);
7557 emit_insn (gen_set_got (pic
));
7558 type
= UNSPEC_GOTTPOFF
;
7563 type
= UNSPEC_INDNTPOFF
;
7566 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7567 off
= gen_rtx_CONST (Pmode
, off
);
7569 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7570 off
= gen_const_mem (Pmode
, off
);
7571 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7573 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7575 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7576 off
= force_reg (Pmode
, off
);
7577 return gen_rtx_PLUS (Pmode
, base
, off
);
7581 base
= get_thread_pointer (true);
7582 dest
= gen_reg_rtx (Pmode
);
7583 emit_insn (gen_subsi3 (dest
, base
, off
));
7587 case TLS_MODEL_LOCAL_EXEC
:
7588 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7589 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7590 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7591 off
= gen_rtx_CONST (Pmode
, off
);
7593 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7595 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7596 return gen_rtx_PLUS (Pmode
, base
, off
);
7600 base
= get_thread_pointer (true);
7601 dest
= gen_reg_rtx (Pmode
);
7602 emit_insn (gen_subsi3 (dest
, base
, off
));
7613 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7616 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
7617 htab_t dllimport_map
;
7620 get_dllimport_decl (tree decl
)
7622 struct tree_map
*h
, in
;
7626 size_t namelen
, prefixlen
;
7632 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
7634 in
.hash
= htab_hash_pointer (decl
);
7635 in
.base
.from
= decl
;
7636 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
7637 h
= (struct tree_map
*) *loc
;
7641 *loc
= h
= GGC_NEW (struct tree_map
);
7643 h
->base
.from
= decl
;
7644 h
->to
= to
= build_decl (VAR_DECL
, NULL
, ptr_type_node
);
7645 DECL_ARTIFICIAL (to
) = 1;
7646 DECL_IGNORED_P (to
) = 1;
7647 DECL_EXTERNAL (to
) = 1;
7648 TREE_READONLY (to
) = 1;
7650 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7651 name
= targetm
.strip_name_encoding (name
);
7652 if (name
[0] == FASTCALL_PREFIX
)
7658 prefix
= "*__imp__";
7660 namelen
= strlen (name
);
7661 prefixlen
= strlen (prefix
);
7662 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
7663 memcpy (imp_name
, prefix
, prefixlen
);
7664 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
7666 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
7667 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
7668 SET_SYMBOL_REF_DECL (rtl
, to
);
7669 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
7671 rtl
= gen_const_mem (Pmode
, rtl
);
7672 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
7674 SET_DECL_RTL (to
, rtl
);
7679 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7680 true if we require the result be a register. */
7683 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
7688 gcc_assert (SYMBOL_REF_DECL (symbol
));
7689 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
7691 x
= DECL_RTL (imp_decl
);
7693 x
= force_reg (Pmode
, x
);
7697 /* Try machine-dependent ways of modifying an illegitimate address
7698 to be legitimate. If we find one, return the new, valid address.
7699 This macro is used in only one place: `memory_address' in explow.c.
7701 OLDX is the address as it was before break_out_memory_refs was called.
7702 In some cases it is useful to look at this to decide what needs to be done.
7704 MODE and WIN are passed so that this macro can use
7705 GO_IF_LEGITIMATE_ADDRESS.
7707 It is always safe for this macro to do nothing. It exists to recognize
7708 opportunities to optimize the output.
7710 For the 80386, we handle X+REG by loading X into a register R and
7711 using R+REG. R will go in a general reg and indexing will be used.
7712 However, if REG is a broken-out memory address or multiplication,
7713 nothing needs to be done because REG can certainly go in a general reg.
7715 When -fpic is used, special handling is needed for symbolic references.
7716 See comments by legitimize_pic_address in i386.c for details. */
7719 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7724 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7726 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
7727 if (GET_CODE (x
) == CONST
7728 && GET_CODE (XEXP (x
, 0)) == PLUS
7729 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7730 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7732 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
7733 (enum tls_model
) log
, false);
7734 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7737 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
7739 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
7740 return legitimize_dllimport_symbol (x
, true);
7741 if (GET_CODE (x
) == CONST
7742 && GET_CODE (XEXP (x
, 0)) == PLUS
7743 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7744 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
7746 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
7747 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7751 if (flag_pic
&& SYMBOLIC_CONST (x
))
7752 return legitimize_pic_address (x
, 0);
7754 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7755 if (GET_CODE (x
) == ASHIFT
7756 && CONST_INT_P (XEXP (x
, 1))
7757 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7760 log
= INTVAL (XEXP (x
, 1));
7761 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7762 GEN_INT (1 << log
));
7765 if (GET_CODE (x
) == PLUS
)
7767 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7769 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7770 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7771 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7774 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7775 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7776 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7777 GEN_INT (1 << log
));
7780 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7781 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7782 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7785 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7786 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7787 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7788 GEN_INT (1 << log
));
7791 /* Put multiply first if it isn't already. */
7792 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7794 rtx tmp
= XEXP (x
, 0);
7795 XEXP (x
, 0) = XEXP (x
, 1);
7800 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7801 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7802 created by virtual register instantiation, register elimination, and
7803 similar optimizations. */
7804 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7807 x
= gen_rtx_PLUS (Pmode
,
7808 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7809 XEXP (XEXP (x
, 1), 0)),
7810 XEXP (XEXP (x
, 1), 1));
7814 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7815 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7816 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7818 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7819 && CONSTANT_P (XEXP (x
, 1)))
7822 rtx other
= NULL_RTX
;
7824 if (CONST_INT_P (XEXP (x
, 1)))
7826 constant
= XEXP (x
, 1);
7827 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7829 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7831 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7832 other
= XEXP (x
, 1);
7840 x
= gen_rtx_PLUS (Pmode
,
7841 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7842 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7843 plus_constant (other
, INTVAL (constant
)));
7847 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7850 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7853 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7856 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7859 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7863 && REG_P (XEXP (x
, 1))
7864 && REG_P (XEXP (x
, 0)))
7867 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7870 x
= legitimize_pic_address (x
, 0);
7873 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7876 if (REG_P (XEXP (x
, 0)))
7878 rtx temp
= gen_reg_rtx (Pmode
);
7879 rtx val
= force_operand (XEXP (x
, 1), temp
);
7881 emit_move_insn (temp
, val
);
7887 else if (REG_P (XEXP (x
, 1)))
7889 rtx temp
= gen_reg_rtx (Pmode
);
7890 rtx val
= force_operand (XEXP (x
, 0), temp
);
7892 emit_move_insn (temp
, val
);
7902 /* Print an integer constant expression in assembler syntax. Addition
7903 and subtraction are the only arithmetic that may appear in these
7904 expressions. FILE is the stdio stream to write to, X is the rtx, and
7905 CODE is the operand print code from the output string. */
7908 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7912 switch (GET_CODE (x
))
7915 gcc_assert (flag_pic
);
7920 if (! TARGET_MACHO
|| TARGET_64BIT
)
7921 output_addr_const (file
, x
);
7924 const char *name
= XSTR (x
, 0);
7926 /* Mark the decl as referenced so that cgraph will
7927 output the function. */
7928 if (SYMBOL_REF_DECL (x
))
7929 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7932 if (MACHOPIC_INDIRECT
7933 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7934 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7936 assemble_name (file
, name
);
7938 if (!TARGET_MACHO
&& !TARGET_64BIT_MS_ABI
7939 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7940 fputs ("@PLT", file
);
7947 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7948 assemble_name (asm_out_file
, buf
);
7952 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7956 /* This used to output parentheses around the expression,
7957 but that does not work on the 386 (either ATT or BSD assembler). */
7958 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7962 if (GET_MODE (x
) == VOIDmode
)
7964 /* We can use %d if the number is <32 bits and positive. */
7965 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7966 fprintf (file
, "0x%lx%08lx",
7967 (unsigned long) CONST_DOUBLE_HIGH (x
),
7968 (unsigned long) CONST_DOUBLE_LOW (x
));
7970 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7973 /* We can't handle floating point constants;
7974 PRINT_OPERAND must handle them. */
7975 output_operand_lossage ("floating constant misused");
7979 /* Some assemblers need integer constants to appear first. */
7980 if (CONST_INT_P (XEXP (x
, 0)))
7982 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7984 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7988 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7989 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7991 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7997 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7998 output_pic_addr_const (file
, XEXP (x
, 0), code
);
8000 output_pic_addr_const (file
, XEXP (x
, 1), code
);
8002 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
8006 gcc_assert (XVECLEN (x
, 0) == 1);
8007 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
8008 switch (XINT (x
, 1))
8011 fputs ("@GOT", file
);
8014 fputs ("@GOTOFF", file
);
8017 fputs ("@PLTOFF", file
);
8019 case UNSPEC_GOTPCREL
:
8020 fputs ("@GOTPCREL(%rip)", file
);
8022 case UNSPEC_GOTTPOFF
:
8023 /* FIXME: This might be @TPOFF in Sun ld too. */
8024 fputs ("@GOTTPOFF", file
);
8027 fputs ("@TPOFF", file
);
8031 fputs ("@TPOFF", file
);
8033 fputs ("@NTPOFF", file
);
8036 fputs ("@DTPOFF", file
);
8038 case UNSPEC_GOTNTPOFF
:
8040 fputs ("@GOTTPOFF(%rip)", file
);
8042 fputs ("@GOTNTPOFF", file
);
8044 case UNSPEC_INDNTPOFF
:
8045 fputs ("@INDNTPOFF", file
);
8048 output_operand_lossage ("invalid UNSPEC as operand");
8054 output_operand_lossage ("invalid expression as operand");
8058 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8059 We need to emit DTP-relative relocations. */
8061 static void ATTRIBUTE_UNUSED
8062 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
8064 fputs (ASM_LONG
, file
);
8065 output_addr_const (file
, x
);
8066 fputs ("@DTPOFF", file
);
8072 fputs (", 0", file
);
8079 /* In the name of slightly smaller debug output, and to cater to
8080 general assembler lossage, recognize PIC+GOTOFF and turn it back
8081 into a direct symbol reference.
8083 On Darwin, this is necessary to avoid a crash, because Darwin
8084 has a different PIC label for each routine but the DWARF debugging
8085 information is not associated with any particular routine, so it's
8086 necessary to remove references to the PIC label from RTL stored by
8087 the DWARF output code. */
8090 ix86_delegitimize_address (rtx orig_x
)
8093 /* reg_addend is NULL or a multiple of some register. */
8094 rtx reg_addend
= NULL_RTX
;
8095 /* const_addend is NULL or a const_int. */
8096 rtx const_addend
= NULL_RTX
;
8097 /* This is the result, or NULL. */
8098 rtx result
= NULL_RTX
;
8105 if (GET_CODE (x
) != CONST
8106 || GET_CODE (XEXP (x
, 0)) != UNSPEC
8107 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
8110 return XVECEXP (XEXP (x
, 0), 0, 0);
8113 if (GET_CODE (x
) != PLUS
8114 || GET_CODE (XEXP (x
, 1)) != CONST
)
8117 if (REG_P (XEXP (x
, 0))
8118 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8119 /* %ebx + GOT/GOTOFF */
8121 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
8123 /* %ebx + %reg * scale + GOT/GOTOFF */
8124 reg_addend
= XEXP (x
, 0);
8125 if (REG_P (XEXP (reg_addend
, 0))
8126 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
8127 reg_addend
= XEXP (reg_addend
, 1);
8128 else if (REG_P (XEXP (reg_addend
, 1))
8129 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
8130 reg_addend
= XEXP (reg_addend
, 0);
8133 if (!REG_P (reg_addend
)
8134 && GET_CODE (reg_addend
) != MULT
8135 && GET_CODE (reg_addend
) != ASHIFT
)
8141 x
= XEXP (XEXP (x
, 1), 0);
8142 if (GET_CODE (x
) == PLUS
8143 && CONST_INT_P (XEXP (x
, 1)))
8145 const_addend
= XEXP (x
, 1);
8149 if (GET_CODE (x
) == UNSPEC
8150 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
8151 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
8152 result
= XVECEXP (x
, 0, 0);
8154 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
8156 result
= XEXP (x
, 0);
8162 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
8164 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
8168 /* If X is a machine specific address (i.e. a symbol or label being
8169 referenced as a displacement from the GOT implemented using an
8170 UNSPEC), then return the base term. Otherwise return X. */
8173 ix86_find_base_term (rtx x
)
8179 if (GET_CODE (x
) != CONST
)
8182 if (GET_CODE (term
) == PLUS
8183 && (CONST_INT_P (XEXP (term
, 1))
8184 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
8185 term
= XEXP (term
, 0);
8186 if (GET_CODE (term
) != UNSPEC
8187 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
8190 term
= XVECEXP (term
, 0, 0);
8192 if (GET_CODE (term
) != SYMBOL_REF
8193 && GET_CODE (term
) != LABEL_REF
)
8199 term
= ix86_delegitimize_address (x
);
8201 if (GET_CODE (term
) != SYMBOL_REF
8202 && GET_CODE (term
) != LABEL_REF
)
8209 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
8214 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
8216 enum rtx_code second_code
, bypass_code
;
8217 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
8218 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
8219 code
= ix86_fp_compare_code_to_integer (code
);
8223 code
= reverse_condition (code
);
8274 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
8278 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8279 Those same assemblers have the same but opposite lossage on cmov. */
8281 suffix
= fp
? "nbe" : "a";
8282 else if (mode
== CCCmode
)
8305 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8327 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
8328 suffix
= fp
? "nb" : "ae";
8331 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8338 else if (mode
== CCCmode
)
8339 suffix
= fp
? "nb" : "ae";
8344 suffix
= fp
? "u" : "p";
8347 suffix
= fp
? "nu" : "np";
8352 fputs (suffix
, file
);
8355 /* Print the name of register X to FILE based on its machine mode and number.
8356 If CODE is 'w', pretend the mode is HImode.
8357 If CODE is 'b', pretend the mode is QImode.
8358 If CODE is 'k', pretend the mode is SImode.
8359 If CODE is 'q', pretend the mode is DImode.
8360 If CODE is 'h', pretend the reg is the 'high' byte register.
8361 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8364 print_reg (rtx x
, int code
, FILE *file
)
8366 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8367 && REGNO (x
) != FRAME_POINTER_REGNUM
8368 && REGNO (x
) != FLAGS_REG
8369 && REGNO (x
) != FPSR_REG
8370 && REGNO (x
) != FPCR_REG
);
8372 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8375 if (code
== 'w' || MMX_REG_P (x
))
8377 else if (code
== 'b')
8379 else if (code
== 'k')
8381 else if (code
== 'q')
8383 else if (code
== 'y')
8385 else if (code
== 'h')
8388 code
= GET_MODE_SIZE (GET_MODE (x
));
8390 /* Irritatingly, AMD extended registers use different naming convention
8391 from the normal registers. */
8392 if (REX_INT_REG_P (x
))
8394 gcc_assert (TARGET_64BIT
);
8398 error ("extended registers have no high halves");
8401 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8404 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8407 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8410 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8413 error ("unsupported operand size for extended register");
8421 if (STACK_TOP_P (x
))
8423 fputs ("st(0)", file
);
8430 if (! ANY_FP_REG_P (x
))
8431 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8436 fputs (hi_reg_name
[REGNO (x
)], file
);
8439 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8441 fputs (qi_reg_name
[REGNO (x
)], file
);
8444 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8446 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8453 /* Locate some local-dynamic symbol still in use by this function
8454 so that we can print its name in some tls_local_dynamic_base
8458 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8462 if (GET_CODE (x
) == SYMBOL_REF
8463 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8465 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8473 get_some_local_dynamic_name (void)
8477 if (cfun
->machine
->some_ld_name
)
8478 return cfun
->machine
->some_ld_name
;
8480 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8482 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8483 return cfun
->machine
->some_ld_name
;
8489 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8490 C -- print opcode suffix for set/cmov insn.
8491 c -- like C, but print reversed condition
8492 F,f -- likewise, but for floating-point.
8493 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8495 R -- print the prefix for register names.
8496 z -- print the opcode suffix for the size of the current operand.
8497 * -- print a star (in certain assembler syntax)
8498 A -- print an absolute memory reference.
8499 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8500 s -- print a shift double count, followed by the assemblers argument
8502 b -- print the QImode name of the register for the indicated operand.
8503 %b0 would print %al if operands[0] is reg 0.
8504 w -- likewise, print the HImode name of the register.
8505 k -- likewise, print the SImode name of the register.
8506 q -- likewise, print the DImode name of the register.
8507 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8508 y -- print "st(0)" instead of "st" as a register.
8509 D -- print condition for SSE cmp instruction.
8510 P -- if PIC, print an @PLT suffix.
8511 X -- don't print any sort of PIC '@' suffix for a symbol.
8512 & -- print some in-use local-dynamic symbol name.
8513 H -- print a memory address offset by 8; used for sse high-parts
8514 + -- print a branch hint as 'cs' or 'ds' prefix
8515 ; -- print a semicolon (after prefixes due to bug in older gas).
8519 print_operand (FILE *file
, rtx x
, int code
)
8526 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8531 assemble_name (file
, get_some_local_dynamic_name ());
8535 switch (ASSEMBLER_DIALECT
)
8542 /* Intel syntax. For absolute addresses, registers should not
8543 be surrounded by braces. */
8547 PRINT_OPERAND (file
, x
, 0);
8557 PRINT_OPERAND (file
, x
, 0);
8562 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8567 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8572 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8577 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8582 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8587 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8592 /* 387 opcodes don't get size suffixes if the operands are
8594 if (STACK_REG_P (x
))
8597 /* Likewise if using Intel opcodes. */
8598 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8601 /* This is the size of op from size of operand. */
8602 switch (GET_MODE_SIZE (GET_MODE (x
)))
8611 #ifdef HAVE_GAS_FILDS_FISTS
8621 if (GET_MODE (x
) == SFmode
)
8636 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8638 #ifdef GAS_MNEMONICS
8664 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8666 PRINT_OPERAND (file
, x
, 0);
8672 /* Little bit of braindamage here. The SSE compare instructions
8673 does use completely different names for the comparisons that the
8674 fp conditional moves. */
8675 switch (GET_CODE (x
))
8690 fputs ("unord", file
);
8694 fputs ("neq", file
);
8698 fputs ("nlt", file
);
8702 fputs ("nle", file
);
8705 fputs ("ord", file
);
8712 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8713 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8715 switch (GET_MODE (x
))
8717 case HImode
: putc ('w', file
); break;
8719 case SFmode
: putc ('l', file
); break;
8721 case DFmode
: putc ('q', file
); break;
8722 default: gcc_unreachable ();
8729 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8732 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8733 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8736 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8739 /* Like above, but reverse condition */
8741 /* Check to see if argument to %c is really a constant
8742 and not a condition code which needs to be reversed. */
8743 if (!COMPARISON_P (x
))
8745 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8748 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8751 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8752 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8755 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8759 /* It doesn't actually matter what mode we use here, as we're
8760 only going to use this for printing. */
8761 x
= adjust_address_nv (x
, DImode
, 8);
8768 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8771 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8774 int pred_val
= INTVAL (XEXP (x
, 0));
8776 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8777 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8779 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8780 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8782 /* Emit hints only in the case default branch prediction
8783 heuristics would fail. */
8784 if (taken
!= cputaken
)
8786 /* We use 3e (DS) prefix for taken branches and
8787 2e (CS) prefix for not taken branches. */
8789 fputs ("ds ; ", file
);
8791 fputs ("cs ; ", file
);
8800 fputs (" ; ", file
);
8807 output_operand_lossage ("invalid operand code '%c'", code
);
8812 print_reg (x
, code
, file
);
8816 /* No `byte ptr' prefix for call instructions. */
8817 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8820 switch (GET_MODE_SIZE (GET_MODE (x
)))
8822 case 1: size
= "BYTE"; break;
8823 case 2: size
= "WORD"; break;
8824 case 4: size
= "DWORD"; break;
8825 case 8: size
= "QWORD"; break;
8826 case 12: size
= "XWORD"; break;
8827 case 16: size
= "XMMWORD"; break;
8832 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8835 else if (code
== 'w')
8837 else if (code
== 'k')
8841 fputs (" PTR ", file
);
8845 /* Avoid (%rip) for call operands. */
8846 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8847 && !CONST_INT_P (x
))
8848 output_addr_const (file
, x
);
8849 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8850 output_operand_lossage ("invalid constraints for operand");
8855 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8860 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8861 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8863 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8865 fprintf (file
, "0x%08lx", l
);
8868 /* These float cases don't actually occur as immediate operands. */
8869 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8873 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8874 fprintf (file
, "%s", dstr
);
8877 else if (GET_CODE (x
) == CONST_DOUBLE
8878 && GET_MODE (x
) == XFmode
)
8882 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8883 fprintf (file
, "%s", dstr
);
8888 /* We have patterns that allow zero sets of memory, for instance.
8889 In 64-bit mode, we should probably support all 8-byte vectors,
8890 since we can in fact encode that into an immediate. */
8891 if (GET_CODE (x
) == CONST_VECTOR
)
8893 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8899 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8901 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8904 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8905 || GET_CODE (x
) == LABEL_REF
)
8907 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8910 fputs ("OFFSET FLAT:", file
);
8913 if (CONST_INT_P (x
))
8914 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8916 output_pic_addr_const (file
, x
, code
);
8918 output_addr_const (file
, x
);
8922 /* Print a memory operand whose address is ADDR. */
8925 print_operand_address (FILE *file
, rtx addr
)
8927 struct ix86_address parts
;
8928 rtx base
, index
, disp
;
8930 int ok
= ix86_decompose_address (addr
, &parts
);
8935 index
= parts
.index
;
8937 scale
= parts
.scale
;
8945 if (USER_LABEL_PREFIX
[0] == 0)
8947 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8953 if (!base
&& !index
)
8955 /* Displacement only requires special attention. */
8957 if (CONST_INT_P (disp
))
8959 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8961 if (USER_LABEL_PREFIX
[0] == 0)
8963 fputs ("ds:", file
);
8965 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8968 output_pic_addr_const (file
, disp
, 0);
8970 output_addr_const (file
, disp
);
8972 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8975 if (GET_CODE (disp
) == CONST
8976 && GET_CODE (XEXP (disp
, 0)) == PLUS
8977 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8978 disp
= XEXP (XEXP (disp
, 0), 0);
8979 if (GET_CODE (disp
) == LABEL_REF
8980 || (GET_CODE (disp
) == SYMBOL_REF
8981 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8982 fputs ("(%rip)", file
);
8987 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8992 output_pic_addr_const (file
, disp
, 0);
8993 else if (GET_CODE (disp
) == LABEL_REF
)
8994 output_asm_label (disp
);
8996 output_addr_const (file
, disp
);
9001 print_reg (base
, 0, file
);
9005 print_reg (index
, 0, file
);
9007 fprintf (file
, ",%d", scale
);
9013 rtx offset
= NULL_RTX
;
9017 /* Pull out the offset of a symbol; print any symbol itself. */
9018 if (GET_CODE (disp
) == CONST
9019 && GET_CODE (XEXP (disp
, 0)) == PLUS
9020 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
9022 offset
= XEXP (XEXP (disp
, 0), 1);
9023 disp
= gen_rtx_CONST (VOIDmode
,
9024 XEXP (XEXP (disp
, 0), 0));
9028 output_pic_addr_const (file
, disp
, 0);
9029 else if (GET_CODE (disp
) == LABEL_REF
)
9030 output_asm_label (disp
);
9031 else if (CONST_INT_P (disp
))
9034 output_addr_const (file
, disp
);
9040 print_reg (base
, 0, file
);
9043 if (INTVAL (offset
) >= 0)
9045 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9049 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
9056 print_reg (index
, 0, file
);
9058 fprintf (file
, "*%d", scale
);
9066 output_addr_const_extra (FILE *file
, rtx x
)
9070 if (GET_CODE (x
) != UNSPEC
)
9073 op
= XVECEXP (x
, 0, 0);
9074 switch (XINT (x
, 1))
9076 case UNSPEC_GOTTPOFF
:
9077 output_addr_const (file
, op
);
9078 /* FIXME: This might be @TPOFF in Sun ld. */
9079 fputs ("@GOTTPOFF", file
);
9082 output_addr_const (file
, op
);
9083 fputs ("@TPOFF", file
);
9086 output_addr_const (file
, op
);
9088 fputs ("@TPOFF", file
);
9090 fputs ("@NTPOFF", file
);
9093 output_addr_const (file
, op
);
9094 fputs ("@DTPOFF", file
);
9096 case UNSPEC_GOTNTPOFF
:
9097 output_addr_const (file
, op
);
9099 fputs ("@GOTTPOFF(%rip)", file
);
9101 fputs ("@GOTNTPOFF", file
);
9103 case UNSPEC_INDNTPOFF
:
9104 output_addr_const (file
, op
);
9105 fputs ("@INDNTPOFF", file
);
9115 /* Split one or more DImode RTL references into pairs of SImode
9116 references. The RTL can be REG, offsettable MEM, integer constant, or
9117 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9118 split and "num" is its length. lo_half and hi_half are output arrays
9119 that parallel "operands". */
9122 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9126 rtx op
= operands
[num
];
9128 /* simplify_subreg refuse to split volatile memory addresses,
9129 but we still have to handle it. */
9132 lo_half
[num
] = adjust_address (op
, SImode
, 0);
9133 hi_half
[num
] = adjust_address (op
, SImode
, 4);
9137 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
9138 GET_MODE (op
) == VOIDmode
9139 ? DImode
: GET_MODE (op
), 0);
9140 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
9141 GET_MODE (op
) == VOIDmode
9142 ? DImode
: GET_MODE (op
), 4);
9146 /* Split one or more TImode RTL references into pairs of DImode
9147 references. The RTL can be REG, offsettable MEM, integer constant, or
9148 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9149 split and "num" is its length. lo_half and hi_half are output arrays
9150 that parallel "operands". */
9153 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
9157 rtx op
= operands
[num
];
9159 /* simplify_subreg refuse to split volatile memory addresses, but we
9160 still have to handle it. */
9163 lo_half
[num
] = adjust_address (op
, DImode
, 0);
9164 hi_half
[num
] = adjust_address (op
, DImode
, 8);
9168 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
9169 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
9174 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9175 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9176 is the expression of the binary operation. The output may either be
9177 emitted here, or returned to the caller, like all output_* functions.
9179 There is no guarantee that the operands are the same mode, as they
9180 might be within FLOAT or FLOAT_EXTEND expressions. */
9182 #ifndef SYSV386_COMPAT
9183 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9184 wants to fix the assemblers because that causes incompatibility
9185 with gcc. No-one wants to fix gcc because that causes
9186 incompatibility with assemblers... You can use the option of
9187 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9188 #define SYSV386_COMPAT 1
9192 output_387_binary_op (rtx insn
, rtx
*operands
)
9194 static char buf
[30];
9197 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
9199 #ifdef ENABLE_CHECKING
9200 /* Even if we do not want to check the inputs, this documents input
9201 constraints. Which helps in understanding the following code. */
9202 if (STACK_REG_P (operands
[0])
9203 && ((REG_P (operands
[1])
9204 && REGNO (operands
[0]) == REGNO (operands
[1])
9205 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
9206 || (REG_P (operands
[2])
9207 && REGNO (operands
[0]) == REGNO (operands
[2])
9208 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
9209 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
9212 gcc_assert (is_sse
);
9215 switch (GET_CODE (operands
[3]))
9218 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9219 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9227 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9228 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9236 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9237 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9245 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
9246 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
9260 if (GET_MODE (operands
[0]) == SFmode
)
9261 strcat (buf
, "ss\t{%2, %0|%0, %2}");
9263 strcat (buf
, "sd\t{%2, %0|%0, %2}");
9268 switch (GET_CODE (operands
[3]))
9272 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
9274 rtx temp
= operands
[2];
9275 operands
[2] = operands
[1];
9279 /* know operands[0] == operands[1]. */
9281 if (MEM_P (operands
[2]))
9287 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9289 if (STACK_TOP_P (operands
[0]))
9290 /* How is it that we are storing to a dead operand[2]?
9291 Well, presumably operands[1] is dead too. We can't
9292 store the result to st(0) as st(0) gets popped on this
9293 instruction. Instead store to operands[2] (which I
9294 think has to be st(1)). st(1) will be popped later.
9295 gcc <= 2.8.1 didn't have this check and generated
9296 assembly code that the Unixware assembler rejected. */
9297 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9299 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9303 if (STACK_TOP_P (operands
[0]))
9304 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9306 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9311 if (MEM_P (operands
[1]))
9317 if (MEM_P (operands
[2]))
9323 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
9326 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9327 derived assemblers, confusingly reverse the direction of
9328 the operation for fsub{r} and fdiv{r} when the
9329 destination register is not st(0). The Intel assembler
9330 doesn't have this brain damage. Read !SYSV386_COMPAT to
9331 figure out what the hardware really does. */
9332 if (STACK_TOP_P (operands
[0]))
9333 p
= "{p\t%0, %2|rp\t%2, %0}";
9335 p
= "{rp\t%2, %0|p\t%0, %2}";
9337 if (STACK_TOP_P (operands
[0]))
9338 /* As above for fmul/fadd, we can't store to st(0). */
9339 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9341 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9346 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9349 if (STACK_TOP_P (operands
[0]))
9350 p
= "{rp\t%0, %1|p\t%1, %0}";
9352 p
= "{p\t%1, %0|rp\t%0, %1}";
9354 if (STACK_TOP_P (operands
[0]))
9355 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9357 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9362 if (STACK_TOP_P (operands
[0]))
9364 if (STACK_TOP_P (operands
[1]))
9365 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9367 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9370 else if (STACK_TOP_P (operands
[1]))
9373 p
= "{\t%1, %0|r\t%0, %1}";
9375 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9381 p
= "{r\t%2, %0|\t%0, %2}";
9383 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9396 /* Return needed mode for entity in optimize_mode_switching pass. */
9399 ix86_mode_needed (int entity
, rtx insn
)
9401 enum attr_i387_cw mode
;
9403 /* The mode UNINITIALIZED is used to store control word after a
9404 function call or ASM pattern. The mode ANY specify that function
9405 has no requirements on the control word and make no changes in the
9406 bits we are interested in. */
9409 || (NONJUMP_INSN_P (insn
)
9410 && (asm_noperands (PATTERN (insn
)) >= 0
9411 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9412 return I387_CW_UNINITIALIZED
;
9414 if (recog_memoized (insn
) < 0)
9417 mode
= get_attr_i387_cw (insn
);
9422 if (mode
== I387_CW_TRUNC
)
9427 if (mode
== I387_CW_FLOOR
)
9432 if (mode
== I387_CW_CEIL
)
9437 if (mode
== I387_CW_MASK_PM
)
9448 /* Output code to initialize control word copies used by trunc?f?i and
9449 rounding patterns. CURRENT_MODE is set to current control word,
9450 while NEW_MODE is set to new control word. */
9453 emit_i387_cw_initialization (int mode
)
9455 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9458 enum ix86_stack_slot slot
;
9460 rtx reg
= gen_reg_rtx (HImode
);
9462 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9463 emit_move_insn (reg
, copy_rtx (stored_mode
));
9465 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9470 /* round toward zero (truncate) */
9471 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9472 slot
= SLOT_CW_TRUNC
;
9476 /* round down toward -oo */
9477 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9478 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9479 slot
= SLOT_CW_FLOOR
;
9483 /* round up toward +oo */
9484 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9485 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9486 slot
= SLOT_CW_CEIL
;
9489 case I387_CW_MASK_PM
:
9490 /* mask precision exception for nearbyint() */
9491 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9492 slot
= SLOT_CW_MASK_PM
;
9504 /* round toward zero (truncate) */
9505 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9506 slot
= SLOT_CW_TRUNC
;
9510 /* round down toward -oo */
9511 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9512 slot
= SLOT_CW_FLOOR
;
9516 /* round up toward +oo */
9517 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9518 slot
= SLOT_CW_CEIL
;
9521 case I387_CW_MASK_PM
:
9522 /* mask precision exception for nearbyint() */
9523 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9524 slot
= SLOT_CW_MASK_PM
;
9532 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9534 new_mode
= assign_386_stack_local (HImode
, slot
);
9535 emit_move_insn (new_mode
, reg
);
9538 /* Output code for INSN to convert a float to a signed int. OPERANDS
9539 are the insn operands. The output may be [HSD]Imode and the input
9540 operand may be [SDX]Fmode. */
9543 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9545 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9546 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9547 int round_mode
= get_attr_i387_cw (insn
);
9549 /* Jump through a hoop or two for DImode, since the hardware has no
9550 non-popping instruction. We used to do this a different way, but
9551 that was somewhat fragile and broke with post-reload splitters. */
9552 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9553 output_asm_insn ("fld\t%y1", operands
);
9555 gcc_assert (STACK_TOP_P (operands
[1]));
9556 gcc_assert (MEM_P (operands
[0]));
9557 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
9560 output_asm_insn ("fisttp%z0\t%0", operands
);
9563 if (round_mode
!= I387_CW_ANY
)
9564 output_asm_insn ("fldcw\t%3", operands
);
9565 if (stack_top_dies
|| dimode_p
)
9566 output_asm_insn ("fistp%z0\t%0", operands
);
9568 output_asm_insn ("fist%z0\t%0", operands
);
9569 if (round_mode
!= I387_CW_ANY
)
9570 output_asm_insn ("fldcw\t%2", operands
);
9576 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9577 have the values zero or one, indicates the ffreep insn's operand
9578 from the OPERANDS array. */
9581 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9583 if (TARGET_USE_FFREEP
)
9584 #if HAVE_AS_IX86_FFREEP
9585 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9588 static char retval
[] = ".word\t0xc_df";
9589 int regno
= REGNO (operands
[opno
]);
9591 gcc_assert (FP_REGNO_P (regno
));
9593 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9598 return opno
? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): this extraction drops original source lines (numbering gaps),
   so braces, else-arms and some statements are missing here; verify against
   the full file.  Emits the assembler template for an x87/SSE FP compare:
   ucomis[sd]/comis[sd] for SSE operands, fcomi/fucomi variants when
   eflags_p, otherwise fcom/fucom + fnstsw, selected via a 4-bit mask.  */
9602 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9603 should be used. UNORDERED_P is true when fucom should be used. */
9606 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9609 rtx cmp_op0
, cmp_op1
;
9610 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9614 cmp_op0
= operands
[0];
9615 cmp_op1
= operands
[1];
9619 cmp_op0
= operands
[1];
9620 cmp_op1
= operands
[2];
/* SSE compares: (f)ucomiss/comiss for SFmode, ucomisd/comisd otherwise.  */
9625 if (GET_MODE (operands
[0]) == SFmode
)
9627 return "ucomiss\t{%1, %0|%0, %1}";
9629 return "comiss\t{%1, %0|%0, %1}";
9632 return "ucomisd\t{%1, %0|%0, %1}";
9634 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: the first operand must be at the top of the FP stack.  */
9637 gcc_assert (STACK_TOP_P (cmp_op0
));
9639 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9641 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9645 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9646 return output_387_ffreep (operands
, 1);
9649 return "ftst\n\tfnstsw\t%0";
9652 if (STACK_REG_P (cmp_op1
)
9654 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9655 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9657 /* If both the top of the 387 stack dies, and the other operand
9658 is also a stack register that dies, then this must be a
9659 `fcompp' float compare */
9663 /* There is no double popping fcomi variant.  Fortunately,
9664 eflags is immune from the fstp's cc clobbering. */
9666 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9668 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9669 return output_387_ffreep (operands
, 0);
9674 return "fucompp\n\tfnstsw\t%0";
9676 return "fcompp\n\tfnstsw\t%0";
9681 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9683 static const char * const alt
[16] =
9685 "fcom%z2\t%y2\n\tfnstsw\t%0",
9686 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9687 "fucom%z2\t%y2\n\tfnstsw\t%0",
9688 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9690 "ficom%z2\t%y2\n\tfnstsw\t%0",
9691 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9695 "fcomi\t{%y1, %0|%0, %y1}",
9696 "fcomip\t{%y1, %0|%0, %y1}",
9697 "fucomi\t{%y1, %0|%0, %y1}",
9698 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
9709 mask
= eflags_p
<< 3;
9710 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9711 mask
|= unordered_p
<< 1;
9712 mask
|= stack_top_dies
;
9714 gcc_assert (mask
< 16);
/* NOTE(review): extraction drops original source lines (numbering gaps);
   verify control flow against the full file.  Writes one absolute
   jump-table entry ("<directive> <LPREFIX><value>") to FILE; presumably
   ASM_QUAD is selected on 64-bit targets and the gcc_assert rejects
   64-bit when ASM_QUAD is unavailable — TODO confirm in full source.  */
9723 ix86_output_addr_vec_elt (FILE *file
, int value
)
9725 const char *directive
= ASM_LONG
;
9729 directive
= ASM_QUAD
;
9731 gcc_assert (!TARGET_64BIT
);
9734 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
/* NOTE(review): extraction drops original source lines (numbering gaps);
   verify against the full file.  Writes one relative (label-difference)
   jump-table entry to FILE: plain "value - rel" on 64-bit/VxWorks,
   "@GOTOFF" form when the assembler supports GOTOFF in data, a Mach-O
   picbase-relative form on Darwin, and a GOT-relative form otherwise.  */
9738 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9740 const char *directive
= ASM_LONG
;
9743 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9744 directive
= ASM_QUAD
;
9746 gcc_assert (!TARGET_64BIT
);
9748 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9749 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9750 fprintf (file
, "%s%s%d-%s%d\n",
9751 directive
, LPREFIX
, value
, LPREFIX
, rel
)
;
9752 else if (HAVE_AS_GOTOFF_IN_DATA
)
9753 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9755 else if (TARGET_MACHO
)
9757 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9758 machopic_output_function_base_name (file
);
9759 fprintf(file
, "\n");
/* Fallback: GOT-relative entry, "_GLOBAL_OFFSET_TABLE_+[.-Lnn]".  */
9763 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9764 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* NOTE(review): extraction drops original source lines (numbering gaps);
   the trailing emit of TMP is not visible here — verify in full file.
   Zeroes DEST post-reload, preferring "xor reg,reg" (with an explicit
   flags clobber in a PARALLEL) over "mov $0,reg" unless TARGET_USE_MOV0
   asks for mov and we are not optimizing for size.  */
9767 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9771 ix86_expand_clear (rtx dest
)
9775 /* We play register width games, which are only valid after reload. */
9776 gcc_assert (reload_completed
);
9778 /* Avoid HImode and its attendant prefix byte. */
9779 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9780 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9781 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9783 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9784 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9786 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9787 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
/* Delegitimizes the address of MEM X and, if it turns out to be a
   constant-pool SYMBOL_REF, returns the pooled constant; the NULL
   return for the non-pool case is in an elided line (numbering gap) —
   verify in full file.  */
9793 /* X is an unchanging MEM. If it is a constant pool reference, return
9794 the constant pool rtx, else NULL. */
9797 maybe_get_pool_constant (rtx x
)
9799 x
= ix86_delegitimize_address (XEXP (x
, 0));
9801 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9802 return get_pool_constant (x
);
/* NOTE(review): extraction drops many original source lines (numbering
   gaps), so the surrounding if/else structure is partly invisible —
   verify every branch against the full file.  Expander for scalar moves:
   legitimizes TLS and dllimport symbols, handles PIC symbolic operands
   (including Darwin machopic paths), forces problematic immediates and
   FP constants into registers/memory, then emits the final SET.  */
9808 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9810 int strict
= (reload_in_progress
|| reload_completed
);
9812 enum tls_model model
;
/* TLS symbols must be legitimized before use.  */
9817 if (GET_CODE (op1
) == SYMBOL_REF
)
9819 model
= SYMBOL_REF_TLS_MODEL (op1
);
9822 op1
= legitimize_tls_address (op1
, model
, true);
9823 op1
= force_operand (op1
, op0
);
9827 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9828 && SYMBOL_REF_DLLIMPORT_P (op1
))
9829 op1
= legitimize_dllimport_symbol (op1
, false);
/* Same handling for (const (plus (symbol_ref ...) (const_int ...))).  */
9831 else if (GET_CODE (op1
) == CONST
9832 && GET_CODE (XEXP (op1
, 0)) == PLUS
9833 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9835 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9836 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
9839 model
= SYMBOL_REF_TLS_MODEL (symbol
);
9841 tmp
= legitimize_tls_address (symbol
, model
, true);
9842 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9843 && SYMBOL_REF_DLLIMPORT_P (symbol
))
9844 tmp
= legitimize_dllimport_symbol (symbol
, true);
9848 tmp
= force_operand (tmp
, NULL
);
9849 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
9850 op0
, 1, OPTAB_DIRECT
);
/* PIC addresses of symbolic operands need legitimizing too.  */
9856 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9858 if (TARGET_MACHO
&& !TARGET_64BIT
)
9863 rtx temp
= ((reload_in_progress
9864 || ((op0
&& REG_P (op0
))
9866 ? op0
: gen_reg_rtx (Pmode
));
9867 op1
= machopic_indirect_data_reference (op1
, temp
);
9868 op1
= machopic_legitimize_pic_address (op1
, mode
,
9869 temp
== op1
? 0 : temp
);
9871 else if (MACHOPIC_INDIRECT
)
9872 op1
= machopic_indirect_data_reference (op1
, 0);
9880 op1
= force_reg (Pmode
, op1
);
9881 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9883 rtx reg
= !can_create_pseudo_p () ? op0
: NULL_RTX
;
9884 op1
= legitimize_pic_address (op1
, reg
);
9893 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9894 || !push_operand (op0
, mode
))
9896 op1
= force_reg (mode
, op1
);
9898 if (push_operand (op0
, mode
)
9899 && ! general_no_elim_operand (op1
, mode
))
9900 op1
= copy_to_mode_reg (mode
, op1
);
9902 /* Force large constants in 64bit compilation into register
9903 to get them CSEed. */
9904 if (TARGET_64BIT
&& mode
== DImode
9905 && immediate_operand (op1
, mode
)
9906 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9907 && !register_operand (op0
, mode
)
9908 && optimize
&& !reload_completed
&& !reload_in_progress
)
9909 op1
= copy_to_mode_reg (mode
, op1
);
9911 if (FLOAT_MODE_P (mode
))
9913 /* If we are loading a floating point constant to a register,
9914 force the value to memory now, since we'll get better code
9915 out the back end. */
9919 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9921 op1
= validize_mem (force_const_mem (mode
, op1
));
9922 if (!register_operand (op0
, mode
))
9924 rtx temp
= gen_reg_rtx (mode
);
9925 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9926 emit_move_insn (op0
, temp
);
/* Finally emit the move as a plain SET.  */
9933 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): extraction drops original source lines (numbering gaps);
   verify branch structure against the full file.  Expander for vector
   moves: forces non-trivial constants to the constant pool, routes
   under-aligned 32-bit TImode memory moves through
   ix86_expand_vector_move_misalign, otherwise forces mem->mem moves
   through a register and emits a plain SET.  */
9937 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9939 rtx op0
= operands
[0], op1
= operands
[1];
9940 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
9942 /* Force constants other than zero into memory. We do not know how
9943 the instructions used to build constants modify the upper 64 bits
9944 of the register, once we have that information we may be able
9945 to handle some of them more efficiently. */
9946 if ((reload_in_progress
| reload_completed
) == 0
9947 && register_operand (op0
, mode
)
9948 && (CONSTANT_P (op1
)
9949 || (GET_CODE (op1
) == SUBREG
9950 && CONSTANT_P (SUBREG_REG (op1
))))
9951 && standard_sse_constant_p (op1
) <= 0)
9952 op1
= validize_mem (force_const_mem (mode
, op1
));
9954 /* TDmode values are passed as TImode on the stack. Timode values
9955 are moved via xmm registers, and moving them to stack can result in
9956 unaligned memory access. Use ix86_expand_vector_move_misalign()
9957 if memory operand is not aligned correctly. */
9958 if (can_create_pseudo_p ()
9959 && (mode
== TImode
) && !TARGET_64BIT
9960 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
9961 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
9965 /* ix86_expand_vector_move_misalign() does not like constants ... */
9966 if (CONSTANT_P (op1
)
9967 || (GET_CODE (op1
) == SUBREG
9968 && CONSTANT_P (SUBREG_REG (op1
))))
9969 op1
= validize_mem (force_const_mem (mode
, op1
));
9971 /* ... nor both arguments in memory. */
9972 if (!register_operand (op0
, mode
)
9973 && !register_operand (op1
, mode
))
9974 op1
= force_reg (mode
, op1
);
9976 tmp
[0] = op0
; tmp
[1] = op1
;
9977 ix86_expand_vector_move_misalign (mode
, tmp
);
9981 /* Make operand1 a register if it isn't already. */
9982 if (can_create_pseudo_p ()
9983 && !register_operand (op0
, mode
)
9984 && !register_operand (op1
, mode
))
9986 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9990 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): extraction drops many original source lines (numbering
   gaps); the optimize_size test, MEM_P(op1) dispatch and several braces
   are not visible — verify every path against the full file.  Emits an
   unaligned SSE load or store: movups/movdqu/movupd when smallest or
   optimal for the tuning, otherwise split half-register loadlp*/loadhp*
   (resp. storelp*/storehp*) sequences that avoid partial-register
   stalls.  */
9993 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9994 straight to ix86_expand_vector_move. */
9995 /* Code generation for scalar reg-reg moves of single and double precision data:
9996 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10000 if (x86_sse_partial_reg_dependency == true)
10005 Code generation for scalar loads of double precision data:
10006 if (x86_sse_split_regs == true)
10007 movlpd mem, reg (gas syntax)
10011 Code generation for unaligned packed loads of single precision data
10012 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10013 if (x86_sse_unaligned_move_optimal)
10016 if (x86_sse_partial_reg_dependency == true)
10028 Code generation for unaligned packed loads of double precision data
10029 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10030 if (x86_sse_unaligned_move_optimal)
10033 if (x86_sse_split_regs == true)
10046 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
10055 /* If we're optimizing for size, movups is the smallest. */
10058 op0
= gen_lowpart (V4SFmode
, op0
);
10059 op1
= gen_lowpart (V4SFmode
, op1
);
10060 emit_insn (gen_sse_movups (op0
, op1
));
10064 /* ??? If we have typed data, then it would appear that using
10065 movdqu is the only way to get unaligned data loaded with
10067 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10069 op0
= gen_lowpart (V16QImode
, op0
);
10070 op1
= gen_lowpart (V16QImode
, op1
);
10071 emit_insn (gen_sse2_movdqu (op0
, op1
));
10075 if (TARGET_SSE2
&& mode
== V2DFmode
)
10079 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10081 op0
= gen_lowpart (V2DFmode
, op0
);
10082 op1
= gen_lowpart (V2DFmode
, op1
);
10083 emit_insn (gen_sse2_movupd (op0
, op1
));
10087 /* When SSE registers are split into halves, we can avoid
10088 writing to the top half twice. */
10089 if (TARGET_SSE_SPLIT_REGS
)
10091 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10096 /* ??? Not sure about the best option for the Intel chips.
10097 The following would seem to satisfy; the register is
10098 entirely cleared, breaking the dependency chain. We
10099 then store to the upper half, with a dependency depth
10100 of one. A rumor has it that Intel recommends two movsd
10101 followed by an unpacklpd, but this is unconfirmed. And
10102 given that the dependency depth of the unpacklpd would
10103 still be one, I'm not sure why this would be better. */
10104 zero
= CONST0_RTX (V2DFmode
);
/* Load low then high DF halves of the unaligned source.  */
10107 m
= adjust_address (op1
, DFmode
, 0);
10108 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
10109 m
= adjust_address (op1
, DFmode
, 8);
10110 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
10114 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
10116 op0
= gen_lowpart (V4SFmode
, op0
);
10117 op1
= gen_lowpart (V4SFmode
, op1
);
10118 emit_insn (gen_sse_movups (op0
, op1
));
10122 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
10123 emit_move_insn (op0
, CONST0_RTX (mode
));
10125 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
10127 if (mode
!= V4SFmode
)
10128 op0
= gen_lowpart (V4SFmode
, op0
);
10129 m
= adjust_address (op1
, V2SFmode
, 0);
10130 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
10131 m
= adjust_address (op1
, V2SFmode
, 8);
10132 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
/* Store side: destination is the unaligned memory operand.  */
10135 else if (MEM_P (op0
))
10137 /* If we're optimizing for size, movups is the smallest. */
10140 op0
= gen_lowpart (V4SFmode
, op0
);
10141 op1
= gen_lowpart (V4SFmode
, op1
);
10142 emit_insn (gen_sse_movups (op0
, op1
));
10146 /* ??? Similar to above, only less clear because of quote
10147 typeless stores unquote. */
10148 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
10149 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
10151 op0
= gen_lowpart (V16QImode
, op0
);
10152 op1
= gen_lowpart (V16QImode
, op1
);
10153 emit_insn (gen_sse2_movdqu (op0
, op1
));
10157 if (TARGET_SSE2
&& mode
== V2DFmode
)
10159 m
= adjust_address (op0
, DFmode
, 0);
10160 emit_insn (gen_sse2_storelpd (m
, op1
));
10161 m
= adjust_address (op0
, DFmode
, 8);
10162 emit_insn (gen_sse2_storehpd (m
, op1
));
10166 if (mode
!= V4SFmode
)
10167 op1
= gen_lowpart (V4SFmode
, op1
);
10168 m
= adjust_address (op0
, V2SFmode
, 0);
10169 emit_insn (gen_sse_storelps (m
, op1
));
10170 m
= adjust_address (op0
, V2SFmode
, 8);
10171 emit_insn (gen_sse_storehps (m
, op1
));
10175 gcc_unreachable ();
/* Decrements the stack pointer by the size of MODE (reusing the
   stack-pointer register as the binop target where possible) and then
   stores X into the newly reserved stack slot.  */
10178 /* Expand a push in MODE. This is some mode for which we do not support
10179 proper push instructions, at least from the registers that we expect
10180 the value to live in. */
10183 ix86_expand_push (enum machine_mode mode
, rtx x
)
10187 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
10188 GEN_INT (-GET_MODE_SIZE (mode
)),
10189 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
10190 if (tmp
!= stack_pointer_rtx
)
10191 emit_move_insn (stack_pointer_rtx
, tmp
);
10193 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
10194 emit_move_insn (tmp
, x
);
/* NOTE(review): the return statements following each test are in elided
   lines (numbering gaps) — verify polarity in the full file.  Decides
   whether to swap src1/src2 of a commutative operation so that dst
   matches src1, immediates come second, and memory comes second.  */
10197 /* Helper function of ix86_fixup_binary_operands to canonicalize
10198 operand order. Returns true if the operands should be swapped. */
10201 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
10204 rtx dst
= operands
[0];
10205 rtx src1
= operands
[1];
10206 rtx src2
= operands
[2];
10208 /* If the operation is not commutative, we can't do anything. */
10209 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
10212 /* Highest priority is that src1 should match dst. */
10213 if (rtx_equal_p (dst
, src1
))
10215 if (rtx_equal_p (dst
, src2
))
10218 /* Next highest priority is that immediate constants come second. */
10219 if (immediate_operand (src2
, mode
))
10221 if (immediate_operand (src1
, mode
))
10224 /* Lowest priority is that memory references should come second. */
/* NOTE(review): swap and return statements sit in elided lines
   (numbering gaps) — verify in the full file.  Canonicalizes a
   three-operand binary op so ix86_binary_operator_ok holds: at most one
   memory source, non-constant matching src1, and a register destination
   when the memory destination cannot match src1; returns the (possibly
   substituted) destination.  */
10234 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10235 destination to use for the operation. If different from the true
10236 destination in operands[0], a copy operation will be required. */
10239 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
10242 rtx dst
= operands
[0];
10243 rtx src1
= operands
[1];
10244 rtx src2
= operands
[2];
10246 /* Canonicalize operand order. */
10247 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10254 /* Both source operands cannot be in memory. */
10255 if (MEM_P (src1
) && MEM_P (src2
))
10257 /* Optimization: Only read from memory once. */
10258 if (rtx_equal_p (src1
, src2
))
10260 src2
= force_reg (mode
, src2
);
10264 src2
= force_reg (mode
, src2
);
10267 /* If the destination is memory, and we do not have matching source
10268 operands, do things in registers. */
10269 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10270 dst
= gen_reg_rtx (mode
);
10272 /* Source 1 cannot be a constant. */
10273 if (CONSTANT_P (src1
))
10274 src1
= force_reg (mode
, src1
);
10276 /* Source 1 cannot be a non-matching memory. */
10277 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
10278 src1
= force_reg (mode
, src1
);
/* Write the fixed sources back for the caller.  */
10280 operands
[1] = src1
;
10281 operands
[2] = src2
;
/* Wrapper around ix86_fixup_binary_operands that insists no destination
   substitution was needed (dst must come back as operands[0]).  */
10285 /* Similarly, but assume that the destination has already been
10286 set up properly. */
10289 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
10290 enum machine_mode mode
, rtx operands
[])
10292 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10293 gcc_assert (dst
== operands
[0]);
/* NOTE(review): elided lines (numbering gaps) hide the emit of OP in the
   reload_in_progress arm — verify in the full file.  Fixes up operands,
   emits (set dst (code src1 src2)) — with a FLAGS_REG clobber in a
   PARALLEL outside of reload — and copies to the real destination if
   the fixup substituted a register.  */
10296 /* Attempt to expand a binary operator. Make the expansion closer to the
10297 actual machine, then just general_operand, which will allow 3 separate
10298 memory references (one output, two input) in a single insn. */
10301 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
10304 rtx src1
, src2
, dst
, op
, clob
;
10306 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
10307 src1
= operands
[1];
10308 src2
= operands
[2];
10310 /* Emit the instruction. */
10312 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
10313 if (reload_in_progress
)
10315 /* Reload doesn't know about the flags register, and doesn't know that
10316 it doesn't want to clobber it. We can only do this with PLUS. */
10317 gcc_assert (code
== PLUS
);
10322 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10323 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10326 /* Fix up the destination if needed. */
10327 if (dst
!= operands
[0])
10328 emit_move_insn (operands
[0], dst
);
/* NOTE(review): the return statements after each test are elided
   (numbering gaps) — verify in full file.  Predicate mirroring
   ix86_fixup_binary_operands: rejects two memory sources, a constant or
   non-matching-memory src1, and a memory destination without a matching
   src1 (after canonical swapping).  */
10331 /* Return TRUE or FALSE depending on whether the binary operator meets the
10332 appropriate constraints. */
10335 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
10338 rtx dst
= operands
[0];
10339 rtx src1
= operands
[1];
10340 rtx src2
= operands
[2];
10342 /* Both source operands cannot be in memory. */
10343 if (MEM_P (src1
) && MEM_P (src2
))
10346 /* Canonicalize operand order for commutative operators. */
10347 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
10354 /* If the destination is memory, we must have a matching source operand. */
10355 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
10358 /* Source 1 cannot be a constant. */
10359 if (CONSTANT_P (src1
))
10362 /* Source 1 cannot be a non-matching memory. */
10363 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
/* NOTE(review): elided lines (numbering gaps) hide dst/src assignment
   and the plain emit path — verify in full file.  Expands
   (set dst (code src)): forces non-matching memory sources into a
   register, adds a FLAGS_REG clobber for NOT outside of reload, and
   copies back if the destination was substituted.  */
10369 /* Attempt to expand a unary operator. Make the expansion closer to the
10370 actual machine, then just general_operand, which will allow 2 separate
10371 memory references (one output, one input) in a single insn. */
10374 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10377 int matching_memory
;
10378 rtx src
, dst
, op
, clob
;
10383 /* If the destination is memory, and we do not have matching source
10384 operands, do things in registers. */
10385 matching_memory
= 0;
10388 if (rtx_equal_p (dst
, src
))
10389 matching_memory
= 1;
10391 dst
= gen_reg_rtx (mode
);
10394 /* When source operand is memory, destination must match. */
10395 if (MEM_P (src
) && !matching_memory
)
10396 src
= force_reg (mode
, src
);
10398 /* Emit the instruction. */
10400 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10401 if (reload_in_progress
|| code
== NOT
)
10403 /* Reload doesn't know about the flags register, and doesn't know that
10404 it doesn't want to clobber it. */
10405 gcc_assert (code
== NOT
);
10410 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10411 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10414 /* Fix up the destination if needed. */
10415 if (dst
!= operands
[0])
10416 emit_move_insn (operands
[0], dst
);
/* Predicate for unary operators: a memory operand is only acceptable
   when source and destination are the same rtx (the return values sit
   in elided lines — verify polarity in full file).  */
10419 /* Return TRUE or FALSE depending on whether the unary operator meets the
10420 appropriate constraints. */
10423 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10424 enum machine_mode mode ATTRIBUTE_UNUSED
,
10425 rtx operands
[2] ATTRIBUTE_UNUSED
)
10427 /* If one of operands is memory, source and destination must match. */
10428 if ((MEM_P (operands
[0])
10429 || MEM_P (operands
[1]))
10430 && ! rtx_equal_p (operands
[0], operands
[1]))
/* NOTE(review): elided lines (numbering gaps) hide the MEM_P(input)
   dispatch and some braces — verify in full file.  Post-reload split of
   float->unsigned-SI: compare against 2**31, conditionally subtract it,
   truncate with cvttp*2dq, then XOR the sign bit back in via a
   31-bit-shifted mask.  */
10435 /* Post-reload splitter for converting an SF or DFmode value in an
10436 SSE register into an unsigned SImode. */
10439 ix86_split_convert_uns_si_sse (rtx operands
[])
10441 enum machine_mode vecmode
;
10442 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10444 large
= operands
[1];
10445 zero_or_two31
= operands
[2];
10446 input
= operands
[3];
10447 two31
= operands
[4];
10448 vecmode
= GET_MODE (large
);
10449 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10451 /* Load up the value into the low element. We must ensure that the other
10452 elements are valid floats -- zero is the easiest such value. */
10455 if (vecmode
== V4SFmode
)
10456 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10458 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10462 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10463 emit_move_insn (value
, CONST0_RTX (vecmode
));
10464 if (vecmode
== V4SFmode
)
10465 emit_insn (gen_sse_movss (value
, value
, input
));
10467 emit_insn (gen_sse2_movsd (value
, value
, input
));
/* large = (2**31 <= value); zero_or_two31 = large ? 2**31 : 0.  */
10470 emit_move_insn (large
, two31
);
10471 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10473 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10474 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10476 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10477 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10479 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10480 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
/* Turn the compare mask into the 0x80000000 correction bit.  */
10482 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10483 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10485 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10486 if (vecmode
== V4SFmode
)
10487 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10489 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10492 emit_insn (gen_xorv4si3 (value
, value
, large
));
/* NOTE(review): elided lines (numbering gaps) hide the TARGET_SSE3
   dispatch and some declarations — verify in full file.  Converts
   unsigned DImode to DFmode with SSE2 only: packs the two 32-bit halves
   with 0x1.0p52/0x1.0p84 exponent words, subtracts the matching biases,
   then adds the halves (haddpd on SSE3, unpckhpd+addpd otherwise).  */
10495 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10496 Expects the 64-bit DImode to be supplied in a pair of integral
10497 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10498 -mfpmath=sse, !optimize_size only. */
10501 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10503 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10504 rtx int_xmm
, fp_xmm
;
10505 rtx biases
, exponents
;
10508 int_xmm
= gen_reg_rtx (V4SImode
);
10509 if (TARGET_INTER_UNIT_MOVES
)
10510 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10511 else if (TARGET_SSE_SPLIT_REGS
)
10513 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10514 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10518 x
= gen_reg_rtx (V2DImode
);
10519 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10520 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10523 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10524 gen_rtvec (4, GEN_INT (0x43300000UL
),
10525 GEN_INT (0x45300000UL
),
10526 const0_rtx
, const0_rtx
));
10527 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10529 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10530 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10532 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10533 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10534 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10535 (0x1.0p84 + double(fp_value_hi_xmm)).
10536 Note these exponents differ by 32. */
10538 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10540 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10541 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10542 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10543 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10544 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10545 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10546 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10547 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10548 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10550 /* Add the upper and lower DFmode values together. */
10552 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10555 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10556 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10557 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10560 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
/* Converts unsigned SImode to DFmode by biasing: input + INT_MIN is
   converted as a signed value, then 2**31 is added back in DFmode.  The
   final move may be conditional in an elided line — verify in full
   file.  */
10563 /* Convert an unsigned SImode value into a DFmode. Only currently used
10564 for SSE, but applicable anywhere. */
10567 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10569 REAL_VALUE_TYPE TWO31r
;
10572 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10573 NULL
, 1, OPTAB_DIRECT
);
10575 fp
= gen_reg_rtx (DFmode
);
10576 emit_insn (gen_floatsidf2 (fp
, x
));
10578 real_ldexp (&TWO31r
, &dconst1
, 31);
10579 x
= const_double_from_real_value (TWO31r
, DFmode
);
10581 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10583 emit_move_insn (target
, x
);
/* Converts signed DImode to DFmode for 32-bit SSE math:
   double(hi) * 2**32 + unsigned_double(lo).  The final binop's trailing
   arguments and the conditional move are in elided lines — verify in
   full file.  */
10586 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10587 32-bit mode; otherwise we have a direct convert instruction. */
10590 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10592 REAL_VALUE_TYPE TWO32r
;
10593 rtx fp_lo
, fp_hi
, x
;
10595 fp_lo
= gen_reg_rtx (DFmode
);
10596 fp_hi
= gen_reg_rtx (DFmode
);
10598 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10600 real_ldexp (&TWO32r
, &dconst1
, 32);
10601 x
= const_double_from_real_value (TWO32r
, DFmode
);
10602 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10604 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10606 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10609 emit_move_insn (target
, x
);
/* Converts unsigned SImode to SFmode by splitting into 16-bit halves:
   float(hi16) * 2**16 + float(lo16), each half being exactly
   representable.  The trailing arguments of the two binops are in
   elided lines — verify in full file.  */
10612 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10613 For x86_32, -mfpmath=sse, !optimize_size only. */
10615 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10617 REAL_VALUE_TYPE ONE16r
;
10618 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10620 real_ldexp (&ONE16r
, &dconst1
, 16);
10621 x
= const_double_from_real_value (ONE16r
, SFmode
);
10622 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10623 NULL
, 0, OPTAB_DIRECT
);
10624 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10625 NULL
, 0, OPTAB_DIRECT
);
10626 fp_hi
= gen_reg_rtx (SFmode
);
10627 fp_lo
= gen_reg_rtx (SFmode
);
10628 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10629 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10630 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10632 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10634 if (!rtx_equal_p (target
, fp_hi
))
10635 emit_move_insn (target
, fp_hi
);
/* NOTE(review): the switch-on-mode and case labels are in elided lines
   (numbering gaps) — verify in full file.  Builds a CONST_VECTOR for
   V4SI/V2DI/V4SF/V2DF holding VALUE either replicated to every element
   (VECT) or in element 0 with the rest zero.  */
10638 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10639 then replicate the value for all elements of the vector
10643 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10650 v
= gen_rtvec (4, value
, value
, value
, value
);
10651 return gen_rtx_CONST_VECTOR (V4SImode
, v
);
10655 v
= gen_rtvec (2, value
, value
);
10656 return gen_rtx_CONST_VECTOR (V2DImode
, v
);
10660 v
= gen_rtvec (4, value
, value
, value
, value
);
10662 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10663 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10664 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10668 v
= gen_rtvec (2, value
, value
);
10670 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10671 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10674 gcc_unreachable ();
/* NOTE(review): the switch-on-mode, the shift computation and the imode
   assignments are in elided lines (numbering gaps) — verify in full
   file.  Builds a register holding the sign-bit mask (or its complement
   when INVERT) for MODE, replicated across the vector when VECT;
   returns a scalar register when no vector mode applies (TFmode case).  */
10678 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10679 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10680 for an SSE register. If VECT is true, then replicate the mask for
10681 all elements of the vector register. If INVERT is true, then create
10682 a mask excluding the sign bit. */
10685 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10687 enum machine_mode vec_mode
, imode
;
10688 HOST_WIDE_INT hi
, lo
;
10693 /* Find the sign bit, sign extended to 2*HWI. */
10699 vec_mode
= (mode
== SImode
) ? V4SImode
: V4SFmode
;
10700 lo
= 0x80000000, hi
= lo
< 0;
10706 vec_mode
= (mode
== DImode
) ? V2DImode
: V2DFmode
;
10707 if (HOST_BITS_PER_WIDE_INT
>= 64)
10708 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10710 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10716 vec_mode
= VOIDmode
;
10717 gcc_assert (HOST_BITS_PER_WIDE_INT
>= 64);
10718 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
10722 gcc_unreachable ();
/* INVERT: mask excludes rather than selects the sign bit.  */
10726 lo
= ~lo
, hi
= ~hi
;
10728 /* Force this value into the low part of a fp vector constant. */
10729 mask
= immed_double_const (lo
, hi
, imode
);
10730 mask
= gen_lowpart (mode
, mask
);
10732 if (vec_mode
== VOIDmode
)
10733 return force_reg (mode
, mask
);
10735 v
= ix86_build_const_vector (mode
, vect
, mask
);
10736 return force_reg (vec_mode
, v
);
/* NOTE(review): elided lines (numbering gaps) hide the vector_mode
   branch, the use_sse assignments and the plain-emit path — verify in
   full file.  Expands FP ABS/NEG: with SSE it becomes AND/XOR against a
   sign-bit mask (NEG -> XOR, ABS -> AND with inverted mask); otherwise
   an x87-style unary SET; non-SSE emits carry a USE of the mask and a
   FLAGS_REG clobber in a 3-element PARALLEL.  */
10739 /* Generate code for floating point ABS or NEG. */
10742 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10745 rtx mask
, set
, use
, clob
, dst
, src
;
10746 bool matching_memory
;
10747 bool use_sse
= false;
10748 bool vector_mode
= VECTOR_MODE_P (mode
);
10749 enum machine_mode elt_mode
= mode
;
10753 elt_mode
= GET_MODE_INNER (mode
);
10756 else if (mode
== TFmode
)
10758 else if (TARGET_SSE_MATH
)
10759 use_sse
= SSE_FLOAT_MODE_P (mode
);
10761 /* NEG and ABS performed with SSE use bitwise mask operations.
10762 Create the appropriate mask now. */
10764 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10771 /* If the destination is memory, and we don't have matching source
10772 operands or we're using the x87, do things in registers. */
10773 matching_memory
= false;
10776 if (use_sse
&& rtx_equal_p (dst
, src
))
10777 matching_memory
= true;
10779 dst
= gen_reg_rtx (mode
);
10781 if (MEM_P (src
) && !matching_memory
)
10782 src
= force_reg (mode
, src
);
10786 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10787 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10792 set
= gen_rtx_fmt_e (code
, mode
, src
);
10793 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10796 use
= gen_rtx_USE (VOIDmode
, mask
);
10797 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10798 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10799 gen_rtvec (3, set
, use
, clob
)));
10805 if (dst
!= operands
[0])
10806 emit_move_insn (operands
[0], dst
);
/* NOTE(review): elided lines (numbering gaps) hide op0/op1 assignment
   and the TFmode handling — verify in full file.  Expands
   copysign(op0, op1): when op0 is a CONST_DOUBLE its absolute value is
   folded into a vector constant and the *_const pattern is used with a
   single sign mask; otherwise the *_var pattern gets both the mask and
   its complement.  */
10809 /* Expand a copysign operation. Special case operand 0 being a constant. */
10812 ix86_expand_copysign (rtx operands
[])
10814 enum machine_mode mode
, vmode
;
10815 rtx dest
, op0
, op1
, mask
, nmask
;
10817 dest
= operands
[0];
10821 mode
= GET_MODE (dest
);
10822 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10824 if (GET_CODE (op0
) == CONST_DOUBLE
)
10826 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
10828 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10829 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10831 if (mode
== SFmode
|| mode
== DFmode
)
10833 if (op0
== CONST0_RTX (mode
))
10834 op0
= CONST0_RTX (vmode
);
10839 if (mode
== SFmode
)
10840 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10841 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10843 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10844 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10848 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10850 if (mode
== SFmode
)
10851 copysign_insn
= gen_copysignsf3_const
;
10852 else if (mode
== DFmode
)
10853 copysign_insn
= gen_copysigndf3_const
;
10855 copysign_insn
= gen_copysigntf3_const
;
10857 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
/* Variable op0: need both the mask and its complement.  */
10861 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
10863 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10864 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10866 if (mode
== SFmode
)
10867 copysign_insn
= gen_copysignsf3_var
;
10868 else if (mode
== DFmode
)
10869 copysign_insn
= gen_copysigndf3_var
;
10871 copysign_insn
= gen_copysigntf3_var
;
10873 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
/* Splits the *_const copysign pattern: dest = (dest & sign-mask),
   optionally ORed with the (already vectorized, nonzero) constant
   magnitude op0.  op0/op1 assignments sit in elided lines — verify in
   full file.  */
10877 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10878 be a constant, and so has already been expanded into a vector constant. */
10881 ix86_split_copysign_const (rtx operands
[])
10883 enum machine_mode mode
, vmode
;
10884 rtx dest
, op0
, op1
, mask
, x
;
10886 dest
= operands
[0];
10889 mask
= operands
[3];
10891 mode
= GET_MODE (dest
);
10892 vmode
= GET_MODE (mask
);
10894 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10895 x
= gen_rtx_AND (vmode
, dest
, mask
);
10896 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10898 if (op0
!= CONST0_RTX (vmode
))
10900 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10901 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* NOTE(review): elided lines (numbering gaps) hide op0/op1 assignment,
   an early return and several else-arms — verify in full file.  Splits
   the *_var copysign pattern: isolates op1's sign in SCRATCH (AND with
   MASK), clears op0's sign (AND with NMASK or an inverted-dest trick
   depending on which registers the constraint alternative tied), then
   ORs the two into DEST.  Comments tag which insn alternative each arm
   serves.  */
10905 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10906 so we have to do two masks. */
10909 ix86_split_copysign_var (rtx operands
[])
10911 enum machine_mode mode
, vmode
;
10912 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10914 dest
= operands
[0];
10915 scratch
= operands
[1];
10918 nmask
= operands
[4];
10919 mask
= operands
[5];
10921 mode
= GET_MODE (dest
);
10922 vmode
= GET_MODE (mask
);
10924 if (rtx_equal_p (op0
, op1
))
10926 /* Shouldn't happen often (it's useless, obviously), but when it does
10927 we'd generate incorrect code if we continue below. */
10928 emit_move_insn (dest
, op0
);
10932 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10934 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10936 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10937 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10940 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10941 x
= gen_rtx_NOT (vmode
, dest
);
10942 x
= gen_rtx_AND (vmode
, x
, op0
);
10943 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10947 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10949 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10951 else /* alternative 2,4 */
10953 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10954 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10955 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10957 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10959 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10961 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10962 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10964 else /* alternative 3,4 */
10966 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10968 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10969 x
= gen_rtx_AND (vmode
, dest
, op0
);
10971 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* Combine magnitude (dest) and sign (scratch).  */
10974 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10975 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10978 /* Return TRUE or FALSE depending on whether the first SET in INSN
10979 has source and destination with matching CC modes, and that the
10980 CC mode is at least as constrained as REQ_MODE. */
10983 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10986 enum machine_mode set_mode
;
10988 set
= PATTERN (insn
);
10989 if (GET_CODE (set
) == PARALLEL
)
10990 set
= XVECEXP (set
, 0, 0);
10991 gcc_assert (GET_CODE (set
) == SET
);
10992 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10994 set_mode
= GET_MODE (SET_DEST (set
));
10998 if (req_mode
!= CCNOmode
10999 && (req_mode
!= CCmode
11000 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
11004 if (req_mode
== CCGCmode
)
11008 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
11012 if (req_mode
== CCZmode
)
11019 gcc_unreachable ();
11022 return (GET_MODE (SET_SRC (set
)) == set_mode
);
11025 /* Generate insn patterns to do an integer compare of OPERANDS. */
11028 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
11030 enum machine_mode cmpmode
;
11033 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
11034 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
11036 /* This is very simple, but making the interface the same as in the
11037 FP case makes the rest of the code easier. */
11038 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
11039 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
11041 /* Return the test that should be put into the flags user, i.e.
11042 the bcc, scc, or cmov instruction. */
11043 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
11046 /* Figure out whether to use ordered or unordered fp comparisons.
11047 Return the appropriate mode to use. */
11050 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
11052 /* ??? In order to make all comparisons reversible, we do all comparisons
11053 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11054 all forms trapping and nontrapping comparisons, we can make inequality
11055 comparisons trapping again, since it results in better code when using
11056 FCOM based compares. */
11057 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
11061 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
11063 enum machine_mode mode
= GET_MODE (op0
);
11065 if (SCALAR_FLOAT_MODE_P (mode
))
11067 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
11068 return ix86_fp_compare_mode (code
);
11073 /* Only zero flag is needed. */
11074 case EQ
: /* ZF=0 */
11075 case NE
: /* ZF!=0 */
11077 /* Codes needing carry flag. */
11078 case GEU
: /* CF=0 */
11079 case LTU
: /* CF=1 */
11080 /* Detect overflow checks. They need just the carry flag. */
11081 if (GET_CODE (op0
) == PLUS
11082 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11086 case GTU
: /* CF=0 & ZF=0 */
11087 case LEU
: /* CF=1 | ZF=1 */
11088 /* Detect overflow checks. They need just the carry flag. */
11089 if (GET_CODE (op0
) == MINUS
11090 && rtx_equal_p (op1
, XEXP (op0
, 0)))
11094 /* Codes possibly doable only with sign flag when
11095 comparing against zero. */
11096 case GE
: /* SF=OF or SF=0 */
11097 case LT
: /* SF<>OF or SF=1 */
11098 if (op1
== const0_rtx
)
11101 /* For other cases Carry flag is not required. */
11103 /* Codes doable only with sign flag when comparing
11104 against zero, but we miss jump instruction for it
11105 so we need to use relational tests against overflow
11106 that thus needs to be zero. */
11107 case GT
: /* ZF=0 & SF=OF */
11108 case LE
: /* ZF=1 | SF<>OF */
11109 if (op1
== const0_rtx
)
11113 /* strcmp pattern do (use flags) and combine may ask us for proper
11118 gcc_unreachable ();
11122 /* Return the fixed registers used for condition codes. */
11125 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
11132 /* If two condition code modes are compatible, return a condition code
11133 mode which is compatible with both. Otherwise, return
11136 static enum machine_mode
11137 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
11142 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
11145 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
11146 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
11152 gcc_unreachable ();
11182 /* These are only compatible with themselves, which we already
11188 /* Split comparison code CODE into comparisons we can do using branch
11189 instructions. BYPASS_CODE is comparison code for branch that will
11190 branch around FIRST_CODE and SECOND_CODE. If some of branches
11191 is not required, set value to UNKNOWN.
11192 We never require more than two branches. */
11195 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
11196 enum rtx_code
*first_code
,
11197 enum rtx_code
*second_code
)
11199 *first_code
= code
;
11200 *bypass_code
= UNKNOWN
;
11201 *second_code
= UNKNOWN
;
11203 /* The fcomi comparison sets flags as follows:
11213 case GT
: /* GTU - CF=0 & ZF=0 */
11214 case GE
: /* GEU - CF=0 */
11215 case ORDERED
: /* PF=0 */
11216 case UNORDERED
: /* PF=1 */
11217 case UNEQ
: /* EQ - ZF=1 */
11218 case UNLT
: /* LTU - CF=1 */
11219 case UNLE
: /* LEU - CF=1 | ZF=1 */
11220 case LTGT
: /* EQ - ZF=0 */
11222 case LT
: /* LTU - CF=1 - fails on unordered */
11223 *first_code
= UNLT
;
11224 *bypass_code
= UNORDERED
;
11226 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
11227 *first_code
= UNLE
;
11228 *bypass_code
= UNORDERED
;
11230 case EQ
: /* EQ - ZF=1 - fails on unordered */
11231 *first_code
= UNEQ
;
11232 *bypass_code
= UNORDERED
;
11234 case NE
: /* NE - ZF=0 - fails on unordered */
11235 *first_code
= LTGT
;
11236 *second_code
= UNORDERED
;
11238 case UNGE
: /* GEU - CF=0 - fails on unordered */
11240 *second_code
= UNORDERED
;
11242 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
11244 *second_code
= UNORDERED
;
11247 gcc_unreachable ();
11249 if (!TARGET_IEEE_FP
)
11251 *second_code
= UNKNOWN
;
11252 *bypass_code
= UNKNOWN
;
11256 /* Return cost of comparison done fcom + arithmetics operations on AX.
11257 All following functions do use number of instructions as a cost metrics.
11258 In future this should be tweaked to compute bytes for optimize_size and
11259 take into account performance of various instructions on various CPUs. */
11261 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
11263 if (!TARGET_IEEE_FP
)
11265 /* The cost of code output by ix86_expand_fp_compare. */
11289 gcc_unreachable ();
11293 /* Return cost of comparison done using fcomi operation.
11294 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11296 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
11298 enum rtx_code bypass_code
, first_code
, second_code
;
11299 /* Return arbitrarily high cost when instruction is not supported - this
11300 prevents gcc from using it. */
11303 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11304 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
11307 /* Return cost of comparison done using sahf operation.
11308 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11310 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
11312 enum rtx_code bypass_code
, first_code
, second_code
;
11313 /* Return arbitrarily high cost when instruction is not preferred - this
11314 avoids gcc from using it. */
11315 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
11317 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11318 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
11321 /* Compute cost of the comparison done using any method.
11322 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11324 ix86_fp_comparison_cost (enum rtx_code code
)
11326 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11329 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11330 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11332 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11333 if (min
> sahf_cost
)
11335 if (min
> fcomi_cost
)
11340 /* Return true if we should use an FCOMI instruction for this
11344 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
11346 enum rtx_code swapped_code
= swap_condition (code
);
11348 return ((ix86_fp_comparison_cost (code
)
11349 == ix86_fp_comparison_fcomi_cost (code
))
11350 || (ix86_fp_comparison_cost (swapped_code
)
11351 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
11354 /* Swap, force into registers, or otherwise massage the two operands
11355 to a fp comparison. The operands are updated in place; the new
11356 comparison code is returned. */
11358 static enum rtx_code
11359 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
11361 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
11362 rtx op0
= *pop0
, op1
= *pop1
;
11363 enum machine_mode op_mode
= GET_MODE (op0
);
11364 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
11366 /* All of the unordered compare instructions only work on registers.
11367 The same is true of the fcomi compare instructions. The XFmode
11368 compare instructions require registers except when comparing
11369 against zero or when converting operand 1 from fixed point to
11373 && (fpcmp_mode
== CCFPUmode
11374 || (op_mode
== XFmode
11375 && ! (standard_80387_constant_p (op0
) == 1
11376 || standard_80387_constant_p (op1
) == 1)
11377 && GET_CODE (op1
) != FLOAT
)
11378 || ix86_use_fcomi_compare (code
)))
11380 op0
= force_reg (op_mode
, op0
);
11381 op1
= force_reg (op_mode
, op1
);
11385 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11386 things around if they appear profitable, otherwise force op0
11387 into a register. */
11389 if (standard_80387_constant_p (op0
) == 0
11391 && ! (standard_80387_constant_p (op1
) == 0
11395 tmp
= op0
, op0
= op1
, op1
= tmp
;
11396 code
= swap_condition (code
);
11400 op0
= force_reg (op_mode
, op0
);
11402 if (CONSTANT_P (op1
))
11404 int tmp
= standard_80387_constant_p (op1
);
11406 op1
= validize_mem (force_const_mem (op_mode
, op1
));
11410 op1
= force_reg (op_mode
, op1
);
11413 op1
= force_reg (op_mode
, op1
);
11417 /* Try to rearrange the comparison to make it cheaper. */
11418 if (ix86_fp_comparison_cost (code
)
11419 > ix86_fp_comparison_cost (swap_condition (code
))
11420 && (REG_P (op1
) || can_create_pseudo_p ()))
11423 tmp
= op0
, op0
= op1
, op1
= tmp
;
11424 code
= swap_condition (code
);
11426 op0
= force_reg (op_mode
, op0
);
11434 /* Convert comparison codes we use to represent FP comparison to integer
11435 code that will result in proper branch. Return UNKNOWN if no such code
11439 ix86_fp_compare_code_to_integer (enum rtx_code code
)
11468 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11471 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11472 rtx
*second_test
, rtx
*bypass_test
)
11474 enum machine_mode fpcmp_mode
, intcmp_mode
;
11476 int cost
= ix86_fp_comparison_cost (code
);
11477 enum rtx_code bypass_code
, first_code
, second_code
;
11479 fpcmp_mode
= ix86_fp_compare_mode (code
);
11480 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11483 *second_test
= NULL_RTX
;
11485 *bypass_test
= NULL_RTX
;
11487 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11489 /* Do fcomi/sahf based test when profitable. */
11490 if (ix86_fp_comparison_arithmetics_cost (code
) > cost
11491 && (bypass_code
== UNKNOWN
|| bypass_test
)
11492 && (second_code
== UNKNOWN
|| second_test
))
11494 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11495 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11501 gcc_assert (TARGET_SAHF
);
11504 scratch
= gen_reg_rtx (HImode
);
11505 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
11507 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
11510 /* The FP codes work out to act like unsigned. */
11511 intcmp_mode
= fpcmp_mode
;
11513 if (bypass_code
!= UNKNOWN
)
11514 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11515 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11517 if (second_code
!= UNKNOWN
)
11518 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11519 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11524 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11525 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11526 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11528 scratch
= gen_reg_rtx (HImode
);
11529 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11531 /* In the unordered case, we have to check C2 for NaN's, which
11532 doesn't happen to work out to anything nice combination-wise.
11533 So do some bit twiddling on the value we've got in AH to come
11534 up with an appropriate set of condition codes. */
11536 intcmp_mode
= CCNOmode
;
11541 if (code
== GT
|| !TARGET_IEEE_FP
)
11543 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11548 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11549 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11550 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11551 intcmp_mode
= CCmode
;
11557 if (code
== LT
&& TARGET_IEEE_FP
)
11559 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11560 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11561 intcmp_mode
= CCmode
;
11566 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11572 if (code
== GE
|| !TARGET_IEEE_FP
)
11574 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11579 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11580 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11587 if (code
== LE
&& TARGET_IEEE_FP
)
11589 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11590 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11591 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11592 intcmp_mode
= CCmode
;
11597 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11603 if (code
== EQ
&& TARGET_IEEE_FP
)
11605 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11606 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11607 intcmp_mode
= CCmode
;
11612 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11619 if (code
== NE
&& TARGET_IEEE_FP
)
11621 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11622 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11628 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11634 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11638 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11643 gcc_unreachable ();
11647 /* Return the test that should be put into the flags user, i.e.
11648 the bcc, scc, or cmov instruction. */
11649 return gen_rtx_fmt_ee (code
, VOIDmode
,
11650 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11655 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11658 op0
= ix86_compare_op0
;
11659 op1
= ix86_compare_op1
;
11662 *second_test
= NULL_RTX
;
11664 *bypass_test
= NULL_RTX
;
11666 if (ix86_compare_emitted
)
11668 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11669 ix86_compare_emitted
= NULL_RTX
;
11671 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11673 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
11674 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11675 second_test
, bypass_test
);
11678 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11683 /* Return true if the CODE will result in nontrivial jump sequence. */
11685 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11687 enum rtx_code bypass_code
, first_code
, second_code
;
11690 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11691 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11695 ix86_expand_branch (enum rtx_code code
, rtx label
)
11699 /* If we have emitted a compare insn, go straight to simple.
11700 ix86_expand_compare won't emit anything if ix86_compare_emitted
11702 if (ix86_compare_emitted
)
11705 switch (GET_MODE (ix86_compare_op0
))
11711 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11712 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11713 gen_rtx_LABEL_REF (VOIDmode
, label
),
11715 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11724 enum rtx_code bypass_code
, first_code
, second_code
;
11726 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11727 &ix86_compare_op1
);
11729 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11731 /* Check whether we will use the natural sequence with one jump. If
11732 so, we can expand jump early. Otherwise delay expansion by
11733 creating compound insn to not confuse optimizers. */
11734 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
)
11736 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11737 gen_rtx_LABEL_REF (VOIDmode
, label
),
11738 pc_rtx
, NULL_RTX
, NULL_RTX
);
11742 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11743 ix86_compare_op0
, ix86_compare_op1
);
11744 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11745 gen_rtx_LABEL_REF (VOIDmode
, label
),
11747 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11749 use_fcomi
= ix86_use_fcomi_compare (code
);
11750 vec
= rtvec_alloc (3 + !use_fcomi
);
11751 RTVEC_ELT (vec
, 0) = tmp
;
11753 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FPSR_REG
));
11755 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, FLAGS_REG
));
11758 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11760 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11769 /* Expand DImode branch into multiple compare+branch. */
11771 rtx lo
[2], hi
[2], label2
;
11772 enum rtx_code code1
, code2
, code3
;
11773 enum machine_mode submode
;
11775 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11777 tmp
= ix86_compare_op0
;
11778 ix86_compare_op0
= ix86_compare_op1
;
11779 ix86_compare_op1
= tmp
;
11780 code
= swap_condition (code
);
11782 if (GET_MODE (ix86_compare_op0
) == DImode
)
11784 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11785 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11790 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11791 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11795 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11796 avoid two branches. This costs one extra insn, so disable when
11797 optimizing for size. */
11799 if ((code
== EQ
|| code
== NE
)
11801 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11806 if (hi
[1] != const0_rtx
)
11807 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11808 NULL_RTX
, 0, OPTAB_WIDEN
);
11811 if (lo
[1] != const0_rtx
)
11812 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11813 NULL_RTX
, 0, OPTAB_WIDEN
);
11815 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11816 NULL_RTX
, 0, OPTAB_WIDEN
);
11818 ix86_compare_op0
= tmp
;
11819 ix86_compare_op1
= const0_rtx
;
11820 ix86_expand_branch (code
, label
);
11824 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11825 op1 is a constant and the low word is zero, then we can just
11826 examine the high word. */
11828 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11831 case LT
: case LTU
: case GE
: case GEU
:
11832 ix86_compare_op0
= hi
[0];
11833 ix86_compare_op1
= hi
[1];
11834 ix86_expand_branch (code
, label
);
11840 /* Otherwise, we need two or three jumps. */
11842 label2
= gen_label_rtx ();
11845 code2
= swap_condition (code
);
11846 code3
= unsigned_condition (code
);
11850 case LT
: case GT
: case LTU
: case GTU
:
11853 case LE
: code1
= LT
; code2
= GT
; break;
11854 case GE
: code1
= GT
; code2
= LT
; break;
11855 case LEU
: code1
= LTU
; code2
= GTU
; break;
11856 case GEU
: code1
= GTU
; code2
= LTU
; break;
11858 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11859 case NE
: code2
= UNKNOWN
; break;
11862 gcc_unreachable ();
11867 * if (hi(a) < hi(b)) goto true;
11868 * if (hi(a) > hi(b)) goto false;
11869 * if (lo(a) < lo(b)) goto true;
11873 ix86_compare_op0
= hi
[0];
11874 ix86_compare_op1
= hi
[1];
11876 if (code1
!= UNKNOWN
)
11877 ix86_expand_branch (code1
, label
);
11878 if (code2
!= UNKNOWN
)
11879 ix86_expand_branch (code2
, label2
);
11881 ix86_compare_op0
= lo
[0];
11882 ix86_compare_op1
= lo
[1];
11883 ix86_expand_branch (code3
, label
);
11885 if (code2
!= UNKNOWN
)
11886 emit_label (label2
);
11891 gcc_unreachable ();
11895 /* Split branch based on floating point condition. */
11897 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11898 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11900 rtx second
, bypass
;
11901 rtx label
= NULL_RTX
;
11903 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11906 if (target2
!= pc_rtx
)
11909 code
= reverse_condition_maybe_unordered (code
);
11914 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11915 tmp
, &second
, &bypass
);
11917 /* Remove pushed operand from stack. */
11919 ix86_free_from_memory (GET_MODE (pushed
));
11921 if (split_branch_probability
>= 0)
11923 /* Distribute the probabilities across the jumps.
11924 Assume the BYPASS and SECOND to be always test
11926 probability
= split_branch_probability
;
11928 /* Value of 1 is low enough to make no need for probability
11929 to be updated. Later we may run some experiments and see
11930 if unordered values are more frequent in practice. */
11932 bypass_probability
= 1;
11934 second_probability
= 1;
11936 if (bypass
!= NULL_RTX
)
11938 label
= gen_label_rtx ();
11939 i
= emit_jump_insn (gen_rtx_SET
11941 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11943 gen_rtx_LABEL_REF (VOIDmode
,
11946 if (bypass_probability
>= 0)
11948 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11949 GEN_INT (bypass_probability
),
11952 i
= emit_jump_insn (gen_rtx_SET
11954 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11955 condition
, target1
, target2
)));
11956 if (probability
>= 0)
11958 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11959 GEN_INT (probability
),
11961 if (second
!= NULL_RTX
)
11963 i
= emit_jump_insn (gen_rtx_SET
11965 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11967 if (second_probability
>= 0)
11969 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11970 GEN_INT (second_probability
),
11973 if (label
!= NULL_RTX
)
11974 emit_label (label
);
11978 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11980 rtx ret
, tmp
, tmpreg
, equiv
;
11981 rtx second_test
, bypass_test
;
11983 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11984 return 0; /* FAIL */
11986 gcc_assert (GET_MODE (dest
) == QImode
);
11988 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11989 PUT_MODE (ret
, QImode
);
11994 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11995 if (bypass_test
|| second_test
)
11997 rtx test
= second_test
;
11999 rtx tmp2
= gen_reg_rtx (QImode
);
12002 gcc_assert (!second_test
);
12003 test
= bypass_test
;
12005 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
12007 PUT_MODE (test
, QImode
);
12008 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
12011 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
12013 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
12016 /* Attach a REG_EQUAL note describing the comparison result. */
12017 if (ix86_compare_op0
&& ix86_compare_op1
)
12019 equiv
= simplify_gen_relational (code
, QImode
,
12020 GET_MODE (ix86_compare_op0
),
12021 ix86_compare_op0
, ix86_compare_op1
);
12022 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
12025 return 1; /* DONE */
12028 /* Expand comparison setting or clearing carry flag. Return true when
12029 successful and set pop for the operation. */
12031 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
12033 enum machine_mode mode
=
12034 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
12036 /* Do not handle DImode compares that go through special path. */
12037 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
12040 if (SCALAR_FLOAT_MODE_P (mode
))
12042 rtx second_test
= NULL
, bypass_test
= NULL
;
12043 rtx compare_op
, compare_seq
;
12045 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
12047 /* Shortcut: following common codes never translate
12048 into carry flag compares. */
12049 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
12050 || code
== ORDERED
|| code
== UNORDERED
)
12053 /* These comparisons require zero flag; swap operands so they won't. */
12054 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
12055 && !TARGET_IEEE_FP
)
12060 code
= swap_condition (code
);
12063 /* Try to expand the comparison and verify that we end up with
12064 carry flag based comparison. This fails to be true only when
12065 we decide to expand comparison using arithmetic that is not
12066 too common scenario. */
12068 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
12069 &second_test
, &bypass_test
);
12070 compare_seq
= get_insns ();
12073 if (second_test
|| bypass_test
)
12076 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12077 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12078 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
12080 code
= GET_CODE (compare_op
);
12082 if (code
!= LTU
&& code
!= GEU
)
12085 emit_insn (compare_seq
);
12090 if (!INTEGRAL_MODE_P (mode
))
12099 /* Convert a==0 into (unsigned)a<1. */
12102 if (op1
!= const0_rtx
)
12105 code
= (code
== EQ
? LTU
: GEU
);
12108 /* Convert a>b into b<a or a>=b-1. */
12111 if (CONST_INT_P (op1
))
12113 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
12114 /* Bail out on overflow. We still can swap operands but that
12115 would force loading of the constant into register. */
12116 if (op1
== const0_rtx
12117 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
12119 code
= (code
== GTU
? GEU
: LTU
);
12126 code
= (code
== GTU
? LTU
: GEU
);
12130 /* Convert a>=0 into (unsigned)a<0x80000000. */
12133 if (mode
== DImode
|| op1
!= const0_rtx
)
12135 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12136 code
= (code
== LT
? GEU
: LTU
);
12140 if (mode
== DImode
|| op1
!= constm1_rtx
)
12142 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
12143 code
= (code
== LE
? GEU
: LTU
);
12149 /* Swapping operands may cause constant to appear as first operand. */
12150 if (!nonimmediate_operand (op0
, VOIDmode
))
12152 if (!can_create_pseudo_p ())
12154 op0
= force_reg (mode
, op0
);
12156 ix86_compare_op0
= op0
;
12157 ix86_compare_op1
= op1
;
12158 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
12159 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
12164 ix86_expand_int_movcc (rtx operands
[])
12166 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
12167 rtx compare_seq
, compare_op
;
12168 rtx second_test
, bypass_test
;
12169 enum machine_mode mode
= GET_MODE (operands
[0]);
12170 bool sign_bit_compare_p
= false;;
12173 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12174 compare_seq
= get_insns ();
12177 compare_code
= GET_CODE (compare_op
);
12179 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
12180 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
12181 sign_bit_compare_p
= true;
12183 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12184 HImode insns, we'd be swallowed in word prefix ops. */
12186 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
12187 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
12188 && CONST_INT_P (operands
[2])
12189 && CONST_INT_P (operands
[3]))
12191 rtx out
= operands
[0];
12192 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
12193 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
12194 HOST_WIDE_INT diff
;
12197 /* Sign bit compares are better done using shifts than we do by using
12199 if (sign_bit_compare_p
12200 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12201 ix86_compare_op1
, &compare_op
))
12203 /* Detect overlap between destination and compare sources. */
12206 if (!sign_bit_compare_p
)
12208 bool fpcmp
= false;
12210 compare_code
= GET_CODE (compare_op
);
12212 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12213 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12216 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
12219 /* To simplify rest of code, restrict to the GEU case. */
12220 if (compare_code
== LTU
)
12222 HOST_WIDE_INT tmp
= ct
;
12225 compare_code
= reverse_condition (compare_code
);
12226 code
= reverse_condition (code
);
12231 PUT_CODE (compare_op
,
12232 reverse_condition_maybe_unordered
12233 (GET_CODE (compare_op
)));
12235 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12239 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
12240 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
12241 tmp
= gen_reg_rtx (mode
);
12243 if (mode
== DImode
)
12244 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
12246 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
12250 if (code
== GT
|| code
== GE
)
12251 code
= reverse_condition (code
);
12254 HOST_WIDE_INT tmp
= ct
;
12259 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
12260 ix86_compare_op1
, VOIDmode
, 0, -1);
12273 tmp
= expand_simple_binop (mode
, PLUS
,
12275 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12286 tmp
= expand_simple_binop (mode
, IOR
,
12288 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12290 else if (diff
== -1 && ct
)
12300 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12302 tmp
= expand_simple_binop (mode
, PLUS
,
12303 copy_rtx (tmp
), GEN_INT (cf
),
12304 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12312 * andl cf - ct, dest
12322 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
12325 tmp
= expand_simple_binop (mode
, AND
,
12327 gen_int_mode (cf
- ct
, mode
),
12328 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12330 tmp
= expand_simple_binop (mode
, PLUS
,
12331 copy_rtx (tmp
), GEN_INT (ct
),
12332 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
12335 if (!rtx_equal_p (tmp
, out
))
12336 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
12338 return 1; /* DONE */
12343 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12346 tmp
= ct
, ct
= cf
, cf
= tmp
;
12349 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12351 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12353 /* We may be reversing unordered compare to normal compare, that
12354 is not valid in general (we may convert non-trapping condition
12355 to trapping one), however on i386 we currently emit all
12356 comparisons unordered. */
12357 compare_code
= reverse_condition_maybe_unordered (compare_code
);
12358 code
= reverse_condition_maybe_unordered (code
);
12362 compare_code
= reverse_condition (compare_code
);
12363 code
= reverse_condition (code
);
12367 compare_code
= UNKNOWN
;
12368 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
12369 && CONST_INT_P (ix86_compare_op1
))
12371 if (ix86_compare_op1
== const0_rtx
12372 && (code
== LT
|| code
== GE
))
12373 compare_code
= code
;
12374 else if (ix86_compare_op1
== constm1_rtx
)
12378 else if (code
== GT
)
12383 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12384 if (compare_code
!= UNKNOWN
12385 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
12386 && (cf
== -1 || ct
== -1))
12388 /* If lea code below could be used, only optimize
12389 if it results in a 2 insn sequence. */
12391 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12392 || diff
== 3 || diff
== 5 || diff
== 9)
12393 || (compare_code
== LT
&& ct
== -1)
12394 || (compare_code
== GE
&& cf
== -1))
12397 * notl op1 (if necessary)
12405 code
= reverse_condition (code
);
12408 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12409 ix86_compare_op1
, VOIDmode
, 0, -1);
12411 out
= expand_simple_binop (mode
, IOR
,
12413 out
, 1, OPTAB_DIRECT
);
12414 if (out
!= operands
[0])
12415 emit_move_insn (operands
[0], out
);
12417 return 1; /* DONE */
12422 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
12423 || diff
== 3 || diff
== 5 || diff
== 9)
12424 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
12426 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
12432 * lea cf(dest*(ct-cf)),dest
12436 * This also catches the degenerate setcc-only case.
12442 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12443 ix86_compare_op1
, VOIDmode
, 0, 1);
12446 /* On x86_64 the lea instruction operates on Pmode, so we need
12447 to get arithmetics done in proper mode to match. */
12449 tmp
= copy_rtx (out
);
12453 out1
= copy_rtx (out
);
12454 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
12458 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
12464 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
12467 if (!rtx_equal_p (tmp
, out
))
12470 out
= force_operand (tmp
, copy_rtx (out
));
12472 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12474 if (!rtx_equal_p (out
, operands
[0]))
12475 emit_move_insn (operands
[0], copy_rtx (out
));
12477 return 1; /* DONE */
12481 * General case: Jumpful:
12482 * xorl dest,dest cmpl op1, op2
12483 * cmpl op1, op2 movl ct, dest
12484 * setcc dest jcc 1f
12485 * decl dest movl cf, dest
12486 * andl (cf-ct),dest 1:
12489 * Size 20. Size 14.
12491 * This is reasonably steep, but branch mispredict costs are
12492 * high on modern cpus, so consider failing only if optimizing
12496 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12497 && BRANCH_COST
>= 2)
12501 enum machine_mode cmp_mode
= GET_MODE (ix86_compare_op0
);
12506 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
12508 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
12510 /* We may be reversing unordered compare to normal compare,
12511 that is not valid in general (we may convert non-trapping
12512 condition to trapping one), however on i386 we currently
12513 emit all comparisons unordered. */
12514 code
= reverse_condition_maybe_unordered (code
);
12518 code
= reverse_condition (code
);
12519 if (compare_code
!= UNKNOWN
)
12520 compare_code
= reverse_condition (compare_code
);
12524 if (compare_code
!= UNKNOWN
)
12526 /* notl op1 (if needed)
12531 For x < 0 (resp. x <= -1) there will be no notl,
12532 so if possible swap the constants to get rid of the
12534 True/false will be -1/0 while code below (store flag
12535 followed by decrement) is 0/-1, so the constants need
12536 to be exchanged once more. */
12538 if (compare_code
== GE
|| !cf
)
12540 code
= reverse_condition (code
);
12545 HOST_WIDE_INT tmp
= cf
;
12550 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12551 ix86_compare_op1
, VOIDmode
, 0, -1);
12555 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12556 ix86_compare_op1
, VOIDmode
, 0, 1);
12558 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12559 copy_rtx (out
), 1, OPTAB_DIRECT
);
12562 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12563 gen_int_mode (cf
- ct
, mode
),
12564 copy_rtx (out
), 1, OPTAB_DIRECT
);
12566 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12567 copy_rtx (out
), 1, OPTAB_DIRECT
);
12568 if (!rtx_equal_p (out
, operands
[0]))
12569 emit_move_insn (operands
[0], copy_rtx (out
));
12571 return 1; /* DONE */
12575 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12577 /* Try a few things more with specific constants and a variable. */
12580 rtx var
, orig_out
, out
, tmp
;
12582 if (BRANCH_COST
<= 2)
12583 return 0; /* FAIL */
12585 /* If one of the two operands is an interesting constant, load a
12586 constant with the above and mask it in with a logical operation. */
12588 if (CONST_INT_P (operands
[2]))
12591 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12592 operands
[3] = constm1_rtx
, op
= and_optab
;
12593 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12594 operands
[3] = const0_rtx
, op
= ior_optab
;
12596 return 0; /* FAIL */
12598 else if (CONST_INT_P (operands
[3]))
12601 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12602 operands
[2] = constm1_rtx
, op
= and_optab
;
12603 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12604 operands
[2] = const0_rtx
, op
= ior_optab
;
12606 return 0; /* FAIL */
12609 return 0; /* FAIL */
12611 orig_out
= operands
[0];
12612 tmp
= gen_reg_rtx (mode
);
12615 /* Recurse to get the constant loaded. */
12616 if (ix86_expand_int_movcc (operands
) == 0)
12617 return 0; /* FAIL */
12619 /* Mask in the interesting variable. */
12620 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12622 if (!rtx_equal_p (out
, orig_out
))
12623 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12625 return 1; /* DONE */
12629 * For comparison with above,
12639 if (! nonimmediate_operand (operands
[2], mode
))
12640 operands
[2] = force_reg (mode
, operands
[2]);
12641 if (! nonimmediate_operand (operands
[3], mode
))
12642 operands
[3] = force_reg (mode
, operands
[3]);
12644 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12646 rtx tmp
= gen_reg_rtx (mode
);
12647 emit_move_insn (tmp
, operands
[3]);
12650 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12652 rtx tmp
= gen_reg_rtx (mode
);
12653 emit_move_insn (tmp
, operands
[2]);
12657 if (! register_operand (operands
[2], VOIDmode
)
12659 || ! register_operand (operands
[3], VOIDmode
)))
12660 operands
[2] = force_reg (mode
, operands
[2]);
12663 && ! register_operand (operands
[3], VOIDmode
))
12664 operands
[3] = force_reg (mode
, operands
[3]);
12666 emit_insn (compare_seq
);
12667 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12668 gen_rtx_IF_THEN_ELSE (mode
,
12669 compare_op
, operands
[2],
12672 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12673 gen_rtx_IF_THEN_ELSE (mode
,
12675 copy_rtx (operands
[3]),
12676 copy_rtx (operands
[0]))));
12678 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12679 gen_rtx_IF_THEN_ELSE (mode
,
12681 copy_rtx (operands
[2]),
12682 copy_rtx (operands
[0]))));
12684 return 1; /* DONE */
12687 /* Swap, force into registers, or otherwise massage the two operands
12688 to an sse comparison with a mask result. Thus we differ a bit from
12689 ix86_prepare_fp_compare_args which expects to produce a flags result.
12691 The DEST operand exists to help determine whether to commute commutative
12692 operators. The POP0/POP1 operands are updated in place. The new
12693 comparison code is returned, or UNKNOWN if not implementable. */
12695 static enum rtx_code
12696 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12697 rtx
*pop0
, rtx
*pop1
)
12705 /* We have no LTGT as an operator. We could implement it with
12706 NE & ORDERED, but this requires an extra temporary. It's
12707 not clear that it's worth it. */
12714 /* These are supported directly. */
12721 /* For commutative operators, try to canonicalize the destination
12722 operand to be first in the comparison - this helps reload to
12723 avoid extra moves. */
12724 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12732 /* These are not supported directly. Swap the comparison operands
12733 to transform into something that is supported. */
12737 code
= swap_condition (code
);
12741 gcc_unreachable ();
12747 /* Detect conditional moves that exactly match min/max operational
12748 semantics. Note that this is IEEE safe, as long as we don't
12749 interchange the operands.
12751 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12752 and TRUE if the operation is successful and instructions are emitted. */
12755 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12756 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12758 enum machine_mode mode
;
12764 else if (code
== UNGE
)
12767 if_true
= if_false
;
12773 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12775 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12780 mode
= GET_MODE (dest
);
12782 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12783 but MODE may be a vector mode and thus not appropriate. */
12784 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12786 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12789 if_true
= force_reg (mode
, if_true
);
12790 v
= gen_rtvec (2, if_true
, if_false
);
12791 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12795 code
= is_min
? SMIN
: SMAX
;
12796 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12799 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12803 /* Expand an sse vector comparison. Return the register with the result. */
12806 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12807 rtx op_true
, rtx op_false
)
12809 enum machine_mode mode
= GET_MODE (dest
);
12812 cmp_op0
= force_reg (mode
, cmp_op0
);
12813 if (!nonimmediate_operand (cmp_op1
, mode
))
12814 cmp_op1
= force_reg (mode
, cmp_op1
);
12817 || reg_overlap_mentioned_p (dest
, op_true
)
12818 || reg_overlap_mentioned_p (dest
, op_false
))
12819 dest
= gen_reg_rtx (mode
);
12821 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12822 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12827 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12828 operations. This is used for both scalar and vector conditional moves. */
12831 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12833 enum machine_mode mode
= GET_MODE (dest
);
12836 if (op_false
== CONST0_RTX (mode
))
12838 op_true
= force_reg (mode
, op_true
);
12839 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12840 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12842 else if (op_true
== CONST0_RTX (mode
))
12844 op_false
= force_reg (mode
, op_false
);
12845 x
= gen_rtx_NOT (mode
, cmp
);
12846 x
= gen_rtx_AND (mode
, x
, op_false
);
12847 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12851 op_true
= force_reg (mode
, op_true
);
12852 op_false
= force_reg (mode
, op_false
);
12854 t2
= gen_reg_rtx (mode
);
12856 t3
= gen_reg_rtx (mode
);
12860 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12861 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12863 x
= gen_rtx_NOT (mode
, cmp
);
12864 x
= gen_rtx_AND (mode
, x
, op_false
);
12865 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12867 x
= gen_rtx_IOR (mode
, t3
, t2
);
12868 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12872 /* Expand a floating-point conditional move. Return true if successful. */
12875 ix86_expand_fp_movcc (rtx operands
[])
12877 enum machine_mode mode
= GET_MODE (operands
[0]);
12878 enum rtx_code code
= GET_CODE (operands
[1]);
12879 rtx tmp
, compare_op
, second_test
, bypass_test
;
12881 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12883 enum machine_mode cmode
;
12885 /* Since we've no cmove for sse registers, don't force bad register
12886 allocation just to gain access to it. Deny movcc when the
12887 comparison mode doesn't match the move mode. */
12888 cmode
= GET_MODE (ix86_compare_op0
);
12889 if (cmode
== VOIDmode
)
12890 cmode
= GET_MODE (ix86_compare_op1
);
12894 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12896 &ix86_compare_op1
);
12897 if (code
== UNKNOWN
)
12900 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12901 ix86_compare_op1
, operands
[2],
12905 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12906 ix86_compare_op1
, operands
[2], operands
[3]);
12907 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12911 /* The floating point conditional move instructions don't directly
12912 support conditions resulting from a signed integer comparison. */
12914 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12916 /* The floating point conditional move instructions don't directly
12917 support signed integer comparisons. */
12919 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12921 gcc_assert (!second_test
&& !bypass_test
);
12922 tmp
= gen_reg_rtx (QImode
);
12923 ix86_expand_setcc (code
, tmp
);
12925 ix86_compare_op0
= tmp
;
12926 ix86_compare_op1
= const0_rtx
;
12927 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12929 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12931 tmp
= gen_reg_rtx (mode
);
12932 emit_move_insn (tmp
, operands
[3]);
12935 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12937 tmp
= gen_reg_rtx (mode
);
12938 emit_move_insn (tmp
, operands
[2]);
12942 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12943 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12944 operands
[2], operands
[3])));
12946 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12947 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12948 operands
[3], operands
[0])));
12950 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12951 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12952 operands
[2], operands
[0])));
12957 /* Expand a floating-point vector conditional move; a vcond operation
12958 rather than a movcc operation. */
12961 ix86_expand_fp_vcond (rtx operands
[])
12963 enum rtx_code code
= GET_CODE (operands
[3]);
12966 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12967 &operands
[4], &operands
[5]);
12968 if (code
== UNKNOWN
)
12971 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12972 operands
[5], operands
[1], operands
[2]))
12975 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12976 operands
[1], operands
[2]);
12977 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12981 /* Expand a signed/unsigned integral vector conditional move. */
12984 ix86_expand_int_vcond (rtx operands
[])
12986 enum machine_mode mode
= GET_MODE (operands
[0]);
12987 enum rtx_code code
= GET_CODE (operands
[3]);
12988 bool negate
= false;
12991 cop0
= operands
[4];
12992 cop1
= operands
[5];
12994 /* Canonicalize the comparison to EQ, GT, GTU. */
13005 code
= reverse_condition (code
);
13011 code
= reverse_condition (code
);
13017 code
= swap_condition (code
);
13018 x
= cop0
, cop0
= cop1
, cop1
= x
;
13022 gcc_unreachable ();
13025 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13026 if (mode
== V2DImode
)
13031 /* SSE4.1 supports EQ. */
13032 if (!TARGET_SSE4_1
)
13038 /* SSE4.2 supports GT/GTU. */
13039 if (!TARGET_SSE4_2
)
13044 gcc_unreachable ();
13048 /* Unsigned parallel compare is not supported by the hardware. Play some
13049 tricks to turn this into a signed comparison against 0. */
13052 cop0
= force_reg (mode
, cop0
);
13061 /* Perform a parallel modulo subtraction. */
13062 t1
= gen_reg_rtx (mode
);
13063 emit_insn ((mode
== V4SImode
13065 : gen_subv2di3
) (t1
, cop0
, cop1
));
13067 /* Extract the original sign bit of op0. */
13068 mask
= ix86_build_signbit_mask (GET_MODE_INNER (mode
),
13070 t2
= gen_reg_rtx (mode
);
13071 emit_insn ((mode
== V4SImode
13073 : gen_andv2di3
) (t2
, cop0
, mask
));
13075 /* XOR it back into the result of the subtraction. This results
13076 in the sign bit set iff we saw unsigned underflow. */
13077 x
= gen_reg_rtx (mode
);
13078 emit_insn ((mode
== V4SImode
13080 : gen_xorv2di3
) (x
, t1
, t2
));
13088 /* Perform a parallel unsigned saturating subtraction. */
13089 x
= gen_reg_rtx (mode
);
13090 emit_insn (gen_rtx_SET (VOIDmode
, x
,
13091 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
13098 gcc_unreachable ();
13102 cop1
= CONST0_RTX (mode
);
13105 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
13106 operands
[1+negate
], operands
[2-negate
]);
13108 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
13109 operands
[2-negate
]);
13113 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13114 true if we should do zero extension, else sign extension. HIGH_P is
13115 true if we want the N/2 high elements, else the low elements. */
13118 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13120 enum machine_mode imode
= GET_MODE (operands
[1]);
13121 rtx (*unpack
)(rtx
, rtx
, rtx
);
13128 unpack
= gen_vec_interleave_highv16qi
;
13130 unpack
= gen_vec_interleave_lowv16qi
;
13134 unpack
= gen_vec_interleave_highv8hi
;
13136 unpack
= gen_vec_interleave_lowv8hi
;
13140 unpack
= gen_vec_interleave_highv4si
;
13142 unpack
= gen_vec_interleave_lowv4si
;
13145 gcc_unreachable ();
13148 dest
= gen_lowpart (imode
, operands
[0]);
13151 se
= force_reg (imode
, CONST0_RTX (imode
));
13153 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
13154 operands
[1], pc_rtx
, pc_rtx
);
13156 emit_insn (unpack (dest
, operands
[1], se
));
13159 /* This function performs the same task as ix86_expand_sse_unpack,
13160 but with SSE4.1 instructions. */
13163 ix86_expand_sse4_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
13165 enum machine_mode imode
= GET_MODE (operands
[1]);
13166 rtx (*unpack
)(rtx
, rtx
);
13173 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
13175 unpack
= gen_sse4_1_extendv8qiv8hi2
;
13179 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
13181 unpack
= gen_sse4_1_extendv4hiv4si2
;
13185 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
13187 unpack
= gen_sse4_1_extendv2siv2di2
;
13190 gcc_unreachable ();
13193 dest
= operands
[0];
13196 /* Shift higher 8 bytes to lower 8 bytes. */
13197 src
= gen_reg_rtx (imode
);
13198 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode
, src
),
13199 gen_lowpart (TImode
, operands
[1]),
13205 emit_insn (unpack (dest
, src
));
13208 /* Expand conditional increment or decrement using adb/sbb instructions.
13209 The default case using setcc followed by the conditional move can be
13210 done by generic code. */
13212 ix86_expand_int_addcc (rtx operands
[])
13214 enum rtx_code code
= GET_CODE (operands
[1]);
13216 rtx val
= const0_rtx
;
13217 bool fpcmp
= false;
13218 enum machine_mode mode
= GET_MODE (operands
[0]);
13220 if (operands
[3] != const1_rtx
13221 && operands
[3] != constm1_rtx
)
13223 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
13224 ix86_compare_op1
, &compare_op
))
13226 code
= GET_CODE (compare_op
);
13228 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
13229 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
13232 code
= ix86_fp_compare_code_to_integer (code
);
13239 PUT_CODE (compare_op
,
13240 reverse_condition_maybe_unordered
13241 (GET_CODE (compare_op
)));
13243 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
13245 PUT_MODE (compare_op
, mode
);
13247 /* Construct either adc or sbb insn. */
13248 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
13250 switch (GET_MODE (operands
[0]))
13253 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13256 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13259 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13262 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13265 gcc_unreachable ();
13270 switch (GET_MODE (operands
[0]))
13273 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
13276 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
13279 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
13282 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
13285 gcc_unreachable ();
13288 return 1; /* DONE */
13292 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13293 works for floating pointer parameters and nonoffsetable memories.
13294 For pushes, it returns just stack offsets; the values will be saved
13295 in the right order. Maximally three parts are generated. */
13298 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
13303 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
13305 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
13307 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
13308 gcc_assert (size
>= 2 && size
<= 3);
13310 /* Optimize constant pool reference to immediates. This is used by fp
13311 moves, that force all constants to memory to allow combining. */
13312 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
13314 rtx tmp
= maybe_get_pool_constant (operand
);
13319 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
13321 /* The only non-offsetable memories we handle are pushes. */
13322 int ok
= push_operand (operand
, VOIDmode
);
13326 operand
= copy_rtx (operand
);
13327 PUT_MODE (operand
, Pmode
);
13328 parts
[0] = parts
[1] = parts
[2] = operand
;
13332 if (GET_CODE (operand
) == CONST_VECTOR
)
13334 enum machine_mode imode
= int_mode_for_mode (mode
);
13335 /* Caution: if we looked through a constant pool memory above,
13336 the operand may actually have a different mode now. That's
13337 ok, since we want to pun this all the way back to an integer. */
13338 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
13339 gcc_assert (operand
!= NULL
);
13345 if (mode
== DImode
)
13346 split_di (&operand
, 1, &parts
[0], &parts
[1]);
13349 if (REG_P (operand
))
13351 gcc_assert (reload_completed
);
13352 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
13353 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
13355 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
13357 else if (offsettable_memref_p (operand
))
13359 operand
= adjust_address (operand
, SImode
, 0);
13360 parts
[0] = operand
;
13361 parts
[1] = adjust_address (operand
, SImode
, 4);
13363 parts
[2] = adjust_address (operand
, SImode
, 8);
13365 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13370 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13374 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
13375 parts
[2] = gen_int_mode (l
[2], SImode
);
13378 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
13381 gcc_unreachable ();
13383 parts
[1] = gen_int_mode (l
[1], SImode
);
13384 parts
[0] = gen_int_mode (l
[0], SImode
);
13387 gcc_unreachable ();
13392 if (mode
== TImode
)
13393 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
13394 if (mode
== XFmode
|| mode
== TFmode
)
13396 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
13397 if (REG_P (operand
))
13399 gcc_assert (reload_completed
);
13400 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
13401 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
13403 else if (offsettable_memref_p (operand
))
13405 operand
= adjust_address (operand
, DImode
, 0);
13406 parts
[0] = operand
;
13407 parts
[1] = adjust_address (operand
, upper_mode
, 8);
13409 else if (GET_CODE (operand
) == CONST_DOUBLE
)
13414 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
13415 real_to_target (l
, &r
, mode
);
13417 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13418 if (HOST_BITS_PER_WIDE_INT
>= 64)
13421 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13422 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
13425 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
13427 if (upper_mode
== SImode
)
13428 parts
[1] = gen_int_mode (l
[2], SImode
);
13429 else if (HOST_BITS_PER_WIDE_INT
>= 64)
13432 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
13433 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
13436 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
13439 gcc_unreachable ();
13446 /* Emit insns to perform a move or push of DI, DF, and XF values.
13447 Return false when normal moves are needed; true when all required
13448 insns have been emitted. Operands 2-4 contain the input values
13449 int the correct order; operands 5-7 contain the output values. */
13452 ix86_split_long_move (rtx operands
[])
13457 int collisions
= 0;
13458 enum machine_mode mode
= GET_MODE (operands
[0]);
13460 /* The DFmode expanders may ask us to move double.
13461 For 64bit target this is single move. By hiding the fact
13462 here we simplify i386.md splitters. */
13463 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
13465 /* Optimize constant pool reference to immediates. This is used by
13466 fp moves, that force all constants to memory to allow combining. */
13468 if (MEM_P (operands
[1])
13469 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
13470 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
13471 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
13472 if (push_operand (operands
[0], VOIDmode
))
13474 operands
[0] = copy_rtx (operands
[0]);
13475 PUT_MODE (operands
[0], Pmode
);
13478 operands
[0] = gen_lowpart (DImode
, operands
[0]);
13479 operands
[1] = gen_lowpart (DImode
, operands
[1]);
13480 emit_move_insn (operands
[0], operands
[1]);
13484 /* The only non-offsettable memory we handle is push. */
13485 if (push_operand (operands
[0], VOIDmode
))
13488 gcc_assert (!MEM_P (operands
[0])
13489 || offsettable_memref_p (operands
[0]));
13491 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
13492 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
13494 /* When emitting push, take care for source operands on the stack. */
13495 if (push
&& MEM_P (operands
[1])
13496 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
13499 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
13500 XEXP (part
[1][2], 0));
13501 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
13502 XEXP (part
[1][1], 0));
13505 /* We need to do copy in the right order in case an address register
13506 of the source overlaps the destination. */
13507 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
13509 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
13511 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13514 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
13517 /* Collision in the middle part can be handled by reordering. */
13518 if (collisions
== 1 && nparts
== 3
13519 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
13522 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
13523 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
13526 /* If there are more collisions, we can't handle it by reordering.
13527 Do an lea to the last part and use only one colliding move. */
13528 else if (collisions
> 1)
13534 base
= part
[0][nparts
- 1];
13536 /* Handle the case when the last part isn't valid for lea.
13537 Happens in 64-bit mode storing the 12-byte XFmode. */
13538 if (GET_MODE (base
) != Pmode
)
13539 base
= gen_rtx_REG (Pmode
, REGNO (base
));
13541 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
13542 part
[1][0] = replace_equiv_address (part
[1][0], base
);
13543 part
[1][1] = replace_equiv_address (part
[1][1],
13544 plus_constant (base
, UNITS_PER_WORD
));
13546 part
[1][2] = replace_equiv_address (part
[1][2],
13547 plus_constant (base
, 8));
13557 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13558 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13559 emit_move_insn (part
[0][2], part
[1][2]);
13564 /* In 64bit mode we don't have 32bit push available. In case this is
13565 register, it is OK - we will just use larger counterpart. We also
13566 retype memory - these comes from attempt to avoid REX prefix on
13567 moving of second half of TFmode value. */
13568 if (GET_MODE (part
[1][1]) == SImode
)
13570 switch (GET_CODE (part
[1][1]))
13573 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13577 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13581 gcc_unreachable ();
13584 if (GET_MODE (part
[1][0]) == SImode
)
13585 part
[1][0] = part
[1][1];
13588 emit_move_insn (part
[0][1], part
[1][1]);
13589 emit_move_insn (part
[0][0], part
[1][0]);
13593 /* Choose correct order to not overwrite the source before it is copied. */
13594 if ((REG_P (part
[0][0])
13595 && REG_P (part
[1][1])
13596 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13598 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13600 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13604 operands
[2] = part
[0][2];
13605 operands
[3] = part
[0][1];
13606 operands
[4] = part
[0][0];
13607 operands
[5] = part
[1][2];
13608 operands
[6] = part
[1][1];
13609 operands
[7] = part
[1][0];
13613 operands
[2] = part
[0][1];
13614 operands
[3] = part
[0][0];
13615 operands
[5] = part
[1][1];
13616 operands
[6] = part
[1][0];
13623 operands
[2] = part
[0][0];
13624 operands
[3] = part
[0][1];
13625 operands
[4] = part
[0][2];
13626 operands
[5] = part
[1][0];
13627 operands
[6] = part
[1][1];
13628 operands
[7] = part
[1][2];
13632 operands
[2] = part
[0][0];
13633 operands
[3] = part
[0][1];
13634 operands
[5] = part
[1][0];
13635 operands
[6] = part
[1][1];
13639 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13642 if (CONST_INT_P (operands
[5])
13643 && operands
[5] != const0_rtx
13644 && REG_P (operands
[2]))
13646 if (CONST_INT_P (operands
[6])
13647 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13648 operands
[6] = operands
[2];
13651 && CONST_INT_P (operands
[7])
13652 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13653 operands
[7] = operands
[2];
13657 && CONST_INT_P (operands
[6])
13658 && operands
[6] != const0_rtx
13659 && REG_P (operands
[3])
13660 && CONST_INT_P (operands
[7])
13661 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13662 operands
[7] = operands
[3];
13665 emit_move_insn (operands
[2], operands
[5]);
13666 emit_move_insn (operands
[3], operands
[6]);
13668 emit_move_insn (operands
[4], operands
[7]);
13673 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13674 left shift by a constant, either using a single shift or
13675 a sequence of add instructions. */
13678 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13682 emit_insn ((mode
== DImode
13684 : gen_adddi3
) (operand
, operand
, operand
));
13686 else if (!optimize_size
13687 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13690 for (i
=0; i
<count
; i
++)
13692 emit_insn ((mode
== DImode
13694 : gen_adddi3
) (operand
, operand
, operand
));
13698 emit_insn ((mode
== DImode
13700 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13704 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13706 rtx low
[2], high
[2];
13708 const int single_width
= mode
== DImode
? 32 : 64;
13710 if (CONST_INT_P (operands
[2]))
13712 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13713 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13715 if (count
>= single_width
)
13717 emit_move_insn (high
[0], low
[1]);
13718 emit_move_insn (low
[0], const0_rtx
);
13720 if (count
> single_width
)
13721 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13725 if (!rtx_equal_p (operands
[0], operands
[1]))
13726 emit_move_insn (operands
[0], operands
[1]);
13727 emit_insn ((mode
== DImode
13729 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13730 ix86_expand_ashl_const (low
[0], count
, mode
);
13735 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13737 if (operands
[1] == const1_rtx
)
13739 /* Assuming we've chosen a QImode capable registers, then 1 << N
13740 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13741 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13743 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13745 ix86_expand_clear (low
[0]);
13746 ix86_expand_clear (high
[0]);
13747 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13749 d
= gen_lowpart (QImode
, low
[0]);
13750 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13751 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13752 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13754 d
= gen_lowpart (QImode
, high
[0]);
13755 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13756 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13757 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13760 /* Otherwise, we can get the same results by manually performing
13761 a bit extract operation on bit 5/6, and then performing the two
13762 shifts. The two methods of getting 0/1 into low/high are exactly
13763 the same size. Avoiding the shift in the bit extract case helps
13764 pentium4 a bit; no one else seems to care much either way. */
13769 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13770 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13772 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13773 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13775 emit_insn ((mode
== DImode
13777 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13778 emit_insn ((mode
== DImode
13780 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13781 emit_move_insn (low
[0], high
[0]);
13782 emit_insn ((mode
== DImode
13784 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13787 emit_insn ((mode
== DImode
13789 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13790 emit_insn ((mode
== DImode
13792 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13796 if (operands
[1] == constm1_rtx
)
13798 /* For -1 << N, we can avoid the shld instruction, because we
13799 know that we're shifting 0...31/63 ones into a -1. */
13800 emit_move_insn (low
[0], constm1_rtx
);
13802 emit_move_insn (high
[0], low
[0]);
13804 emit_move_insn (high
[0], constm1_rtx
);
13808 if (!rtx_equal_p (operands
[0], operands
[1]))
13809 emit_move_insn (operands
[0], operands
[1]);
13811 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13812 emit_insn ((mode
== DImode
13814 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13817 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13819 if (TARGET_CMOVE
&& scratch
)
13821 ix86_expand_clear (scratch
);
13822 emit_insn ((mode
== DImode
13823 ? gen_x86_shift_adj_1
13824 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13827 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13831 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13833 rtx low
[2], high
[2];
13835 const int single_width
= mode
== DImode
? 32 : 64;
13837 if (CONST_INT_P (operands
[2]))
13839 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13840 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13842 if (count
== single_width
* 2 - 1)
13844 emit_move_insn (high
[0], high
[1]);
13845 emit_insn ((mode
== DImode
13847 : gen_ashrdi3
) (high
[0], high
[0],
13848 GEN_INT (single_width
- 1)));
13849 emit_move_insn (low
[0], high
[0]);
13852 else if (count
>= single_width
)
13854 emit_move_insn (low
[0], high
[1]);
13855 emit_move_insn (high
[0], low
[0]);
13856 emit_insn ((mode
== DImode
13858 : gen_ashrdi3
) (high
[0], high
[0],
13859 GEN_INT (single_width
- 1)));
13860 if (count
> single_width
)
13861 emit_insn ((mode
== DImode
13863 : gen_ashrdi3
) (low
[0], low
[0],
13864 GEN_INT (count
- single_width
)));
13868 if (!rtx_equal_p (operands
[0], operands
[1]))
13869 emit_move_insn (operands
[0], operands
[1]);
13870 emit_insn ((mode
== DImode
13872 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13873 emit_insn ((mode
== DImode
13875 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13880 if (!rtx_equal_p (operands
[0], operands
[1]))
13881 emit_move_insn (operands
[0], operands
[1]);
13883 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13885 emit_insn ((mode
== DImode
13887 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13888 emit_insn ((mode
== DImode
13890 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13892 if (TARGET_CMOVE
&& scratch
)
13894 emit_move_insn (scratch
, high
[0]);
13895 emit_insn ((mode
== DImode
13897 : gen_ashrdi3
) (scratch
, scratch
,
13898 GEN_INT (single_width
- 1)));
13899 emit_insn ((mode
== DImode
13900 ? gen_x86_shift_adj_1
13901 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13905 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13910 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13912 rtx low
[2], high
[2];
13914 const int single_width
= mode
== DImode
? 32 : 64;
13916 if (CONST_INT_P (operands
[2]))
13918 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13919 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13921 if (count
>= single_width
)
13923 emit_move_insn (low
[0], high
[1]);
13924 ix86_expand_clear (high
[0]);
13926 if (count
> single_width
)
13927 emit_insn ((mode
== DImode
13929 : gen_lshrdi3
) (low
[0], low
[0],
13930 GEN_INT (count
- single_width
)));
13934 if (!rtx_equal_p (operands
[0], operands
[1]))
13935 emit_move_insn (operands
[0], operands
[1]);
13936 emit_insn ((mode
== DImode
13938 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13939 emit_insn ((mode
== DImode
13941 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13946 if (!rtx_equal_p (operands
[0], operands
[1]))
13947 emit_move_insn (operands
[0], operands
[1]);
13949 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13951 emit_insn ((mode
== DImode
13953 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13954 emit_insn ((mode
== DImode
13956 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13958 /* Heh. By reversing the arguments, we can reuse this pattern. */
13959 if (TARGET_CMOVE
&& scratch
)
13961 ix86_expand_clear (scratch
);
13962 emit_insn ((mode
== DImode
13963 ? gen_x86_shift_adj_1
13964 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13968 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13972 /* Predict just emitted jump instruction to be taken with probability PROB. */
13974 predict_jump (int prob
)
13976 rtx insn
= get_last_insn ();
13977 gcc_assert (JUMP_P (insn
));
13979 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13984 /* Helper function for the string operations below. Dest VARIABLE whether
13985 it is aligned to VALUE bytes. If true, jump to the label. */
13987 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13989 rtx label
= gen_label_rtx ();
13990 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13991 if (GET_MODE (variable
) == DImode
)
13992 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13994 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13995 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13998 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
14000 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14004 /* Adjust COUNTER by the VALUE. */
14006 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
14008 if (GET_MODE (countreg
) == DImode
)
14009 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
14011 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
14014 /* Zero extend possibly SImode EXP to Pmode register. */
14016 ix86_zero_extend_to_Pmode (rtx exp
)
14019 if (GET_MODE (exp
) == VOIDmode
)
14020 return force_reg (Pmode
, exp
);
14021 if (GET_MODE (exp
) == Pmode
)
14022 return copy_to_mode_reg (Pmode
, exp
);
14023 r
= gen_reg_rtx (Pmode
);
14024 emit_insn (gen_zero_extendsidi2 (r
, exp
));
14028 /* Divide COUNTREG by SCALE. */
14030 scale_counter (rtx countreg
, int scale
)
14033 rtx piece_size_mask
;
14037 if (CONST_INT_P (countreg
))
14038 return GEN_INT (INTVAL (countreg
) / scale
);
14039 gcc_assert (REG_P (countreg
));
14041 piece_size_mask
= GEN_INT (scale
- 1);
14042 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
14043 GEN_INT (exact_log2 (scale
)),
14044 NULL
, 1, OPTAB_DIRECT
);
14048 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14049 DImode for constant loop counts. */
14051 static enum machine_mode
14052 counter_mode (rtx count_exp
)
14054 if (GET_MODE (count_exp
) != VOIDmode
)
14055 return GET_MODE (count_exp
);
14056 if (GET_CODE (count_exp
) != CONST_INT
)
14058 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
14063 /* When SRCPTR is non-NULL, output simple loop to move memory
14064 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14065 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14066 equivalent loop to set memory by VALUE (supposed to be in MODE).
14068 The size is rounded down to whole number of chunk size moved at once.
14069 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14073 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
14074 rtx destptr
, rtx srcptr
, rtx value
,
14075 rtx count
, enum machine_mode mode
, int unroll
,
14078 rtx out_label
, top_label
, iter
, tmp
;
14079 enum machine_mode iter_mode
= counter_mode (count
);
14080 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
14081 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
14087 top_label
= gen_label_rtx ();
14088 out_label
= gen_label_rtx ();
14089 iter
= gen_reg_rtx (iter_mode
);
14091 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
14092 NULL
, 1, OPTAB_DIRECT
);
14093 /* Those two should combine. */
14094 if (piece_size
== const1_rtx
)
14096 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
14098 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
14100 emit_move_insn (iter
, const0_rtx
);
14102 emit_label (top_label
);
14104 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
14105 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
14106 destmem
= change_address (destmem
, mode
, x_addr
);
14110 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
14111 srcmem
= change_address (srcmem
, mode
, y_addr
);
14113 /* When unrolling for chips that reorder memory reads and writes,
14114 we can save registers by using single temporary.
14115 Also using 4 temporaries is overkill in 32bit mode. */
14116 if (!TARGET_64BIT
&& 0)
14118 for (i
= 0; i
< unroll
; i
++)
14123 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14125 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14127 emit_move_insn (destmem
, srcmem
);
14133 gcc_assert (unroll
<= 4);
14134 for (i
= 0; i
< unroll
; i
++)
14136 tmpreg
[i
] = gen_reg_rtx (mode
);
14140 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
14142 emit_move_insn (tmpreg
[i
], srcmem
);
14144 for (i
= 0; i
< unroll
; i
++)
14149 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14151 emit_move_insn (destmem
, tmpreg
[i
]);
14156 for (i
= 0; i
< unroll
; i
++)
14160 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
14161 emit_move_insn (destmem
, value
);
14164 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
14165 true, OPTAB_LIB_WIDEN
);
14167 emit_move_insn (iter
, tmp
);
14169 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
14171 if (expected_size
!= -1)
14173 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
14174 if (expected_size
== 0)
14176 else if (expected_size
> REG_BR_PROB_BASE
)
14177 predict_jump (REG_BR_PROB_BASE
- 1);
14179 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
14182 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
14183 iter
= ix86_zero_extend_to_Pmode (iter
);
14184 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
14185 true, OPTAB_LIB_WIDEN
);
14186 if (tmp
!= destptr
)
14187 emit_move_insn (destptr
, tmp
);
14190 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
14191 true, OPTAB_LIB_WIDEN
);
14193 emit_move_insn (srcptr
, tmp
);
14195 emit_label (out_label
);
14198 /* Output "rep; mov" instruction.
14199 Arguments have same meaning as for previous function */
14201 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
14202 rtx destptr
, rtx srcptr
,
14204 enum machine_mode mode
)
14210 /* If the size is known, it is shorter to use rep movs. */
14211 if (mode
== QImode
&& CONST_INT_P (count
)
14212 && !(INTVAL (count
) & 3))
14215 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14216 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14217 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
14218 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
14219 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14220 if (mode
!= QImode
)
14222 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14223 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14224 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14225 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14226 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14227 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
14231 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14232 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
14234 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
14238 /* Output "rep; stos" instruction.
14239 Arguments have same meaning as for previous function */
14241 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
14243 enum machine_mode mode
)
14248 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
14249 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
14250 value
= force_reg (mode
, gen_lowpart (mode
, value
));
14251 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
14252 if (mode
!= QImode
)
14254 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
14255 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
14256 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
14259 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
14260 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
14264 emit_strmov (rtx destmem
, rtx srcmem
,
14265 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
14267 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
14268 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
14269 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14272 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14274 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
14275 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
14278 if (CONST_INT_P (count
))
14280 HOST_WIDE_INT countval
= INTVAL (count
);
14283 if ((countval
& 0x10) && max_size
> 16)
14287 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14288 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
14291 gcc_unreachable ();
14294 if ((countval
& 0x08) && max_size
> 8)
14297 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
14300 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14301 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
14305 if ((countval
& 0x04) && max_size
> 4)
14307 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
14310 if ((countval
& 0x02) && max_size
> 2)
14312 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
14315 if ((countval
& 0x01) && max_size
> 1)
14317 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
14324 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
14325 count
, 1, OPTAB_DIRECT
);
14326 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
14327 count
, QImode
, 1, 4);
14331 /* When there are stringops, we can cheaply increase dest and src pointers.
14332 Otherwise we save code size by maintaining offset (zero is readily
14333 available from preceding rep operation) and using x86 addressing modes.
14335 if (TARGET_SINGLE_STRINGOP
)
14339 rtx label
= ix86_expand_aligntest (count
, 4, true);
14340 src
= change_address (srcmem
, SImode
, srcptr
);
14341 dest
= change_address (destmem
, SImode
, destptr
);
14342 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14343 emit_label (label
);
14344 LABEL_NUSES (label
) = 1;
14348 rtx label
= ix86_expand_aligntest (count
, 2, true);
14349 src
= change_address (srcmem
, HImode
, srcptr
);
14350 dest
= change_address (destmem
, HImode
, destptr
);
14351 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14352 emit_label (label
);
14353 LABEL_NUSES (label
) = 1;
14357 rtx label
= ix86_expand_aligntest (count
, 1, true);
14358 src
= change_address (srcmem
, QImode
, srcptr
);
14359 dest
= change_address (destmem
, QImode
, destptr
);
14360 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
14361 emit_label (label
);
14362 LABEL_NUSES (label
) = 1;
14367 rtx offset
= force_reg (Pmode
, const0_rtx
);
14372 rtx label
= ix86_expand_aligntest (count
, 4, true);
14373 src
= change_address (srcmem
, SImode
, srcptr
);
14374 dest
= change_address (destmem
, SImode
, destptr
);
14375 emit_move_insn (dest
, src
);
14376 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
14377 true, OPTAB_LIB_WIDEN
);
14379 emit_move_insn (offset
, tmp
);
14380 emit_label (label
);
14381 LABEL_NUSES (label
) = 1;
14385 rtx label
= ix86_expand_aligntest (count
, 2, true);
14386 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14387 src
= change_address (srcmem
, HImode
, tmp
);
14388 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14389 dest
= change_address (destmem
, HImode
, tmp
);
14390 emit_move_insn (dest
, src
);
14391 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
14392 true, OPTAB_LIB_WIDEN
);
14394 emit_move_insn (offset
, tmp
);
14395 emit_label (label
);
14396 LABEL_NUSES (label
) = 1;
14400 rtx label
= ix86_expand_aligntest (count
, 1, true);
14401 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
14402 src
= change_address (srcmem
, QImode
, tmp
);
14403 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
14404 dest
= change_address (destmem
, QImode
, tmp
);
14405 emit_move_insn (dest
, src
);
14406 emit_label (label
);
14407 LABEL_NUSES (label
) = 1;
14412 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14414 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
14415 rtx count
, int max_size
)
14418 expand_simple_binop (counter_mode (count
), AND
, count
,
14419 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
14420 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
14421 gen_lowpart (QImode
, value
), count
, QImode
,
14425 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14427 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
14431 if (CONST_INT_P (count
))
14433 HOST_WIDE_INT countval
= INTVAL (count
);
14436 if ((countval
& 0x10) && max_size
> 16)
14440 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14441 emit_insn (gen_strset (destptr
, dest
, value
));
14442 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
14443 emit_insn (gen_strset (destptr
, dest
, value
));
14446 gcc_unreachable ();
14449 if ((countval
& 0x08) && max_size
> 8)
14453 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
14454 emit_insn (gen_strset (destptr
, dest
, value
));
14458 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14459 emit_insn (gen_strset (destptr
, dest
, value
));
14460 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
14461 emit_insn (gen_strset (destptr
, dest
, value
));
14465 if ((countval
& 0x04) && max_size
> 4)
14467 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
14468 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14471 if ((countval
& 0x02) && max_size
> 2)
14473 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
14474 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14477 if ((countval
& 0x01) && max_size
> 1)
14479 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
14480 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14487 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
14492 rtx label
= ix86_expand_aligntest (count
, 16, true);
14495 dest
= change_address (destmem
, DImode
, destptr
);
14496 emit_insn (gen_strset (destptr
, dest
, value
));
14497 emit_insn (gen_strset (destptr
, dest
, value
));
14501 dest
= change_address (destmem
, SImode
, destptr
);
14502 emit_insn (gen_strset (destptr
, dest
, value
));
14503 emit_insn (gen_strset (destptr
, dest
, value
));
14504 emit_insn (gen_strset (destptr
, dest
, value
));
14505 emit_insn (gen_strset (destptr
, dest
, value
));
14507 emit_label (label
);
14508 LABEL_NUSES (label
) = 1;
14512 rtx label
= ix86_expand_aligntest (count
, 8, true);
14515 dest
= change_address (destmem
, DImode
, destptr
);
14516 emit_insn (gen_strset (destptr
, dest
, value
));
14520 dest
= change_address (destmem
, SImode
, destptr
);
14521 emit_insn (gen_strset (destptr
, dest
, value
));
14522 emit_insn (gen_strset (destptr
, dest
, value
));
14524 emit_label (label
);
14525 LABEL_NUSES (label
) = 1;
14529 rtx label
= ix86_expand_aligntest (count
, 4, true);
14530 dest
= change_address (destmem
, SImode
, destptr
);
14531 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
14532 emit_label (label
);
14533 LABEL_NUSES (label
) = 1;
14537 rtx label
= ix86_expand_aligntest (count
, 2, true);
14538 dest
= change_address (destmem
, HImode
, destptr
);
14539 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
14540 emit_label (label
);
14541 LABEL_NUSES (label
) = 1;
14545 rtx label
= ix86_expand_aligntest (count
, 1, true);
14546 dest
= change_address (destmem
, QImode
, destptr
);
14547 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
14548 emit_label (label
);
14549 LABEL_NUSES (label
) = 1;
14553 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14554 DESIRED_ALIGNMENT. */
14556 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14557 rtx destptr
, rtx srcptr
, rtx count
,
14558 int align
, int desired_alignment
)
14560 if (align
<= 1 && desired_alignment
> 1)
14562 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14563 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14564 destmem
= change_address (destmem
, QImode
, destptr
);
14565 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14566 ix86_adjust_counter (count
, 1);
14567 emit_label (label
);
14568 LABEL_NUSES (label
) = 1;
14570 if (align
<= 2 && desired_alignment
> 2)
14572 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14573 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14574 destmem
= change_address (destmem
, HImode
, destptr
);
14575 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14576 ix86_adjust_counter (count
, 2);
14577 emit_label (label
);
14578 LABEL_NUSES (label
) = 1;
14580 if (align
<= 4 && desired_alignment
> 4)
14582 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14583 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14584 destmem
= change_address (destmem
, SImode
, destptr
);
14585 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14586 ix86_adjust_counter (count
, 4);
14587 emit_label (label
);
14588 LABEL_NUSES (label
) = 1;
14590 gcc_assert (desired_alignment
<= 8);
14593 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14594 DESIRED_ALIGNMENT. */
14596 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14597 int align
, int desired_alignment
)
14599 if (align
<= 1 && desired_alignment
> 1)
14601 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14602 destmem
= change_address (destmem
, QImode
, destptr
);
14603 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14604 ix86_adjust_counter (count
, 1);
14605 emit_label (label
);
14606 LABEL_NUSES (label
) = 1;
14608 if (align
<= 2 && desired_alignment
> 2)
14610 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14611 destmem
= change_address (destmem
, HImode
, destptr
);
14612 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14613 ix86_adjust_counter (count
, 2);
14614 emit_label (label
);
14615 LABEL_NUSES (label
) = 1;
14617 if (align
<= 4 && desired_alignment
> 4)
14619 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14620 destmem
= change_address (destmem
, SImode
, destptr
);
14621 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14622 ix86_adjust_counter (count
, 4);
14623 emit_label (label
);
14624 LABEL_NUSES (label
) = 1;
14626 gcc_assert (desired_alignment
<= 8);
14629 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14630 static enum stringop_alg
14631 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14632 int *dynamic_check
)
14634 const struct stringop_algs
* algs
;
14636 *dynamic_check
= -1;
14638 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14640 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14641 if (stringop_alg
!= no_stringop
)
14642 return stringop_alg
;
14643 /* rep; movq or rep; movl is the smallest variant. */
14644 else if (optimize_size
)
14646 if (!count
|| (count
& 3))
14647 return rep_prefix_1_byte
;
14649 return rep_prefix_4_byte
;
14651 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14653 else if (expected_size
!= -1 && expected_size
< 4)
14654 return loop_1_byte
;
14655 else if (expected_size
!= -1)
14658 enum stringop_alg alg
= libcall
;
14659 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14661 gcc_assert (algs
->size
[i
].max
);
14662 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14664 if (algs
->size
[i
].alg
!= libcall
)
14665 alg
= algs
->size
[i
].alg
;
14666 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14667 last non-libcall inline algorithm. */
14668 if (TARGET_INLINE_ALL_STRINGOPS
)
14670 /* When the current size is best to be copied by a libcall,
14671 but we are still forced to inline, run the heuristic bellow
14672 that will pick code for medium sized blocks. */
14673 if (alg
!= libcall
)
14678 return algs
->size
[i
].alg
;
14681 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14683 /* When asked to inline the call anyway, try to pick meaningful choice.
14684 We look for maximal size of block that is faster to copy by hand and
14685 take blocks of at most of that size guessing that average size will
14686 be roughly half of the block.
14688 If this turns out to be bad, we might simply specify the preferred
14689 choice in ix86_costs. */
14690 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14691 && algs
->unknown_size
== libcall
)
14694 enum stringop_alg alg
;
14697 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14698 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14699 max
= algs
->size
[i
].max
;
14702 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14703 gcc_assert (*dynamic_check
== -1);
14704 gcc_assert (alg
!= libcall
);
14705 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14706 *dynamic_check
= max
;
14709 return algs
->unknown_size
;
14712 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14713 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14715 decide_alignment (int align
,
14716 enum stringop_alg alg
,
14719 int desired_align
= 0;
14723 gcc_unreachable ();
14725 case unrolled_loop
:
14726 desired_align
= GET_MODE_SIZE (Pmode
);
14728 case rep_prefix_8_byte
:
14731 case rep_prefix_4_byte
:
14732 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14733 copying whole cacheline at once. */
14734 if (TARGET_PENTIUMPRO
)
14739 case rep_prefix_1_byte
:
14740 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14741 copying whole cacheline at once. */
14742 if (TARGET_PENTIUMPRO
)
14756 if (desired_align
< align
)
14757 desired_align
= align
;
14758 if (expected_size
!= -1 && expected_size
< 4)
14759 desired_align
= align
;
14760 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14773 /* Expand string move (memcpy) operation. Use i386 string operations when
14774 profitable. expand_clrmem contains similar code. The code depends upon
14775 architecture, block size and alignment, but always has the same
14778 1) Prologue guard: Conditional that jumps up to epilogues for small
14779 blocks that can be handled by epilogue alone. This is faster but
14780 also needed for correctness, since prologue assume the block is larger
14781 than the desired alignment.
14783 Optional dynamic check for size and libcall for large
14784 blocks is emitted here too, with -minline-stringops-dynamically.
14786 2) Prologue: copy first few bytes in order to get destination aligned
14787 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14788 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14789 We emit either a jump tree on power of two sized blocks, or a byte loop.
14791 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14792 with specified algorithm.
14794 4) Epilogue: code copying tail of the block that is too small to be
14795 handled by main body (or up to size guarded by prologue guard). */
14798 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14799 rtx expected_align_exp
, rtx expected_size_exp
)
14805 rtx jump_around_label
= NULL
;
14806 HOST_WIDE_INT align
= 1;
14807 unsigned HOST_WIDE_INT count
= 0;
14808 HOST_WIDE_INT expected_size
= -1;
14809 int size_needed
= 0, epilogue_size_needed
;
14810 int desired_align
= 0;
14811 enum stringop_alg alg
;
14814 if (CONST_INT_P (align_exp
))
14815 align
= INTVAL (align_exp
);
14816 /* i386 can do misaligned access on reasonably increased cost. */
14817 if (CONST_INT_P (expected_align_exp
)
14818 && INTVAL (expected_align_exp
) > align
)
14819 align
= INTVAL (expected_align_exp
);
14820 if (CONST_INT_P (count_exp
))
14821 count
= expected_size
= INTVAL (count_exp
);
14822 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14823 expected_size
= INTVAL (expected_size_exp
);
14825 /* Step 0: Decide on preferred algorithm, desired alignment and
14826 size of chunks to be copied by main loop. */
14828 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14829 desired_align
= decide_alignment (align
, alg
, expected_size
);
14831 if (!TARGET_ALIGN_STRINGOPS
)
14832 align
= desired_align
;
14834 if (alg
== libcall
)
14836 gcc_assert (alg
!= no_stringop
);
14838 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14839 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14840 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14845 gcc_unreachable ();
14847 size_needed
= GET_MODE_SIZE (Pmode
);
14849 case unrolled_loop
:
14850 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14852 case rep_prefix_8_byte
:
14855 case rep_prefix_4_byte
:
14858 case rep_prefix_1_byte
:
14864 epilogue_size_needed
= size_needed
;
14866 /* Step 1: Prologue guard. */
14868 /* Alignment code needs count to be in register. */
14869 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14871 enum machine_mode mode
= SImode
;
14872 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14874 count_exp
= force_reg (mode
, count_exp
);
14876 gcc_assert (desired_align
>= 1 && align
>= 1);
14878 /* Ensure that alignment prologue won't copy past end of block. */
14879 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14881 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14882 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14883 Make sure it is power of 2. */
14884 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14886 label
= gen_label_rtx ();
14887 emit_cmp_and_jump_insns (count_exp
,
14888 GEN_INT (epilogue_size_needed
),
14889 LTU
, 0, counter_mode (count_exp
), 1, label
);
14890 if (GET_CODE (count_exp
) == CONST_INT
)
14892 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14893 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14895 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14897 /* Emit code to decide on runtime whether library call or inline should be
14899 if (dynamic_check
!= -1)
14901 rtx hot_label
= gen_label_rtx ();
14902 jump_around_label
= gen_label_rtx ();
14903 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14904 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14905 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14906 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14907 emit_jump (jump_around_label
);
14908 emit_label (hot_label
);
14911 /* Step 2: Alignment prologue. */
14913 if (desired_align
> align
)
14915 /* Except for the first move in epilogue, we no longer know
14916 constant offset in aliasing info. It don't seems to worth
14917 the pain to maintain it for the first move, so throw away
14919 src
= change_address (src
, BLKmode
, srcreg
);
14920 dst
= change_address (dst
, BLKmode
, destreg
);
14921 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14924 if (label
&& size_needed
== 1)
14926 emit_label (label
);
14927 LABEL_NUSES (label
) = 1;
14931 /* Step 3: Main loop. */
14937 gcc_unreachable ();
14939 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14940 count_exp
, QImode
, 1, expected_size
);
14943 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14944 count_exp
, Pmode
, 1, expected_size
);
14946 case unrolled_loop
:
14947 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14948 registers for 4 temporaries anyway. */
14949 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14950 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14953 case rep_prefix_8_byte
:
14954 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14957 case rep_prefix_4_byte
:
14958 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14961 case rep_prefix_1_byte
:
14962 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14966 /* Adjust properly the offset of src and dest memory for aliasing. */
14967 if (CONST_INT_P (count_exp
))
14969 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14970 (count
/ size_needed
) * size_needed
);
14971 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14972 (count
/ size_needed
) * size_needed
);
14976 src
= change_address (src
, BLKmode
, srcreg
);
14977 dst
= change_address (dst
, BLKmode
, destreg
);
14980 /* Step 4: Epilogue to copy the remaining bytes. */
14984 /* When the main loop is done, COUNT_EXP might hold original count,
14985 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14986 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14987 bytes. Compensate if needed. */
14989 if (size_needed
< epilogue_size_needed
)
14992 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14993 GEN_INT (size_needed
- 1), count_exp
, 1,
14995 if (tmp
!= count_exp
)
14996 emit_move_insn (count_exp
, tmp
);
14998 emit_label (label
);
14999 LABEL_NUSES (label
) = 1;
15002 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15003 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
15004 epilogue_size_needed
);
15005 if (jump_around_label
)
15006 emit_label (jump_around_label
);
15010 /* Helper function for memcpy. For QImode value 0xXY produce
15011 0xXYXYXYXY of wide specified by MODE. This is essentially
15012 a * 0x10101010, but we can do slightly better than
15013 synth_mult by unwinding the sequence by hand on CPUs with
15016 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
15018 enum machine_mode valmode
= GET_MODE (val
);
15020 int nops
= mode
== DImode
? 3 : 2;
15022 gcc_assert (mode
== SImode
|| mode
== DImode
);
15023 if (val
== const0_rtx
)
15024 return copy_to_mode_reg (mode
, const0_rtx
);
15025 if (CONST_INT_P (val
))
15027 HOST_WIDE_INT v
= INTVAL (val
) & 255;
15031 if (mode
== DImode
)
15032 v
|= (v
<< 16) << 16;
15033 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
15036 if (valmode
== VOIDmode
)
15038 if (valmode
!= QImode
)
15039 val
= gen_lowpart (QImode
, val
);
15040 if (mode
== QImode
)
15042 if (!TARGET_PARTIAL_REG_STALL
)
15044 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
15045 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
15046 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
15047 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
15049 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15050 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
15051 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
15056 rtx reg
= convert_modes (mode
, QImode
, val
, true);
15058 if (!TARGET_PARTIAL_REG_STALL
)
15059 if (mode
== SImode
)
15060 emit_insn (gen_movsi_insv_1 (reg
, reg
));
15062 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
15065 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
15066 NULL
, 1, OPTAB_DIRECT
);
15068 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15070 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
15071 NULL
, 1, OPTAB_DIRECT
);
15072 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15073 if (mode
== SImode
)
15075 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
15076 NULL
, 1, OPTAB_DIRECT
);
15077 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
15082 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15083 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15084 alignment from ALIGN to DESIRED_ALIGN. */
15086 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
15091 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
15092 promoted_val
= promote_duplicated_reg (DImode
, val
);
15093 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
15094 promoted_val
= promote_duplicated_reg (SImode
, val
);
15095 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
15096 promoted_val
= promote_duplicated_reg (HImode
, val
);
15098 promoted_val
= val
;
15100 return promoted_val
;
15103 /* Expand string clear operation (bzero). Use i386 string operations when
15104 profitable. See expand_movmem comment for explanation of individual
15105 steps performed. */
15107 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
15108 rtx expected_align_exp
, rtx expected_size_exp
)
15113 rtx jump_around_label
= NULL
;
15114 HOST_WIDE_INT align
= 1;
15115 unsigned HOST_WIDE_INT count
= 0;
15116 HOST_WIDE_INT expected_size
= -1;
15117 int size_needed
= 0, epilogue_size_needed
;
15118 int desired_align
= 0;
15119 enum stringop_alg alg
;
15120 rtx promoted_val
= NULL
;
15121 bool force_loopy_epilogue
= false;
15124 if (CONST_INT_P (align_exp
))
15125 align
= INTVAL (align_exp
);
15126 /* i386 can do misaligned access on reasonably increased cost. */
15127 if (CONST_INT_P (expected_align_exp
)
15128 && INTVAL (expected_align_exp
) > align
)
15129 align
= INTVAL (expected_align_exp
);
15130 if (CONST_INT_P (count_exp
))
15131 count
= expected_size
= INTVAL (count_exp
);
15132 if (CONST_INT_P (expected_size_exp
) && count
== 0)
15133 expected_size
= INTVAL (expected_size_exp
);
15135 /* Step 0: Decide on preferred algorithm, desired alignment and
15136 size of chunks to be copied by main loop. */
15138 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
15139 desired_align
= decide_alignment (align
, alg
, expected_size
);
15141 if (!TARGET_ALIGN_STRINGOPS
)
15142 align
= desired_align
;
15144 if (alg
== libcall
)
15146 gcc_assert (alg
!= no_stringop
);
15148 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
15149 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
15154 gcc_unreachable ();
15156 size_needed
= GET_MODE_SIZE (Pmode
);
15158 case unrolled_loop
:
15159 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
15161 case rep_prefix_8_byte
:
15164 case rep_prefix_4_byte
:
15167 case rep_prefix_1_byte
:
15172 epilogue_size_needed
= size_needed
;
15174 /* Step 1: Prologue guard. */
15176 /* Alignment code needs count to be in register. */
15177 if (CONST_INT_P (count_exp
) && desired_align
> align
)
15179 enum machine_mode mode
= SImode
;
15180 if (TARGET_64BIT
&& (count
& ~0xffffffff))
15182 count_exp
= force_reg (mode
, count_exp
);
15184 /* Do the cheap promotion to allow better CSE across the
15185 main loop and epilogue (ie one load of the big constant in the
15186 front of all code. */
15187 if (CONST_INT_P (val_exp
))
15188 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15189 desired_align
, align
);
15190 /* Ensure that alignment prologue won't copy past end of block. */
15191 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
15193 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
15194 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15195 Make sure it is power of 2. */
15196 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
15198 /* To improve performance of small blocks, we jump around the VAL
15199 promoting mode. This mean that if the promoted VAL is not constant,
15200 we might not use it in the epilogue and have to use byte
15202 if (epilogue_size_needed
> 2 && !promoted_val
)
15203 force_loopy_epilogue
= true;
15204 label
= gen_label_rtx ();
15205 emit_cmp_and_jump_insns (count_exp
,
15206 GEN_INT (epilogue_size_needed
),
15207 LTU
, 0, counter_mode (count_exp
), 1, label
);
15208 if (GET_CODE (count_exp
) == CONST_INT
)
15210 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
15211 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
15213 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
15215 if (dynamic_check
!= -1)
15217 rtx hot_label
= gen_label_rtx ();
15218 jump_around_label
= gen_label_rtx ();
15219 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
15220 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
15221 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
15222 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
15223 emit_jump (jump_around_label
);
15224 emit_label (hot_label
);
15227 /* Step 2: Alignment prologue. */
15229 /* Do the expensive promotion once we branched off the small blocks. */
15231 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
15232 desired_align
, align
);
15233 gcc_assert (desired_align
>= 1 && align
>= 1);
15235 if (desired_align
> align
)
15237 /* Except for the first move in epilogue, we no longer know
15238 constant offset in aliasing info. It don't seems to worth
15239 the pain to maintain it for the first move, so throw away
15241 dst
= change_address (dst
, BLKmode
, destreg
);
15242 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
15245 if (label
&& size_needed
== 1)
15247 emit_label (label
);
15248 LABEL_NUSES (label
) = 1;
15252 /* Step 3: Main loop. */
15258 gcc_unreachable ();
15260 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15261 count_exp
, QImode
, 1, expected_size
);
15264 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15265 count_exp
, Pmode
, 1, expected_size
);
15267 case unrolled_loop
:
15268 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
15269 count_exp
, Pmode
, 4, expected_size
);
15271 case rep_prefix_8_byte
:
15272 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15275 case rep_prefix_4_byte
:
15276 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15279 case rep_prefix_1_byte
:
15280 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
15284 /* Adjust properly the offset of src and dest memory for aliasing. */
15285 if (CONST_INT_P (count_exp
))
15286 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
15287 (count
/ size_needed
) * size_needed
);
15289 dst
= change_address (dst
, BLKmode
, destreg
);
15291 /* Step 4: Epilogue to copy the remaining bytes. */
15295 /* When the main loop is done, COUNT_EXP might hold original count,
15296 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15297 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15298 bytes. Compensate if needed. */
15300 if (size_needed
< desired_align
- align
)
15303 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
15304 GEN_INT (size_needed
- 1), count_exp
, 1,
15306 size_needed
= desired_align
- align
+ 1;
15307 if (tmp
!= count_exp
)
15308 emit_move_insn (count_exp
, tmp
);
15310 emit_label (label
);
15311 LABEL_NUSES (label
) = 1;
15313 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
15315 if (force_loopy_epilogue
)
15316 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
15319 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
15322 if (jump_around_label
)
15323 emit_label (jump_around_label
);
15327 /* Expand the appropriate insns for doing strlen if not just doing
15330 out = result, initialized with the start address
15331 align_rtx = alignment of the address.
15332 scratch = scratch register, initialized with the startaddress when
15333 not aligned, otherwise undefined
15335 This is just the body. It needs the initializations mentioned above and
15336 some address computing at the end. These things are done in i386.md. */
15339 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
15343 rtx align_2_label
= NULL_RTX
;
15344 rtx align_3_label
= NULL_RTX
;
15345 rtx align_4_label
= gen_label_rtx ();
15346 rtx end_0_label
= gen_label_rtx ();
15348 rtx tmpreg
= gen_reg_rtx (SImode
);
15349 rtx scratch
= gen_reg_rtx (SImode
);
15353 if (CONST_INT_P (align_rtx
))
15354 align
= INTVAL (align_rtx
);
15356 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15358 /* Is there a known alignment and is it less than 4? */
15361 rtx scratch1
= gen_reg_rtx (Pmode
);
15362 emit_move_insn (scratch1
, out
);
15363 /* Is there a known alignment and is it not 2? */
15366 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
15367 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
15369 /* Leave just the 3 lower bits. */
15370 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
15371 NULL_RTX
, 0, OPTAB_WIDEN
);
15373 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15374 Pmode
, 1, align_4_label
);
15375 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
15376 Pmode
, 1, align_2_label
);
15377 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
15378 Pmode
, 1, align_3_label
);
15382 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15383 check if is aligned to 4 - byte. */
15385 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
15386 NULL_RTX
, 0, OPTAB_WIDEN
);
15388 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
15389 Pmode
, 1, align_4_label
);
15392 mem
= change_address (src
, QImode
, out
);
15394 /* Now compare the bytes. */
15396 /* Compare the first n unaligned byte on a byte per byte basis. */
15397 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
15398 QImode
, 1, end_0_label
);
15400 /* Increment the address. */
15402 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15404 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15406 /* Not needed with an alignment of 2 */
15409 emit_label (align_2_label
);
15411 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15415 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15417 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15419 emit_label (align_3_label
);
15422 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
15426 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
15428 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
15431 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15432 align this loop. It gives only huge programs, but does not help to
15434 emit_label (align_4_label
);
15436 mem
= change_address (src
, SImode
, out
);
15437 emit_move_insn (scratch
, mem
);
15439 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
15441 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
15443 /* This formula yields a nonzero result iff one of the bytes is zero.
15444 This saves three branches inside loop and many cycles. */
15446 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
15447 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
15448 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
15449 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
15450 gen_int_mode (0x80808080, SImode
)));
15451 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
15456 rtx reg
= gen_reg_rtx (SImode
);
15457 rtx reg2
= gen_reg_rtx (Pmode
);
15458 emit_move_insn (reg
, tmpreg
);
15459 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
15461 /* If zero is not in the first two bytes, move two bytes forward. */
15462 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15463 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15464 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15465 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
15466 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
15469 /* Emit lea manually to avoid clobbering of flags. */
15470 emit_insn (gen_rtx_SET (SImode
, reg2
,
15471 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
15473 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15474 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
15475 emit_insn (gen_rtx_SET (VOIDmode
, out
,
15476 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15483 rtx end_2_label
= gen_label_rtx ();
15484 /* Is zero in the first two bytes? */
15486 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15487 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15488 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15489 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15490 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15492 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15493 JUMP_LABEL (tmp
) = end_2_label
;
15495 /* Not in the first two. Move two bytes forward. */
15496 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15498 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15500 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15502 emit_label (end_2_label
);
15506 /* Avoid branch in fixing the byte. */
15507 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15508 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15509 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, FLAGS_REG
), const0_rtx
);
15511 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15513 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15515 emit_label (end_0_label
);
15518 /* Expand strlen. */
15521 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
15523 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
15525 /* The generic case of strlen expander is long. Avoid it's
15526 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
15528 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15529 && !TARGET_INLINE_ALL_STRINGOPS
15531 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
15534 addr
= force_reg (Pmode
, XEXP (src
, 0));
15535 scratch1
= gen_reg_rtx (Pmode
);
15537 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
15540 /* Well it seems that some optimizer does not combine a call like
15541 foo(strlen(bar), strlen(bar));
15542 when the move and the subtraction is done here. It does calculate
15543 the length just once when these instructions are done inside of
15544 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15545 often used and I use one fewer register for the lifetime of
15546 output_strlen_unroll() this is better. */
15548 emit_move_insn (out
, addr
);
15550 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
15552 /* strlensi_unroll_1 returns the address of the zero at the end of
15553 the string, like memchr(), so compute the length by subtracting
15554 the start address. */
15556 emit_insn (gen_subdi3 (out
, out
, addr
));
15558 emit_insn (gen_subsi3 (out
, out
, addr
));
15563 scratch2
= gen_reg_rtx (Pmode
);
15564 scratch3
= gen_reg_rtx (Pmode
);
15565 scratch4
= force_reg (Pmode
, constm1_rtx
);
15567 emit_move_insn (scratch3
, addr
);
15568 eoschar
= force_reg (QImode
, eoschar
);
15570 src
= replace_equiv_address_nv (src
, scratch3
);
15572 /* If .md starts supporting :P, this can be done in .md. */
15573 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
15574 scratch4
), UNSPEC_SCAS
);
15575 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
15578 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
15579 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
15583 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
15584 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
15590 /* For given symbol (function) construct code to compute address of it's PLT
15591 entry in large x86-64 PIC model. */
15593 construct_plt_address (rtx symbol
)
15595 rtx tmp
= gen_reg_rtx (Pmode
);
15596 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15598 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15599 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15601 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15602 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15607 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15608 rtx callarg2 ATTRIBUTE_UNUSED
,
15609 rtx pop
, int sibcall
)
15611 rtx use
= NULL
, call
;
15613 if (pop
== const0_rtx
)
15615 gcc_assert (!TARGET_64BIT
|| !pop
);
15617 if (TARGET_MACHO
&& !TARGET_64BIT
)
15620 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15621 fnaddr
= machopic_indirect_call_target (fnaddr
);
15626 /* Static functions and indirect calls don't need the pic register. */
15627 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15628 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15629 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15630 use_reg (&use
, pic_offset_table_rtx
);
15633 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15635 rtx al
= gen_rtx_REG (QImode
, 0);
15636 emit_move_insn (al
, callarg2
);
15637 use_reg (&use
, al
);
15640 if (ix86_cmodel
== CM_LARGE_PIC
15641 && GET_CODE (fnaddr
) == MEM
15642 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15643 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15644 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15645 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15647 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15648 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15650 if (sibcall
&& TARGET_64BIT
15651 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15654 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15655 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15656 emit_move_insn (fnaddr
, addr
);
15657 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15660 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15662 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15665 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15666 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15667 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15670 call
= emit_call_insn (call
);
15672 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15676 /* Clear stack slot assignments remembered from previous functions.
15677 This is called from INIT_EXPANDERS once before RTL is emitted for each
15680 static struct machine_function
*
15681 ix86_init_machine_status (void)
15683 struct machine_function
*f
;
15685 f
= GGC_CNEW (struct machine_function
);
15686 f
->use_fast_prologue_epilogue_nregs
= -1;
15687 f
->tls_descriptor_call_expanded_p
= 0;
15692 /* Return a MEM corresponding to a stack slot with mode MODE.
15693 Allocate a new slot if necessary.
15695 The RTL for a function can have several slots available: N is
15696 which slot to use. */
15699 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15701 struct stack_local_entry
*s
;
15703 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15705 /* Virtual slot is valid only before vregs are instantiated. */
15706 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
15708 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15709 if (s
->mode
== mode
&& s
->n
== n
)
15710 return copy_rtx (s
->rtl
);
15712 s
= (struct stack_local_entry
*)
15713 ggc_alloc (sizeof (struct stack_local_entry
));
15716 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15718 s
->next
= ix86_stack_locals
;
15719 ix86_stack_locals
= s
;
15723 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15725 static GTY(()) rtx ix86_tls_symbol
;
15727 ix86_tls_get_addr (void)
15730 if (!ix86_tls_symbol
)
15732 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15733 (TARGET_ANY_GNU_TLS
15735 ? "___tls_get_addr"
15736 : "__tls_get_addr");
15739 return ix86_tls_symbol
;
15742 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15744 static GTY(()) rtx ix86_tls_module_base_symbol
;
15746 ix86_tls_module_base (void)
15749 if (!ix86_tls_module_base_symbol
)
15751 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15752 "_TLS_MODULE_BASE_");
15753 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15754 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15757 return ix86_tls_module_base_symbol
;
15760 /* Calculate the length of the memory address in the instruction
15761 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15764 memory_address_length (rtx addr
)
15766 struct ix86_address parts
;
15767 rtx base
, index
, disp
;
15771 if (GET_CODE (addr
) == PRE_DEC
15772 || GET_CODE (addr
) == POST_INC
15773 || GET_CODE (addr
) == PRE_MODIFY
15774 || GET_CODE (addr
) == POST_MODIFY
)
15777 ok
= ix86_decompose_address (addr
, &parts
);
15780 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15781 parts
.base
= SUBREG_REG (parts
.base
);
15782 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15783 parts
.index
= SUBREG_REG (parts
.index
);
15786 index
= parts
.index
;
15791 - esp as the base always wants an index,
15792 - ebp as the base always wants a displacement. */
15794 /* Register Indirect. */
15795 if (base
&& !index
&& !disp
)
15797 /* esp (for its index) and ebp (for its displacement) need
15798 the two-byte modrm form. */
15799 if (addr
== stack_pointer_rtx
15800 || addr
== arg_pointer_rtx
15801 || addr
== frame_pointer_rtx
15802 || addr
== hard_frame_pointer_rtx
)
15806 /* Direct Addressing. */
15807 else if (disp
&& !base
&& !index
)
15812 /* Find the length of the displacement constant. */
15815 if (base
&& satisfies_constraint_K (disp
))
15820 /* ebp always wants a displacement. */
15821 else if (base
== hard_frame_pointer_rtx
)
15824 /* An index requires the two-byte modrm form.... */
15826 /* ...like esp, which always wants an index. */
15827 || base
== stack_pointer_rtx
15828 || base
== arg_pointer_rtx
15829 || base
== frame_pointer_rtx
)
15836 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15837 is set, expect that insn have 8bit immediate alternative. */
15839 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15843 extract_insn_cached (insn
);
15844 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15845 if (CONSTANT_P (recog_data
.operand
[i
]))
15848 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15852 switch (get_attr_mode (insn
))
15863 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15868 fatal_insn ("unknown insn mode", insn
);
15874 /* Compute default value for "length_address" attribute. */
15876 ix86_attr_length_address_default (rtx insn
)
15880 if (get_attr_type (insn
) == TYPE_LEA
)
15882 rtx set
= PATTERN (insn
);
15884 if (GET_CODE (set
) == PARALLEL
)
15885 set
= XVECEXP (set
, 0, 0);
15887 gcc_assert (GET_CODE (set
) == SET
);
15889 return memory_address_length (SET_SRC (set
));
15892 extract_insn_cached (insn
);
15893 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15894 if (MEM_P (recog_data
.operand
[i
]))
15896 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15902 /* Return the maximum number of instructions a cpu can issue. */
15905 ix86_issue_rate (void)
15909 case PROCESSOR_PENTIUM
:
15913 case PROCESSOR_PENTIUMPRO
:
15914 case PROCESSOR_PENTIUM4
:
15915 case PROCESSOR_ATHLON
:
15917 case PROCESSOR_AMDFAM10
:
15918 case PROCESSOR_NOCONA
:
15919 case PROCESSOR_GENERIC32
:
15920 case PROCESSOR_GENERIC64
:
15923 case PROCESSOR_CORE2
:
15931 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15932 by DEP_INSN and nothing set by DEP_INSN. */
15935 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15939 /* Simplify the test for uninteresting insns. */
15940 if (insn_type
!= TYPE_SETCC
15941 && insn_type
!= TYPE_ICMOV
15942 && insn_type
!= TYPE_FCMOV
15943 && insn_type
!= TYPE_IBR
)
15946 if ((set
= single_set (dep_insn
)) != 0)
15948 set
= SET_DEST (set
);
15951 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15952 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15953 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15954 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15956 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15957 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15962 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15965 /* This test is true if the dependent insn reads the flags but
15966 not any other potentially set register. */
15967 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15970 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15976 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15977 address with operands set by DEP_INSN. */
/* NOTE(review): interior lines were dropped by this extraction (original
   line numbers jump 15984 -> 15987, 15994 -> 15999, 16003 -> 16010): the
   function header's return type, the local declarations, the braces, the
   else-arm joining the LEA path to the operand-scan path, and the scan's
   early-exit handling are all missing.  Recover them before editing.  */
15980 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
/* LEA computes its address in its SET_SRC, so extract that directly.  */
15984 if (insn_type
== TYPE_LEA
15987 addr
= PATTERN (insn
);
15989 if (GET_CODE (addr
) == PARALLEL
)
15990 addr
= XVECEXP (addr
, 0, 0);
15992 gcc_assert (GET_CODE (addr
) == SET
);
15994 addr
= SET_SRC (addr
);
/* Otherwise scan the recognized operands for the first MEM and take its
   address expression.  */
15999 extract_insn_cached (insn
)
;
16000 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16001 if (MEM_P (recog_data
.operand
[i
]))
16003 addr
= XEXP (recog_data
.operand
[i
], 0);
/* True iff DEP_INSN writes something the address expression reads.  */
16010 return modified_in_p (addr
, dep_insn
);
/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN for the current tuning target.
   NOTE(review): this extraction dropped interior lines (the embedded
   original line numbers are not contiguous): the function header's return
   type, the early `return 0/cost' statements, the switch header --
   presumably over the tuning target, TODO confirm -- several case labels,
   the braces, and the final return are missing.  Recover them from the
   original file before editing.  */
16014 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
16016 enum attr_type insn_type
, dep_insn_type
;
16017 enum attr_memory memory
;
16019 int dep_insn_code_number
;
16021 /* Anti and output dependencies have zero cost on all CPUs. */
16022 if (REG_NOTE_KIND (link
) != 0)
16025 dep_insn_code_number
= recog_memoized (dep_insn
);
16027 /* If we can't recognize the insns, we can't really do anything. */
16028 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
16031 insn_type
= get_attr_type (insn
);
16032 dep_insn_type
= get_attr_type (dep_insn
);
16036 case PROCESSOR_PENTIUM
:
16037 /* Address Generation Interlock adds a cycle of latency. */
16038 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16041 /* ??? Compares pair with jump/setcc. */
16042 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
16045 /* Floating point stores require value to be ready one cycle earlier. */
16046 if (insn_type
== TYPE_FMOV
16047 && get_attr_memory (insn
) == MEMORY_STORE
16048 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16052 case PROCESSOR_PENTIUMPRO
:
16053 memory
= get_attr_memory (insn
);
16055 /* INT->FP conversion is expensive. */
16056 if (get_attr_fp_int_src (dep_insn
))
16059 /* There is one cycle extra latency between an FP op and a store. */
16060 if (insn_type
== TYPE_FMOV
16061 && (set
= single_set (dep_insn
)) != NULL_RTX
16062 && (set2
= single_set (insn
)) != NULL_RTX
16063 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
16064 && MEM_P (SET_DEST (set2
)))
16067 /* Show ability of reorder buffer to hide latency of load by executing
16068 in parallel with previous instruction in case
16069 previous instruction is not needed to compute the address. */
16070 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16071 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16073 /* Claim moves to take one cycle, as core can issue one load
16074 at time and the next load can start cycle later. */
16075 if (dep_insn_type
== TYPE_IMOV
16076 || dep_insn_type
== TYPE_FMOV
)
/* NOTE(review): a case label for another processor (dropped around
   original lines 16077-16083) presumably begins here -- confirm.  */
16084 memory
= get_attr_memory (insn
);
16086 /* The esp dependency is resolved before the instruction is really
/* NOTE(review): the second half of the comment above (original line
   16087) was dropped by the extraction.  */
16088 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
16089 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
16092 /* INT->FP conversion is expensive. */
16093 if (get_attr_fp_int_src (dep_insn
))
16096 /* Show ability of reorder buffer to hide latency of load by executing
16097 in parallel with previous instruction in case
16098 previous instruction is not needed to compute the address. */
16099 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16100 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16102 /* Claim moves to take one cycle, as core can issue one load
16103 at time and the next load can start cycle later. */
16104 if (dep_insn_type
== TYPE_IMOV
16105 || dep_insn_type
== TYPE_FMOV
)
16114 case PROCESSOR_ATHLON
:
16116 case PROCESSOR_AMDFAM10
:
16117 case PROCESSOR_GENERIC32
:
16118 case PROCESSOR_GENERIC64
:
16119 memory
= get_attr_memory (insn
);
16121 /* Show ability of reorder buffer to hide latency of load by executing
16122 in parallel with previous instruction in case
16123 previous instruction is not needed to compute the address. */
16124 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
16125 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
16127 enum attr_unit unit
= get_attr_unit (insn
);
16130 /* Because of the difference between the length of integer and
16131 floating unit pipeline preparation stages, the memory operands
16132 for floating point are cheaper.
16134 ??? For Athlon it the difference is most probably 2. */
16135 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
16138 loadcost
= TARGET_ATHLON
? 2 : 0;
16140 if (cost
>= loadcost
)
16153 /* How many alternative schedules to try. This should be as wide as the
16154 scheduling freedom in the DFA, but no wider. Making this value too
16155 large results extra work for the scheduler. */
/* NOTE(review): the return type, braces and the return statements were
   dropped by this extraction (original line numbers jump 16158 -> 16160,
   16160 -> 16163); only the tuning-target tests survive.  */
16158 ia32_multipass_dfa_lookahead (void)
16160 if (ix86_tune
== PROCESSOR_PENTIUM
)
16163 if (ix86_tune
== PROCESSOR_PENTIUMPRO
16164 || ix86_tune
== PROCESSOR_K6
)
16172 /* Compute the alignment given to a constant that is being placed in memory.
16173 EXP is the constant and ALIGN is the alignment that the object would
/* NOTE(review): parts of this header comment and of the body (return type,
   braces, the return statements for the REAL_CST branches, the final
   fall-through return) were dropped by the extraction -- original line
   numbers jump 16173 -> 16175, 16175 -> 16179, 16181 -> 16183.  */
16175 The value of this function is used instead of that alignment to align
16179 ix86_constant_alignment (tree exp
, int align
)
/* Floating-point constants: widen alignment for DFmode and for 128-bit
   modes (returns at dropped lines 16184/16186 -- TODO confirm values).  */
16181 if (TREE_CODE (exp
) == REAL_CST
)
16183 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
16185 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
/* Long string constants get word alignment unless optimizing for size.  */
16188 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16189 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16190 return BITS_PER_WORD
;
16195 /* Compute the alignment for a static variable.
16196 TYPE is the data type, and ALIGN is the alignment that
16197 the object would ordinarily have. The value of this function is used
16198 instead of that alignment to align the object. */
/* NOTE(review): the return type, braces and every `return' statement were
   dropped by this extraction (original line numbers are not contiguous);
   each `if (... && align < N)' below is followed by a dropped return.
   Recover them before editing.  */
16201 ix86_data_alignment (tree type
, int align
)
/* Cap: word alignment when optimizing for size, else up to 256 bits
   (bounded by what the object file format supports).  */
16203 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
/* Large aggregates are raised to the cap.  */
16205 if (AGGREGATE_TYPE_P (type
)
16206 && TYPE_SIZE (type
)
16207 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16208 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
16209 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
16210 && align
< max_align
)
16213 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16214 to 16byte boundary. */
/* NOTE(review): the TARGET_64BIT guard for the ABI rule below (dropped
   original lines 16215-16216) is missing -- confirm against the original.  */
16217 if (AGGREGATE_TYPE_P (type
)
16218 && TYPE_SIZE (type
)
16219 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16220 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
16221 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Element-driven widening: DFmode elements to 64, 128-bit modes to 128.  */
16225 if (TREE_CODE (type
) == ARRAY_TYPE
)
16227 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16229 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16232 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16235 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16237 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
/* Records/unions: widen based on the mode of the first field.  */
16240 else if ((TREE_CODE (type
) == RECORD_TYPE
16241 || TREE_CODE (type
) == UNION_TYPE
16242 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16243 && TYPE_FIELDS (type
))
16245 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16247 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16250 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16251 || TREE_CODE (type
) == INTEGER_TYPE
)
16253 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16255 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16262 /* Compute the alignment for a local variable.
16263 TYPE is the data type, and ALIGN is the alignment that
16264 the object would ordinarily have. The value of this macro is used
16265 instead of that alignment to align the object. */
/* NOTE(review): return type, braces and return statements were dropped by
   this extraction (original line numbers are not contiguous); each
   `if (... && align < N)' below is followed by a dropped return.  The
   structure parallels ix86_data_alignment but with a 16-byte (not
   max_align) threshold for the ABI rule.  */
16268 ix86_local_alignment (tree type
, int align
)
16270 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16271 to 16byte boundary. */
/* NOTE(review): the TARGET_64BIT guard for the ABI rule below (dropped
   original lines 16272-16273) is missing -- confirm against the original.  */
16274 if (AGGREGATE_TYPE_P (type
)
16275 && TYPE_SIZE (type
)
16276 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16277 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
16278 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Element-driven widening, same scheme as ix86_data_alignment.  */
16281 if (TREE_CODE (type
) == ARRAY_TYPE
)
16283 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16285 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16288 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16290 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16292 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
16295 else if ((TREE_CODE (type
) == RECORD_TYPE
16296 || TREE_CODE (type
) == UNION_TYPE
16297 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16298 && TYPE_FIELDS (type
))
16300 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16302 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16305 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16306 || TREE_CODE (type
) == INTEGER_TYPE
)
16309 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16311 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16317 /* Emit RTL insns to initialize the variable parts of a trampoline.
16318 FNADDR is an RTX for the address of the function's pure code.
16319 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): the return type, the 32/64-bit branch structure, the local
   `offset' declaration/updates and several closing braces were dropped by
   this extraction (original line numbers jump, e.g. 16334 -> 16339,
   16348 -> 16353); recover them before editing.  */
16321 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
/* --- 32-bit trampoline: `mov ecx, CXT; jmp FNADDR' --- */
16325 /* Compute offset from the end of the jmp to the target function. */
16326 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16327 plus_constant (tramp
, 10),
16328 NULL_RTX
, 1, OPTAB_DIRECT
);
/* 0xb9 = mov ecx, imm32 opcode byte.  */
16329 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
16330 gen_int_mode (0xb9, QImode
));
16331 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
/* 0xe9 = jmp rel32 opcode byte; DISP is the rel32 computed above.  */
16332 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
16333 gen_int_mode (0xe9, QImode
));
16334 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
/* --- 64-bit trampoline: load FNADDR into r11, CXT into r10, jmp *r11.
   The HImode stores are little-endian, so e.g. 0xbb41 emits bytes
   0x41 0xbb = REX.B + mov r11d, imm32.  --- */
16339 /* Try to load address using shorter movl instead of movabs.
16340 We may want to support movq for kernel mode, but kernel does not use
16341 trampolines at the moment. */
16342 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16344 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
16345 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16346 gen_int_mode (0xbb41, HImode
));
16347 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
16348 gen_lowpart (SImode
, fnaddr
));
/* else-arm: bytes 0x49 0xbb = REX.WB + mov r11, imm64 (movabs); the
   DImode store of FNADDR itself (orig line 16356) was dropped.  */
16353 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16354 gen_int_mode (0xbb49, HImode
));
16355 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16359 /* Load static chain using movabs to r10. */
/* Bytes 0x49 0xba = REX.WB + mov r10, imm64; the DImode store of CXT
   (orig line 16363) was dropped.  */
16360 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16361 gen_int_mode (0xba49, HImode
));
16362 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
16365 /* Jump to the r11 */
/* Bytes 0x49 0xff ... 0xe3 = REX.WB + jmp *r11.  */
16366 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
16367 gen_int_mode (0xff49, HImode
));
16368 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
16369 gen_int_mode (0xe3, QImode
));
/* Sanity check: the emitted bytes must fit the declared trampoline size.  */
16371 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
/* On targets with non-executable stacks, ask libgcc to make the
   trampoline's page executable.  */
16374 #ifdef ENABLE_EXECUTE_STACK
16375 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16376 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
16380 /* Codes for all the SSE/MMX builtins. */
16383 IX86_BUILTIN_ADDPS
,
16384 IX86_BUILTIN_ADDSS
,
16385 IX86_BUILTIN_DIVPS
,
16386 IX86_BUILTIN_DIVSS
,
16387 IX86_BUILTIN_MULPS
,
16388 IX86_BUILTIN_MULSS
,
16389 IX86_BUILTIN_SUBPS
,
16390 IX86_BUILTIN_SUBSS
,
16392 IX86_BUILTIN_CMPEQPS
,
16393 IX86_BUILTIN_CMPLTPS
,
16394 IX86_BUILTIN_CMPLEPS
,
16395 IX86_BUILTIN_CMPGTPS
,
16396 IX86_BUILTIN_CMPGEPS
,
16397 IX86_BUILTIN_CMPNEQPS
,
16398 IX86_BUILTIN_CMPNLTPS
,
16399 IX86_BUILTIN_CMPNLEPS
,
16400 IX86_BUILTIN_CMPNGTPS
,
16401 IX86_BUILTIN_CMPNGEPS
,
16402 IX86_BUILTIN_CMPORDPS
,
16403 IX86_BUILTIN_CMPUNORDPS
,
16404 IX86_BUILTIN_CMPEQSS
,
16405 IX86_BUILTIN_CMPLTSS
,
16406 IX86_BUILTIN_CMPLESS
,
16407 IX86_BUILTIN_CMPNEQSS
,
16408 IX86_BUILTIN_CMPNLTSS
,
16409 IX86_BUILTIN_CMPNLESS
,
16410 IX86_BUILTIN_CMPNGTSS
,
16411 IX86_BUILTIN_CMPNGESS
,
16412 IX86_BUILTIN_CMPORDSS
,
16413 IX86_BUILTIN_CMPUNORDSS
,
16415 IX86_BUILTIN_COMIEQSS
,
16416 IX86_BUILTIN_COMILTSS
,
16417 IX86_BUILTIN_COMILESS
,
16418 IX86_BUILTIN_COMIGTSS
,
16419 IX86_BUILTIN_COMIGESS
,
16420 IX86_BUILTIN_COMINEQSS
,
16421 IX86_BUILTIN_UCOMIEQSS
,
16422 IX86_BUILTIN_UCOMILTSS
,
16423 IX86_BUILTIN_UCOMILESS
,
16424 IX86_BUILTIN_UCOMIGTSS
,
16425 IX86_BUILTIN_UCOMIGESS
,
16426 IX86_BUILTIN_UCOMINEQSS
,
16428 IX86_BUILTIN_CVTPI2PS
,
16429 IX86_BUILTIN_CVTPS2PI
,
16430 IX86_BUILTIN_CVTSI2SS
,
16431 IX86_BUILTIN_CVTSI642SS
,
16432 IX86_BUILTIN_CVTSS2SI
,
16433 IX86_BUILTIN_CVTSS2SI64
,
16434 IX86_BUILTIN_CVTTPS2PI
,
16435 IX86_BUILTIN_CVTTSS2SI
,
16436 IX86_BUILTIN_CVTTSS2SI64
,
16438 IX86_BUILTIN_MAXPS
,
16439 IX86_BUILTIN_MAXSS
,
16440 IX86_BUILTIN_MINPS
,
16441 IX86_BUILTIN_MINSS
,
16443 IX86_BUILTIN_LOADUPS
,
16444 IX86_BUILTIN_STOREUPS
,
16445 IX86_BUILTIN_MOVSS
,
16447 IX86_BUILTIN_MOVHLPS
,
16448 IX86_BUILTIN_MOVLHPS
,
16449 IX86_BUILTIN_LOADHPS
,
16450 IX86_BUILTIN_LOADLPS
,
16451 IX86_BUILTIN_STOREHPS
,
16452 IX86_BUILTIN_STORELPS
,
16454 IX86_BUILTIN_MASKMOVQ
,
16455 IX86_BUILTIN_MOVMSKPS
,
16456 IX86_BUILTIN_PMOVMSKB
,
16458 IX86_BUILTIN_MOVNTPS
,
16459 IX86_BUILTIN_MOVNTQ
,
16461 IX86_BUILTIN_LOADDQU
,
16462 IX86_BUILTIN_STOREDQU
,
16464 IX86_BUILTIN_PACKSSWB
,
16465 IX86_BUILTIN_PACKSSDW
,
16466 IX86_BUILTIN_PACKUSWB
,
16468 IX86_BUILTIN_PADDB
,
16469 IX86_BUILTIN_PADDW
,
16470 IX86_BUILTIN_PADDD
,
16471 IX86_BUILTIN_PADDQ
,
16472 IX86_BUILTIN_PADDSB
,
16473 IX86_BUILTIN_PADDSW
,
16474 IX86_BUILTIN_PADDUSB
,
16475 IX86_BUILTIN_PADDUSW
,
16476 IX86_BUILTIN_PSUBB
,
16477 IX86_BUILTIN_PSUBW
,
16478 IX86_BUILTIN_PSUBD
,
16479 IX86_BUILTIN_PSUBQ
,
16480 IX86_BUILTIN_PSUBSB
,
16481 IX86_BUILTIN_PSUBSW
,
16482 IX86_BUILTIN_PSUBUSB
,
16483 IX86_BUILTIN_PSUBUSW
,
16486 IX86_BUILTIN_PANDN
,
16490 IX86_BUILTIN_PAVGB
,
16491 IX86_BUILTIN_PAVGW
,
16493 IX86_BUILTIN_PCMPEQB
,
16494 IX86_BUILTIN_PCMPEQW
,
16495 IX86_BUILTIN_PCMPEQD
,
16496 IX86_BUILTIN_PCMPGTB
,
16497 IX86_BUILTIN_PCMPGTW
,
16498 IX86_BUILTIN_PCMPGTD
,
16500 IX86_BUILTIN_PMADDWD
,
16502 IX86_BUILTIN_PMAXSW
,
16503 IX86_BUILTIN_PMAXUB
,
16504 IX86_BUILTIN_PMINSW
,
16505 IX86_BUILTIN_PMINUB
,
16507 IX86_BUILTIN_PMULHUW
,
16508 IX86_BUILTIN_PMULHW
,
16509 IX86_BUILTIN_PMULLW
,
16511 IX86_BUILTIN_PSADBW
,
16512 IX86_BUILTIN_PSHUFW
,
16514 IX86_BUILTIN_PSLLW
,
16515 IX86_BUILTIN_PSLLD
,
16516 IX86_BUILTIN_PSLLQ
,
16517 IX86_BUILTIN_PSRAW
,
16518 IX86_BUILTIN_PSRAD
,
16519 IX86_BUILTIN_PSRLW
,
16520 IX86_BUILTIN_PSRLD
,
16521 IX86_BUILTIN_PSRLQ
,
16522 IX86_BUILTIN_PSLLWI
,
16523 IX86_BUILTIN_PSLLDI
,
16524 IX86_BUILTIN_PSLLQI
,
16525 IX86_BUILTIN_PSRAWI
,
16526 IX86_BUILTIN_PSRADI
,
16527 IX86_BUILTIN_PSRLWI
,
16528 IX86_BUILTIN_PSRLDI
,
16529 IX86_BUILTIN_PSRLQI
,
16531 IX86_BUILTIN_PUNPCKHBW
,
16532 IX86_BUILTIN_PUNPCKHWD
,
16533 IX86_BUILTIN_PUNPCKHDQ
,
16534 IX86_BUILTIN_PUNPCKLBW
,
16535 IX86_BUILTIN_PUNPCKLWD
,
16536 IX86_BUILTIN_PUNPCKLDQ
,
16538 IX86_BUILTIN_SHUFPS
,
16540 IX86_BUILTIN_RCPPS
,
16541 IX86_BUILTIN_RCPSS
,
16542 IX86_BUILTIN_RSQRTPS
,
16543 IX86_BUILTIN_RSQRTSS
,
16544 IX86_BUILTIN_RSQRTF
,
16545 IX86_BUILTIN_SQRTPS
,
16546 IX86_BUILTIN_SQRTSS
,
16548 IX86_BUILTIN_UNPCKHPS
,
16549 IX86_BUILTIN_UNPCKLPS
,
16551 IX86_BUILTIN_ANDPS
,
16552 IX86_BUILTIN_ANDNPS
,
16554 IX86_BUILTIN_XORPS
,
16557 IX86_BUILTIN_LDMXCSR
,
16558 IX86_BUILTIN_STMXCSR
,
16559 IX86_BUILTIN_SFENCE
,
16561 /* 3DNow! Original */
16562 IX86_BUILTIN_FEMMS
,
16563 IX86_BUILTIN_PAVGUSB
,
16564 IX86_BUILTIN_PF2ID
,
16565 IX86_BUILTIN_PFACC
,
16566 IX86_BUILTIN_PFADD
,
16567 IX86_BUILTIN_PFCMPEQ
,
16568 IX86_BUILTIN_PFCMPGE
,
16569 IX86_BUILTIN_PFCMPGT
,
16570 IX86_BUILTIN_PFMAX
,
16571 IX86_BUILTIN_PFMIN
,
16572 IX86_BUILTIN_PFMUL
,
16573 IX86_BUILTIN_PFRCP
,
16574 IX86_BUILTIN_PFRCPIT1
,
16575 IX86_BUILTIN_PFRCPIT2
,
16576 IX86_BUILTIN_PFRSQIT1
,
16577 IX86_BUILTIN_PFRSQRT
,
16578 IX86_BUILTIN_PFSUB
,
16579 IX86_BUILTIN_PFSUBR
,
16580 IX86_BUILTIN_PI2FD
,
16581 IX86_BUILTIN_PMULHRW
,
16583 /* 3DNow! Athlon Extensions */
16584 IX86_BUILTIN_PF2IW
,
16585 IX86_BUILTIN_PFNACC
,
16586 IX86_BUILTIN_PFPNACC
,
16587 IX86_BUILTIN_PI2FW
,
16588 IX86_BUILTIN_PSWAPDSI
,
16589 IX86_BUILTIN_PSWAPDSF
,
16592 IX86_BUILTIN_ADDPD
,
16593 IX86_BUILTIN_ADDSD
,
16594 IX86_BUILTIN_DIVPD
,
16595 IX86_BUILTIN_DIVSD
,
16596 IX86_BUILTIN_MULPD
,
16597 IX86_BUILTIN_MULSD
,
16598 IX86_BUILTIN_SUBPD
,
16599 IX86_BUILTIN_SUBSD
,
16601 IX86_BUILTIN_CMPEQPD
,
16602 IX86_BUILTIN_CMPLTPD
,
16603 IX86_BUILTIN_CMPLEPD
,
16604 IX86_BUILTIN_CMPGTPD
,
16605 IX86_BUILTIN_CMPGEPD
,
16606 IX86_BUILTIN_CMPNEQPD
,
16607 IX86_BUILTIN_CMPNLTPD
,
16608 IX86_BUILTIN_CMPNLEPD
,
16609 IX86_BUILTIN_CMPNGTPD
,
16610 IX86_BUILTIN_CMPNGEPD
,
16611 IX86_BUILTIN_CMPORDPD
,
16612 IX86_BUILTIN_CMPUNORDPD
,
16613 IX86_BUILTIN_CMPEQSD
,
16614 IX86_BUILTIN_CMPLTSD
,
16615 IX86_BUILTIN_CMPLESD
,
16616 IX86_BUILTIN_CMPNEQSD
,
16617 IX86_BUILTIN_CMPNLTSD
,
16618 IX86_BUILTIN_CMPNLESD
,
16619 IX86_BUILTIN_CMPORDSD
,
16620 IX86_BUILTIN_CMPUNORDSD
,
16622 IX86_BUILTIN_COMIEQSD
,
16623 IX86_BUILTIN_COMILTSD
,
16624 IX86_BUILTIN_COMILESD
,
16625 IX86_BUILTIN_COMIGTSD
,
16626 IX86_BUILTIN_COMIGESD
,
16627 IX86_BUILTIN_COMINEQSD
,
16628 IX86_BUILTIN_UCOMIEQSD
,
16629 IX86_BUILTIN_UCOMILTSD
,
16630 IX86_BUILTIN_UCOMILESD
,
16631 IX86_BUILTIN_UCOMIGTSD
,
16632 IX86_BUILTIN_UCOMIGESD
,
16633 IX86_BUILTIN_UCOMINEQSD
,
16635 IX86_BUILTIN_MAXPD
,
16636 IX86_BUILTIN_MAXSD
,
16637 IX86_BUILTIN_MINPD
,
16638 IX86_BUILTIN_MINSD
,
16640 IX86_BUILTIN_ANDPD
,
16641 IX86_BUILTIN_ANDNPD
,
16643 IX86_BUILTIN_XORPD
,
16645 IX86_BUILTIN_SQRTPD
,
16646 IX86_BUILTIN_SQRTSD
,
16648 IX86_BUILTIN_UNPCKHPD
,
16649 IX86_BUILTIN_UNPCKLPD
,
16651 IX86_BUILTIN_SHUFPD
,
16653 IX86_BUILTIN_LOADUPD
,
16654 IX86_BUILTIN_STOREUPD
,
16655 IX86_BUILTIN_MOVSD
,
16657 IX86_BUILTIN_LOADHPD
,
16658 IX86_BUILTIN_LOADLPD
,
16660 IX86_BUILTIN_CVTDQ2PD
,
16661 IX86_BUILTIN_CVTDQ2PS
,
16663 IX86_BUILTIN_CVTPD2DQ
,
16664 IX86_BUILTIN_CVTPD2PI
,
16665 IX86_BUILTIN_CVTPD2PS
,
16666 IX86_BUILTIN_CVTTPD2DQ
,
16667 IX86_BUILTIN_CVTTPD2PI
,
16669 IX86_BUILTIN_CVTPI2PD
,
16670 IX86_BUILTIN_CVTSI2SD
,
16671 IX86_BUILTIN_CVTSI642SD
,
16673 IX86_BUILTIN_CVTSD2SI
,
16674 IX86_BUILTIN_CVTSD2SI64
,
16675 IX86_BUILTIN_CVTSD2SS
,
16676 IX86_BUILTIN_CVTSS2SD
,
16677 IX86_BUILTIN_CVTTSD2SI
,
16678 IX86_BUILTIN_CVTTSD2SI64
,
16680 IX86_BUILTIN_CVTPS2DQ
,
16681 IX86_BUILTIN_CVTPS2PD
,
16682 IX86_BUILTIN_CVTTPS2DQ
,
16684 IX86_BUILTIN_MOVNTI
,
16685 IX86_BUILTIN_MOVNTPD
,
16686 IX86_BUILTIN_MOVNTDQ
,
16689 IX86_BUILTIN_MASKMOVDQU
,
16690 IX86_BUILTIN_MOVMSKPD
,
16691 IX86_BUILTIN_PMOVMSKB128
,
16693 IX86_BUILTIN_PACKSSWB128
,
16694 IX86_BUILTIN_PACKSSDW128
,
16695 IX86_BUILTIN_PACKUSWB128
,
16697 IX86_BUILTIN_PADDB128
,
16698 IX86_BUILTIN_PADDW128
,
16699 IX86_BUILTIN_PADDD128
,
16700 IX86_BUILTIN_PADDQ128
,
16701 IX86_BUILTIN_PADDSB128
,
16702 IX86_BUILTIN_PADDSW128
,
16703 IX86_BUILTIN_PADDUSB128
,
16704 IX86_BUILTIN_PADDUSW128
,
16705 IX86_BUILTIN_PSUBB128
,
16706 IX86_BUILTIN_PSUBW128
,
16707 IX86_BUILTIN_PSUBD128
,
16708 IX86_BUILTIN_PSUBQ128
,
16709 IX86_BUILTIN_PSUBSB128
,
16710 IX86_BUILTIN_PSUBSW128
,
16711 IX86_BUILTIN_PSUBUSB128
,
16712 IX86_BUILTIN_PSUBUSW128
,
16714 IX86_BUILTIN_PAND128
,
16715 IX86_BUILTIN_PANDN128
,
16716 IX86_BUILTIN_POR128
,
16717 IX86_BUILTIN_PXOR128
,
16719 IX86_BUILTIN_PAVGB128
,
16720 IX86_BUILTIN_PAVGW128
,
16722 IX86_BUILTIN_PCMPEQB128
,
16723 IX86_BUILTIN_PCMPEQW128
,
16724 IX86_BUILTIN_PCMPEQD128
,
16725 IX86_BUILTIN_PCMPGTB128
,
16726 IX86_BUILTIN_PCMPGTW128
,
16727 IX86_BUILTIN_PCMPGTD128
,
16729 IX86_BUILTIN_PMADDWD128
,
16731 IX86_BUILTIN_PMAXSW128
,
16732 IX86_BUILTIN_PMAXUB128
,
16733 IX86_BUILTIN_PMINSW128
,
16734 IX86_BUILTIN_PMINUB128
,
16736 IX86_BUILTIN_PMULUDQ
,
16737 IX86_BUILTIN_PMULUDQ128
,
16738 IX86_BUILTIN_PMULHUW128
,
16739 IX86_BUILTIN_PMULHW128
,
16740 IX86_BUILTIN_PMULLW128
,
16742 IX86_BUILTIN_PSADBW128
,
16743 IX86_BUILTIN_PSHUFHW
,
16744 IX86_BUILTIN_PSHUFLW
,
16745 IX86_BUILTIN_PSHUFD
,
16747 IX86_BUILTIN_PSLLDQI128
,
16748 IX86_BUILTIN_PSLLWI128
,
16749 IX86_BUILTIN_PSLLDI128
,
16750 IX86_BUILTIN_PSLLQI128
,
16751 IX86_BUILTIN_PSRAWI128
,
16752 IX86_BUILTIN_PSRADI128
,
16753 IX86_BUILTIN_PSRLDQI128
,
16754 IX86_BUILTIN_PSRLWI128
,
16755 IX86_BUILTIN_PSRLDI128
,
16756 IX86_BUILTIN_PSRLQI128
,
16758 IX86_BUILTIN_PSLLDQ128
,
16759 IX86_BUILTIN_PSLLW128
,
16760 IX86_BUILTIN_PSLLD128
,
16761 IX86_BUILTIN_PSLLQ128
,
16762 IX86_BUILTIN_PSRAW128
,
16763 IX86_BUILTIN_PSRAD128
,
16764 IX86_BUILTIN_PSRLW128
,
16765 IX86_BUILTIN_PSRLD128
,
16766 IX86_BUILTIN_PSRLQ128
,
16768 IX86_BUILTIN_PUNPCKHBW128
,
16769 IX86_BUILTIN_PUNPCKHWD128
,
16770 IX86_BUILTIN_PUNPCKHDQ128
,
16771 IX86_BUILTIN_PUNPCKHQDQ128
,
16772 IX86_BUILTIN_PUNPCKLBW128
,
16773 IX86_BUILTIN_PUNPCKLWD128
,
16774 IX86_BUILTIN_PUNPCKLDQ128
,
16775 IX86_BUILTIN_PUNPCKLQDQ128
,
16777 IX86_BUILTIN_CLFLUSH
,
16778 IX86_BUILTIN_MFENCE
,
16779 IX86_BUILTIN_LFENCE
,
16781 /* Prescott New Instructions. */
16782 IX86_BUILTIN_ADDSUBPS
,
16783 IX86_BUILTIN_HADDPS
,
16784 IX86_BUILTIN_HSUBPS
,
16785 IX86_BUILTIN_MOVSHDUP
,
16786 IX86_BUILTIN_MOVSLDUP
,
16787 IX86_BUILTIN_ADDSUBPD
,
16788 IX86_BUILTIN_HADDPD
,
16789 IX86_BUILTIN_HSUBPD
,
16790 IX86_BUILTIN_LDDQU
,
16792 IX86_BUILTIN_MONITOR
,
16793 IX86_BUILTIN_MWAIT
,
16796 IX86_BUILTIN_PHADDW
,
16797 IX86_BUILTIN_PHADDD
,
16798 IX86_BUILTIN_PHADDSW
,
16799 IX86_BUILTIN_PHSUBW
,
16800 IX86_BUILTIN_PHSUBD
,
16801 IX86_BUILTIN_PHSUBSW
,
16802 IX86_BUILTIN_PMADDUBSW
,
16803 IX86_BUILTIN_PMULHRSW
,
16804 IX86_BUILTIN_PSHUFB
,
16805 IX86_BUILTIN_PSIGNB
,
16806 IX86_BUILTIN_PSIGNW
,
16807 IX86_BUILTIN_PSIGND
,
16808 IX86_BUILTIN_PALIGNR
,
16809 IX86_BUILTIN_PABSB
,
16810 IX86_BUILTIN_PABSW
,
16811 IX86_BUILTIN_PABSD
,
16813 IX86_BUILTIN_PHADDW128
,
16814 IX86_BUILTIN_PHADDD128
,
16815 IX86_BUILTIN_PHADDSW128
,
16816 IX86_BUILTIN_PHSUBW128
,
16817 IX86_BUILTIN_PHSUBD128
,
16818 IX86_BUILTIN_PHSUBSW128
,
16819 IX86_BUILTIN_PMADDUBSW128
,
16820 IX86_BUILTIN_PMULHRSW128
,
16821 IX86_BUILTIN_PSHUFB128
,
16822 IX86_BUILTIN_PSIGNB128
,
16823 IX86_BUILTIN_PSIGNW128
,
16824 IX86_BUILTIN_PSIGND128
,
16825 IX86_BUILTIN_PALIGNR128
,
16826 IX86_BUILTIN_PABSB128
,
16827 IX86_BUILTIN_PABSW128
,
16828 IX86_BUILTIN_PABSD128
,
16830 /* AMDFAM10 - SSE4A New Instructions. */
16831 IX86_BUILTIN_MOVNTSD
,
16832 IX86_BUILTIN_MOVNTSS
,
16833 IX86_BUILTIN_EXTRQI
,
16834 IX86_BUILTIN_EXTRQ
,
16835 IX86_BUILTIN_INSERTQI
,
16836 IX86_BUILTIN_INSERTQ
,
16839 IX86_BUILTIN_BLENDPD
,
16840 IX86_BUILTIN_BLENDPS
,
16841 IX86_BUILTIN_BLENDVPD
,
16842 IX86_BUILTIN_BLENDVPS
,
16843 IX86_BUILTIN_PBLENDVB128
,
16844 IX86_BUILTIN_PBLENDW128
,
16849 IX86_BUILTIN_INSERTPS128
,
16851 IX86_BUILTIN_MOVNTDQA
,
16852 IX86_BUILTIN_MPSADBW128
,
16853 IX86_BUILTIN_PACKUSDW128
,
16854 IX86_BUILTIN_PCMPEQQ
,
16855 IX86_BUILTIN_PHMINPOSUW128
,
16857 IX86_BUILTIN_PMAXSB128
,
16858 IX86_BUILTIN_PMAXSD128
,
16859 IX86_BUILTIN_PMAXUD128
,
16860 IX86_BUILTIN_PMAXUW128
,
16862 IX86_BUILTIN_PMINSB128
,
16863 IX86_BUILTIN_PMINSD128
,
16864 IX86_BUILTIN_PMINUD128
,
16865 IX86_BUILTIN_PMINUW128
,
16867 IX86_BUILTIN_PMOVSXBW128
,
16868 IX86_BUILTIN_PMOVSXBD128
,
16869 IX86_BUILTIN_PMOVSXBQ128
,
16870 IX86_BUILTIN_PMOVSXWD128
,
16871 IX86_BUILTIN_PMOVSXWQ128
,
16872 IX86_BUILTIN_PMOVSXDQ128
,
16874 IX86_BUILTIN_PMOVZXBW128
,
16875 IX86_BUILTIN_PMOVZXBD128
,
16876 IX86_BUILTIN_PMOVZXBQ128
,
16877 IX86_BUILTIN_PMOVZXWD128
,
16878 IX86_BUILTIN_PMOVZXWQ128
,
16879 IX86_BUILTIN_PMOVZXDQ128
,
16881 IX86_BUILTIN_PMULDQ128
,
16882 IX86_BUILTIN_PMULLD128
,
16884 IX86_BUILTIN_ROUNDPD
,
16885 IX86_BUILTIN_ROUNDPS
,
16886 IX86_BUILTIN_ROUNDSD
,
16887 IX86_BUILTIN_ROUNDSS
,
16889 IX86_BUILTIN_PTESTZ
,
16890 IX86_BUILTIN_PTESTC
,
16891 IX86_BUILTIN_PTESTNZC
,
16893 IX86_BUILTIN_VEC_INIT_V2SI
,
16894 IX86_BUILTIN_VEC_INIT_V4HI
,
16895 IX86_BUILTIN_VEC_INIT_V8QI
,
16896 IX86_BUILTIN_VEC_EXT_V2DF
,
16897 IX86_BUILTIN_VEC_EXT_V2DI
,
16898 IX86_BUILTIN_VEC_EXT_V4SF
,
16899 IX86_BUILTIN_VEC_EXT_V4SI
,
16900 IX86_BUILTIN_VEC_EXT_V8HI
,
16901 IX86_BUILTIN_VEC_EXT_V2SI
,
16902 IX86_BUILTIN_VEC_EXT_V4HI
,
16903 IX86_BUILTIN_VEC_EXT_V16QI
,
16904 IX86_BUILTIN_VEC_SET_V2DI
,
16905 IX86_BUILTIN_VEC_SET_V4SF
,
16906 IX86_BUILTIN_VEC_SET_V4SI
,
16907 IX86_BUILTIN_VEC_SET_V8HI
,
16908 IX86_BUILTIN_VEC_SET_V4HI
,
16909 IX86_BUILTIN_VEC_SET_V16QI
,
16911 IX86_BUILTIN_VEC_PACK_SFIX
,
16914 IX86_BUILTIN_CRC32QI
,
16915 IX86_BUILTIN_CRC32HI
,
16916 IX86_BUILTIN_CRC32SI
,
16917 IX86_BUILTIN_CRC32DI
,
16919 IX86_BUILTIN_PCMPESTRI128
,
16920 IX86_BUILTIN_PCMPESTRM128
,
16921 IX86_BUILTIN_PCMPESTRA128
,
16922 IX86_BUILTIN_PCMPESTRC128
,
16923 IX86_BUILTIN_PCMPESTRO128
,
16924 IX86_BUILTIN_PCMPESTRS128
,
16925 IX86_BUILTIN_PCMPESTRZ128
,
16926 IX86_BUILTIN_PCMPISTRI128
,
16927 IX86_BUILTIN_PCMPISTRM128
,
16928 IX86_BUILTIN_PCMPISTRA128
,
16929 IX86_BUILTIN_PCMPISTRC128
,
16930 IX86_BUILTIN_PCMPISTRO128
,
16931 IX86_BUILTIN_PCMPISTRS128
,
16932 IX86_BUILTIN_PCMPISTRZ128
,
16934 IX86_BUILTIN_PCMPGTQ
,
16936 /* TFmode support builtins. */
16938 IX86_BUILTIN_FABSQ
,
16939 IX86_BUILTIN_COPYSIGNQ
,
16944 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; entries are filled in by def_builtin and
   remain NULL_TREE for builtins not enabled by the current ISA flags.
   GTY(()) registers the array with the garbage collector.  */
16945 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16947 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
16948 * if the target_flags include one of MASK. Stores the function decl
16949 * in the ix86_builtins array.
16950 * Returns the function decl or NULL_TREE, if the builtin was not added. */
/* NOTE(review): the return type, braces, the trailing argument(s) of the
   add_builtin_function call (orig line 16961) and the final
   `return decl;' were dropped by this extraction; recover them before
   editing.  */
16953 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16955 tree decl
= NULL_TREE
;
/* Register only if the required ISA is enabled, and a 64-bit-only
   builtin only on 64-bit targets.  */
16957 if (mask
& ix86_isa_flags
16958 && (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
))
16960 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16962 ix86_builtins
[(int) code
] = decl
;
16968 /* Like def_builtin, but also marks the function decl "const". */
16971 def_builtin_const (int mask
, const char *name
, tree type
,
16972 enum ix86_builtins code
)
16974 tree decl
= def_builtin (mask
, name
, type
, code
);
16976 TREE_READONLY (decl
) = 1;
16980 /* Bits for builtin_description.flag. */
16982 /* Set when we don't support the comparison natively, and should
16983 swap_comparison in order to support it. */
16984 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table entry per builtin: the ISA mask required to enable it, the
   insn pattern to expand to, the user-visible name, its enum value, and
   the RTL comparison code (for comparison builtins).
   NOTE(review): the struct's opening brace, its `flag' field (referenced
   by the BUILTIN_DESC_SWAP_OPERANDS comment above; dropped original line
   16993) and the closing brace are missing from this extraction.  */
16986 struct builtin_description
16988 const unsigned int mask
;
16989 const enum insn_code icode
;
16990 const char *const name
;
16991 const enum ix86_builtins code
;
16992 const enum rtx_code comparison
;
16996 static const struct builtin_description bdesc_comi
[] =
16998 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16999 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
17000 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
17001 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
17002 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
17003 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
17004 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
17005 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
17006 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
17007 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
17008 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
17009 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
17010 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
17011 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
17012 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
17013 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
17014 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
17015 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
17016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
17017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
17018 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
17019 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
17020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
17021 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
17024 static const struct builtin_description bdesc_ptest
[] =
17027 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, 0 },
17028 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, 0 },
17029 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, 0 },
17032 static const struct builtin_description bdesc_pcmpestr
[] =
17035 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
17036 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
17037 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
17038 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
17039 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
17040 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
17041 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
17044 static const struct builtin_description bdesc_pcmpistr
[] =
17047 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
17048 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
17049 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
17050 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
17051 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
17052 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
17053 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
17056 static const struct builtin_description bdesc_crc32
[] =
17059 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32qi
, 0, IX86_BUILTIN_CRC32QI
, UNKNOWN
, 0 },
17060 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32hi
, 0, IX86_BUILTIN_CRC32HI
, UNKNOWN
, 0 },
17061 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32si
, 0, IX86_BUILTIN_CRC32SI
, UNKNOWN
, 0 },
17062 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_crc32di
, 0, IX86_BUILTIN_CRC32DI
, UNKNOWN
, 0 },
17065 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17066 static const struct builtin_description bdesc_sse_3arg
[] =
17069 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, 0 },
17070 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, 0 },
17071 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, 0 },
17072 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, 0 },
17073 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, 0 },
17074 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, 0 },
17075 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, 0 },
17076 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, 0 },
17077 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, 0 },
17078 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, 0 },
17079 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundsd
, 0, IX86_BUILTIN_ROUNDSD
, UNKNOWN
, 0 },
17080 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundss
, 0, IX86_BUILTIN_ROUNDSS
, UNKNOWN
, 0 },
17083 static const struct builtin_description bdesc_2arg
[] =
17086 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, 0 },
17087 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, 0 },
17088 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, 0 },
17089 { OPTION_MASK_ISA_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, 0 },
17090 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, 0 },
17091 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, 0 },
17092 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, 0 },
17093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, 0 },
17095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
17096 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
17097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
17098 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17099 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17100 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
17101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
17102 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
17103 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
17104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17105 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17106 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
17107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
17108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
17109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
17110 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
17111 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
17112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
17113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
17114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17115 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17116 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, 0 },
17118 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, 0 },
17119 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, 0 },
17120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, 0 },
17121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, 0 },
17123 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, 0 },
17124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, 0 },
17125 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, 0 },
17126 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, 0 },
17128 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, 0 },
17129 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, 0 },
17130 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, 0 },
17131 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, 0 },
17132 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, 0 },
17135 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, 0 },
17136 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, 0 },
17137 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, 0 },
17138 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, 0 },
17139 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, 0 },
17140 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, 0 },
17141 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, 0 },
17142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, 0 },
17144 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, 0 },
17145 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, 0 },
17146 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, 0 },
17147 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, 0 },
17148 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, 0 },
17149 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, 0 },
17150 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, 0 },
17151 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, 0 },
17153 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, 0 },
17154 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, 0 },
17155 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, 0 },
17157 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, 0 },
17158 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, 0 },
17159 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, 0 },
17160 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, 0 },
17162 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, 0 },
17163 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, 0 },
17165 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, 0 },
17166 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, 0 },
17167 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, 0 },
17168 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, 0 },
17169 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, 0 },
17170 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, 0 },
17172 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, 0 },
17173 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, 0 },
17174 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, 0 },
17175 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, 0 },
17177 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, 0 },
17178 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, 0 },
17179 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, 0 },
17180 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, 0 },
17181 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, 0 },
17182 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, 0 },
17185 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, UNKNOWN
, 0 },
17186 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, UNKNOWN
, 0 },
17187 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, UNKNOWN
, 0 },
17189 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, 0 },
17190 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, 0 },
17191 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, 0 },
17193 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, UNKNOWN
, 0 },
17194 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, UNKNOWN
, 0 },
17195 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, UNKNOWN
, 0 },
17196 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, UNKNOWN
, 0 },
17197 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, UNKNOWN
, 0 },
17198 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, UNKNOWN
, 0 },
17200 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, UNKNOWN
, 0 },
17201 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, UNKNOWN
, 0 },
17202 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, UNKNOWN
, 0 },
17203 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, UNKNOWN
, 0 },
17204 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, UNKNOWN
, 0 },
17205 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, UNKNOWN
, 0 },
17207 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, UNKNOWN
, 0 },
17208 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, UNKNOWN
, 0 },
17209 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, UNKNOWN
, 0 },
17210 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, UNKNOWN
, 0 },
17212 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, UNKNOWN
, 0 },
17213 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, UNKNOWN
, 0 },
17216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, 0 },
17217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, 0 },
17218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, 0 },
17219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, 0 },
17220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, 0 },
17221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, 0 },
17222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, 0 },
17223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, 0 },
17225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
17226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
17227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
17228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, BUILTIN_DESC_SWAP_OPERANDS
},
17229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, BUILTIN_DESC_SWAP_OPERANDS
},
17230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
17231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
17232 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
17233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
17234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, BUILTIN_DESC_SWAP_OPERANDS
},
17235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, BUILTIN_DESC_SWAP_OPERANDS
},
17236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
17237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
17238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
17239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
17240 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
17241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
17242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
17243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
17244 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
17246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, 0 },
17247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, 0 },
17248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, 0 },
17249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, 0 },
17251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, 0 },
17252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, 0 },
17253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, 0 },
17254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, 0 },
17256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, 0 },
17257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, 0 },
17258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, 0 },
17260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, 0 },
17263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, 0 },
17264 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, 0 },
17265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, 0 },
17266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, 0 },
17267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, 0 },
17268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, 0 },
17269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, 0 },
17270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, 0 },
17272 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, 0 },
17273 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, 0 },
17274 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, 0 },
17275 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, 0 },
17276 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, 0 },
17277 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, 0 },
17278 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, 0 },
17279 { OPTION_MASK_ISA_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, 0 },
17281 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, 0 },
17282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, 0 },
17284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, 0 },
17285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, 0 },
17286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, 0 },
17287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, 0 },
17289 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, 0 },
17290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, 0 },
17292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, 0 },
17293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, 0 },
17294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, 0 },
17295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, 0 },
17296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, 0 },
17297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, 0 },
17299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, 0 },
17300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, 0 },
17301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, 0 },
17302 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, 0 },
17304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, 0 },
17305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, 0 },
17306 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, 0 },
17307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, 0 },
17308 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, 0 },
17309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, 0 },
17310 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, 0 },
17311 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, 0 },
17313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, 0 },
17314 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, 0 },
17315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, 0 },
17317 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, 0 },
17318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, UNKNOWN
, 0 },
17320 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, UNKNOWN
, 0 },
17321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, 0 },
17323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, UNKNOWN
, 0 },
17324 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, UNKNOWN
, 0 },
17325 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, UNKNOWN
, 0 },
17327 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, UNKNOWN
, 0 },
17328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, UNKNOWN
, 0 },
17329 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, UNKNOWN
, 0 },
17331 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, UNKNOWN
, 0 },
17332 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, UNKNOWN
, 0 },
17334 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, UNKNOWN
, 0 },
17336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, 0 },
17337 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, 0 },
17338 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, 0 },
17339 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, 0 },
17342 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, 0 },
17343 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, 0 },
17344 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, 0 },
17345 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, 0 },
17346 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, 0 },
17347 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, 0 },
17350 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, 0 },
17351 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, 0 },
17352 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, 0 },
17353 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, 0 },
17354 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, 0 },
17355 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, 0 },
17356 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, 0 },
17357 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, 0 },
17358 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, 0 },
17359 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, 0 },
17360 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, 0 },
17361 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, 0 },
17362 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, 0 },
17363 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, 0 },
17364 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, 0 },
17365 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, 0 },
17366 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, 0 },
17367 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, 0 },
17368 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, 0 },
17369 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, 0 },
17370 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, 0 },
17371 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, 0 },
17372 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, 0 },
17373 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, 0 },
17376 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, 0 },
17377 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, 0 },
17378 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, 0 },
17379 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, 0 },
17380 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, 0 },
17381 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, 0 },
17382 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, 0 },
17383 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, 0 },
17384 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, 0 },
17385 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, 0 },
17386 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, 0, IX86_BUILTIN_PMULDQ128
, UNKNOWN
, 0 },
17387 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, 0 },
17390 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, 0 },
17393 static const struct builtin_description bdesc_1arg
[] =
17395 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, 0 },
17396 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, 0 },
17398 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, UNKNOWN
, 0 },
17399 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, UNKNOWN
, 0 },
17400 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, UNKNOWN
, 0 },
17402 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, 0 },
17403 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, 0 },
17404 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, 0 },
17405 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, 0 },
17406 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, 0 },
17407 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, 0 },
17409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, 0 },
17410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, 0 },
17412 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, UNKNOWN
, 0 },
17414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, 0 },
17415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, 0 },
17417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, 0 },
17418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, 0 },
17419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, 0 },
17420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, 0 },
17421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, 0 },
17423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, 0 },
17425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, 0 },
17426 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, 0 },
17427 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, 0 },
17428 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, 0 },
17430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, 0 },
17431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, 0 },
17432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, 0 },
17435 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, 0 },
17436 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, 0 },
17439 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, 0 },
17440 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, 0 },
17441 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, 0 },
17442 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, 0 },
17443 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, 0 },
17444 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, 0 },
17447 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, 0 },
17448 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, 0 },
17449 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, 0 },
17450 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, 0 },
17451 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, 0 },
17452 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, 0 },
17453 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, 0, IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, 0 },
17454 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, 0, IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, 0 },
17455 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, 0, IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, 0 },
17456 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, 0, IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, 0 },
17457 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, 0, IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, 0 },
17458 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, 0, IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, 0 },
17459 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, 0 },
17461 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
17462 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundpd
, 0, IX86_BUILTIN_ROUNDPD
, UNKNOWN
, 0 },
17463 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_roundps
, 0, IX86_BUILTIN_ROUNDPS
, UNKNOWN
, 0 },
17466 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17467 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17470 ix86_init_mmx_sse_builtins (void)
17472 const struct builtin_description
* d
;
17475 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
17476 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
17477 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
17478 tree V2DI_type_node
17479 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
17480 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
17481 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
17482 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
17483 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
17484 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
17485 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
17487 tree pchar_type_node
= build_pointer_type (char_type_node
);
17488 tree pcchar_type_node
= build_pointer_type (
17489 build_type_variant (char_type_node
, 1, 0));
17490 tree pfloat_type_node
= build_pointer_type (float_type_node
);
17491 tree pcfloat_type_node
= build_pointer_type (
17492 build_type_variant (float_type_node
, 1, 0));
17493 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
17494 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
17495 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
17498 tree int_ftype_v4sf_v4sf
17499 = build_function_type_list (integer_type_node
,
17500 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17501 tree v4si_ftype_v4sf_v4sf
17502 = build_function_type_list (V4SI_type_node
,
17503 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17504 /* MMX/SSE/integer conversions. */
17505 tree int_ftype_v4sf
17506 = build_function_type_list (integer_type_node
,
17507 V4SF_type_node
, NULL_TREE
);
17508 tree int64_ftype_v4sf
17509 = build_function_type_list (long_long_integer_type_node
,
17510 V4SF_type_node
, NULL_TREE
);
17511 tree int_ftype_v8qi
17512 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
17513 tree v4sf_ftype_v4sf_int
17514 = build_function_type_list (V4SF_type_node
,
17515 V4SF_type_node
, integer_type_node
, NULL_TREE
);
17516 tree v4sf_ftype_v4sf_int64
17517 = build_function_type_list (V4SF_type_node
,
17518 V4SF_type_node
, long_long_integer_type_node
,
17520 tree v4sf_ftype_v4sf_v2si
17521 = build_function_type_list (V4SF_type_node
,
17522 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
17524 /* Miscellaneous. */
17525 tree v8qi_ftype_v4hi_v4hi
17526 = build_function_type_list (V8QI_type_node
,
17527 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17528 tree v4hi_ftype_v2si_v2si
17529 = build_function_type_list (V4HI_type_node
,
17530 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17531 tree v4sf_ftype_v4sf_v4sf_int
17532 = build_function_type_list (V4SF_type_node
,
17533 V4SF_type_node
, V4SF_type_node
,
17534 integer_type_node
, NULL_TREE
);
17535 tree v2si_ftype_v4hi_v4hi
17536 = build_function_type_list (V2SI_type_node
,
17537 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17538 tree v4hi_ftype_v4hi_int
17539 = build_function_type_list (V4HI_type_node
,
17540 V4HI_type_node
, integer_type_node
, NULL_TREE
);
17541 tree v4hi_ftype_v4hi_di
17542 = build_function_type_list (V4HI_type_node
,
17543 V4HI_type_node
, long_long_unsigned_type_node
,
17545 tree v2si_ftype_v2si_di
17546 = build_function_type_list (V2SI_type_node
,
17547 V2SI_type_node
, long_long_unsigned_type_node
,
17549 tree void_ftype_void
17550 = build_function_type (void_type_node
, void_list_node
);
17551 tree void_ftype_unsigned
17552 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
17553 tree void_ftype_unsigned_unsigned
17554 = build_function_type_list (void_type_node
, unsigned_type_node
,
17555 unsigned_type_node
, NULL_TREE
);
17556 tree void_ftype_pcvoid_unsigned_unsigned
17557 = build_function_type_list (void_type_node
, const_ptr_type_node
,
17558 unsigned_type_node
, unsigned_type_node
,
17560 tree unsigned_ftype_void
17561 = build_function_type (unsigned_type_node
, void_list_node
);
17562 tree v2si_ftype_v4sf
17563 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
17564 /* Loads/stores. */
17565 tree void_ftype_v8qi_v8qi_pchar
17566 = build_function_type_list (void_type_node
,
17567 V8QI_type_node
, V8QI_type_node
,
17568 pchar_type_node
, NULL_TREE
);
17569 tree v4sf_ftype_pcfloat
17570 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
17571 /* @@@ the type is bogus */
17572 tree v4sf_ftype_v4sf_pv2si
17573 = build_function_type_list (V4SF_type_node
,
17574 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
17575 tree void_ftype_pv2si_v4sf
17576 = build_function_type_list (void_type_node
,
17577 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
17578 tree void_ftype_pfloat_v4sf
17579 = build_function_type_list (void_type_node
,
17580 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
17581 tree void_ftype_pdi_di
17582 = build_function_type_list (void_type_node
,
17583 pdi_type_node
, long_long_unsigned_type_node
,
17585 tree void_ftype_pv2di_v2di
17586 = build_function_type_list (void_type_node
,
17587 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
17588 /* Normal vector unops. */
17589 tree v4sf_ftype_v4sf
17590 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17591 tree v16qi_ftype_v16qi
17592 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17593 tree v8hi_ftype_v8hi
17594 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17595 tree v4si_ftype_v4si
17596 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17597 tree v8qi_ftype_v8qi
17598 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17599 tree v4hi_ftype_v4hi
17600 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17602 /* Normal vector binops. */
17603 tree v4sf_ftype_v4sf_v4sf
17604 = build_function_type_list (V4SF_type_node
,
17605 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17606 tree v8qi_ftype_v8qi_v8qi
17607 = build_function_type_list (V8QI_type_node
,
17608 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17609 tree v4hi_ftype_v4hi_v4hi
17610 = build_function_type_list (V4HI_type_node
,
17611 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
17612 tree v2si_ftype_v2si_v2si
17613 = build_function_type_list (V2SI_type_node
,
17614 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17615 tree di_ftype_di_di
17616 = build_function_type_list (long_long_unsigned_type_node
,
17617 long_long_unsigned_type_node
,
17618 long_long_unsigned_type_node
, NULL_TREE
);
17620 tree di_ftype_di_di_int
17621 = build_function_type_list (long_long_unsigned_type_node
,
17622 long_long_unsigned_type_node
,
17623 long_long_unsigned_type_node
,
17624 integer_type_node
, NULL_TREE
);
17626 tree v2si_ftype_v2sf
17627 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
17628 tree v2sf_ftype_v2si
17629 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
17630 tree v2si_ftype_v2si
17631 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17632 tree v2sf_ftype_v2sf
17633 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17634 tree v2sf_ftype_v2sf_v2sf
17635 = build_function_type_list (V2SF_type_node
,
17636 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17637 tree v2si_ftype_v2sf_v2sf
17638 = build_function_type_list (V2SI_type_node
,
17639 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
17640 tree pint_type_node
= build_pointer_type (integer_type_node
);
17641 tree pdouble_type_node
= build_pointer_type (double_type_node
);
17642 tree pcdouble_type_node
= build_pointer_type (
17643 build_type_variant (double_type_node
, 1, 0));
17644 tree int_ftype_v2df_v2df
17645 = build_function_type_list (integer_type_node
,
17646 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17648 tree void_ftype_pcvoid
17649 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
17650 tree v4sf_ftype_v4si
17651 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
17652 tree v4si_ftype_v4sf
17653 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
17654 tree v2df_ftype_v4si
17655 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
17656 tree v4si_ftype_v2df
17657 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
17658 tree v4si_ftype_v2df_v2df
17659 = build_function_type_list (V4SI_type_node
,
17660 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17661 tree v2si_ftype_v2df
17662 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
17663 tree v4sf_ftype_v2df
17664 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17665 tree v2df_ftype_v2si
17666 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
17667 tree v2df_ftype_v4sf
17668 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17669 tree int_ftype_v2df
17670 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
17671 tree int64_ftype_v2df
17672 = build_function_type_list (long_long_integer_type_node
,
17673 V2DF_type_node
, NULL_TREE
);
17674 tree v2df_ftype_v2df_int
17675 = build_function_type_list (V2DF_type_node
,
17676 V2DF_type_node
, integer_type_node
, NULL_TREE
);
17677 tree v2df_ftype_v2df_int64
17678 = build_function_type_list (V2DF_type_node
,
17679 V2DF_type_node
, long_long_integer_type_node
,
17681 tree v4sf_ftype_v4sf_v2df
17682 = build_function_type_list (V4SF_type_node
,
17683 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
17684 tree v2df_ftype_v2df_v4sf
17685 = build_function_type_list (V2DF_type_node
,
17686 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
17687 tree v2df_ftype_v2df_v2df_int
17688 = build_function_type_list (V2DF_type_node
,
17689 V2DF_type_node
, V2DF_type_node
,
17692 tree v2df_ftype_v2df_pcdouble
17693 = build_function_type_list (V2DF_type_node
,
17694 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17695 tree void_ftype_pdouble_v2df
17696 = build_function_type_list (void_type_node
,
17697 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
17698 tree void_ftype_pint_int
17699 = build_function_type_list (void_type_node
,
17700 pint_type_node
, integer_type_node
, NULL_TREE
);
17701 tree void_ftype_v16qi_v16qi_pchar
17702 = build_function_type_list (void_type_node
,
17703 V16QI_type_node
, V16QI_type_node
,
17704 pchar_type_node
, NULL_TREE
);
17705 tree v2df_ftype_pcdouble
17706 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
17707 tree v2df_ftype_v2df_v2df
17708 = build_function_type_list (V2DF_type_node
,
17709 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17710 tree v16qi_ftype_v16qi_v16qi
17711 = build_function_type_list (V16QI_type_node
,
17712 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17713 tree v8hi_ftype_v8hi_v8hi
17714 = build_function_type_list (V8HI_type_node
,
17715 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17716 tree v4si_ftype_v4si_v4si
17717 = build_function_type_list (V4SI_type_node
,
17718 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17719 tree v2di_ftype_v2di_v2di
17720 = build_function_type_list (V2DI_type_node
,
17721 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17722 tree v2di_ftype_v2df_v2df
17723 = build_function_type_list (V2DI_type_node
,
17724 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17725 tree v2df_ftype_v2df
17726 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17727 tree v2di_ftype_v2di_int
17728 = build_function_type_list (V2DI_type_node
,
17729 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17730 tree v2di_ftype_v2di_v2di_int
17731 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17732 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17733 tree v4si_ftype_v4si_int
17734 = build_function_type_list (V4SI_type_node
,
17735 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17736 tree v8hi_ftype_v8hi_int
17737 = build_function_type_list (V8HI_type_node
,
17738 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17739 tree v4si_ftype_v8hi_v8hi
17740 = build_function_type_list (V4SI_type_node
,
17741 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17742 tree di_ftype_v8qi_v8qi
17743 = build_function_type_list (long_long_unsigned_type_node
,
17744 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17745 tree di_ftype_v2si_v2si
17746 = build_function_type_list (long_long_unsigned_type_node
,
17747 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17748 tree v2di_ftype_v16qi_v16qi
17749 = build_function_type_list (V2DI_type_node
,
17750 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17751 tree v2di_ftype_v4si_v4si
17752 = build_function_type_list (V2DI_type_node
,
17753 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17754 tree int_ftype_v16qi
17755 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17756 tree v16qi_ftype_pcchar
17757 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17758 tree void_ftype_pchar_v16qi
17759 = build_function_type_list (void_type_node
,
17760 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17762 tree v2di_ftype_v2di_unsigned_unsigned
17763 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17764 unsigned_type_node
, unsigned_type_node
,
17766 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17767 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17768 unsigned_type_node
, unsigned_type_node
,
17770 tree v2di_ftype_v2di_v16qi
17771 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17773 tree v2df_ftype_v2df_v2df_v2df
17774 = build_function_type_list (V2DF_type_node
,
17775 V2DF_type_node
, V2DF_type_node
,
17776 V2DF_type_node
, NULL_TREE
);
17777 tree v4sf_ftype_v4sf_v4sf_v4sf
17778 = build_function_type_list (V4SF_type_node
,
17779 V4SF_type_node
, V4SF_type_node
,
17780 V4SF_type_node
, NULL_TREE
);
17781 tree v8hi_ftype_v16qi
17782 = build_function_type_list (V8HI_type_node
, V16QI_type_node
,
17784 tree v4si_ftype_v16qi
17785 = build_function_type_list (V4SI_type_node
, V16QI_type_node
,
17787 tree v2di_ftype_v16qi
17788 = build_function_type_list (V2DI_type_node
, V16QI_type_node
,
17790 tree v4si_ftype_v8hi
17791 = build_function_type_list (V4SI_type_node
, V8HI_type_node
,
17793 tree v2di_ftype_v8hi
17794 = build_function_type_list (V2DI_type_node
, V8HI_type_node
,
17796 tree v2di_ftype_v4si
17797 = build_function_type_list (V2DI_type_node
, V4SI_type_node
,
17799 tree v2di_ftype_pv2di
17800 = build_function_type_list (V2DI_type_node
, pv2di_type_node
,
17802 tree v16qi_ftype_v16qi_v16qi_int
17803 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17804 V16QI_type_node
, integer_type_node
,
17806 tree v16qi_ftype_v16qi_v16qi_v16qi
17807 = build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17808 V16QI_type_node
, V16QI_type_node
,
17810 tree v8hi_ftype_v8hi_v8hi_int
17811 = build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17812 V8HI_type_node
, integer_type_node
,
17814 tree v4si_ftype_v4si_v4si_int
17815 = build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17816 V4SI_type_node
, integer_type_node
,
17818 tree int_ftype_v2di_v2di
17819 = build_function_type_list (integer_type_node
,
17820 V2DI_type_node
, V2DI_type_node
,
17822 tree int_ftype_v16qi_int_v16qi_int_int
17823 = build_function_type_list (integer_type_node
,
17830 tree v16qi_ftype_v16qi_int_v16qi_int_int
17831 = build_function_type_list (V16QI_type_node
,
17838 tree int_ftype_v16qi_v16qi_int
17839 = build_function_type_list (integer_type_node
,
17846 /* The __float80 type. */
17847 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17848 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17852 /* The __float80 type. */
17853 tree float80_type_node
= make_node (REAL_TYPE
);
17855 TYPE_PRECISION (float80_type_node
) = 80;
17856 layout_type (float80_type_node
);
17857 (*lang_hooks
.types
.register_builtin_type
) (float80_type_node
,
17863 tree float128_type_node
= make_node (REAL_TYPE
);
17865 TYPE_PRECISION (float128_type_node
) = 128;
17866 layout_type (float128_type_node
);
17867 (*lang_hooks
.types
.register_builtin_type
) (float128_type_node
,
17870 /* TFmode support builtins. */
17871 ftype
= build_function_type (float128_type_node
,
17873 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_infq", ftype
, IX86_BUILTIN_INFQ
);
17875 ftype
= build_function_type_list (float128_type_node
,
17876 float128_type_node
,
17878 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
);
17880 ftype
= build_function_type_list (float128_type_node
,
17881 float128_type_node
,
17882 float128_type_node
,
17884 def_builtin_const (OPTION_MASK_ISA_64BIT
, "__builtin_copysignq", ftype
, IX86_BUILTIN_COPYSIGNQ
);
17887 /* Add all SSE builtins that are more or less simple operations on
17889 for (i
= 0, d
= bdesc_sse_3arg
;
17890 i
< ARRAY_SIZE (bdesc_sse_3arg
);
17893 /* Use one of the operands; the target can have a different mode for
17894 mask-generating compares. */
17895 enum machine_mode mode
;
17900 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17905 type
= v16qi_ftype_v16qi_v16qi_int
;
17908 type
= v8hi_ftype_v8hi_v8hi_int
;
17911 type
= v4si_ftype_v4si_v4si_int
;
17914 type
= v2di_ftype_v2di_v2di_int
;
17917 type
= v2df_ftype_v2df_v2df_int
;
17920 type
= v4sf_ftype_v4sf_v4sf_int
;
17923 gcc_unreachable ();
17926 /* Override for variable blends. */
17929 case CODE_FOR_sse4_1_blendvpd
:
17930 type
= v2df_ftype_v2df_v2df_v2df
;
17932 case CODE_FOR_sse4_1_blendvps
:
17933 type
= v4sf_ftype_v4sf_v4sf_v4sf
;
17935 case CODE_FOR_sse4_1_pblendvb
:
17936 type
= v16qi_ftype_v16qi_v16qi_v16qi
;
17942 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
17945 /* Add all builtins that are more or less simple operations on two
17947 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17949 /* Use one of the operands; the target can have a different mode for
17950 mask-generating compares. */
17951 enum machine_mode mode
;
17956 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17961 type
= v16qi_ftype_v16qi_v16qi
;
17964 type
= v8hi_ftype_v8hi_v8hi
;
17967 type
= v4si_ftype_v4si_v4si
;
17970 type
= v2di_ftype_v2di_v2di
;
17973 type
= v2df_ftype_v2df_v2df
;
17976 type
= v4sf_ftype_v4sf_v4sf
;
17979 type
= v8qi_ftype_v8qi_v8qi
;
17982 type
= v4hi_ftype_v4hi_v4hi
;
17985 type
= v2si_ftype_v2si_v2si
;
17988 type
= di_ftype_di_di
;
17992 gcc_unreachable ();
17995 /* Override for comparisons. */
17996 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17997 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17998 type
= v4si_ftype_v4sf_v4sf
;
18000 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18001 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18002 type
= v2di_ftype_v2df_v2df
;
18004 if (d
->icode
== CODE_FOR_vec_pack_sfix_v2df
)
18005 type
= v4si_ftype_v2df_v2df
;
18007 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18010 /* Add all builtins that are more or less simple operations on 1 operand. */
18011 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18013 enum machine_mode mode
;
18018 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18023 type
= v16qi_ftype_v16qi
;
18026 type
= v8hi_ftype_v8hi
;
18029 type
= v4si_ftype_v4si
;
18032 type
= v2df_ftype_v2df
;
18035 type
= v4sf_ftype_v4sf
;
18038 type
= v8qi_ftype_v8qi
;
18041 type
= v4hi_ftype_v4hi
;
18044 type
= v2si_ftype_v2si
;
18051 def_builtin_const (d
->mask
, d
->name
, type
, d
->code
);
18054 /* pcmpestr[im] insns. */
18055 for (i
= 0, d
= bdesc_pcmpestr
;
18056 i
< ARRAY_SIZE (bdesc_pcmpestr
);
18059 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
18060 ftype
= v16qi_ftype_v16qi_int_v16qi_int_int
;
18062 ftype
= int_ftype_v16qi_int_v16qi_int_int
;
18063 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18066 /* pcmpistr[im] insns. */
18067 for (i
= 0, d
= bdesc_pcmpistr
;
18068 i
< ARRAY_SIZE (bdesc_pcmpistr
);
18071 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
18072 ftype
= v16qi_ftype_v16qi_v16qi_int
;
18074 ftype
= int_ftype_v16qi_v16qi_int
;
18075 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
18078 /* Add the remaining MMX insns with somewhat more complicated types. */
18079 def_builtin (OPTION_MASK_ISA_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
18080 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
18081 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
18082 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
18084 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
18085 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
18086 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
18088 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
18089 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
18091 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
18092 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
18094 /* comi/ucomi insns. */
18095 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18096 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
18097 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
18099 def_builtin_const (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
18102 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
18103 def_builtin_const (d
->mask
, d
->name
, int_ftype_v2di_v2di
, d
->code
);
18105 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
18106 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
18107 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
18109 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
18110 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
18111 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
18112 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
18113 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
18114 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
18115 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
18116 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
18117 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
18118 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
18119 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
18121 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
18123 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
18124 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
18126 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
18127 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
18128 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
18129 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
18131 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
18132 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
18133 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
18134 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
18136 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
18138 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
18140 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
18141 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
18142 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
18143 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
18144 ftype
= build_function_type_list (float_type_node
,
18147 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_rsqrtf", ftype
, IX86_BUILTIN_RSQRTF
);
18148 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
18149 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
18151 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
18153 /* Original 3DNow! */
18154 def_builtin (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
18155 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
18156 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
18157 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
18158 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
18159 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
18160 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
18161 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
18162 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
18163 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
18164 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
18165 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
18166 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
18167 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
18168 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
18169 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
18170 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
18171 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
18172 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
18173 def_builtin_const (OPTION_MASK_ISA_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
18175 /* 3DNow! extension as used in the Athlon CPU. */
18176 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
18177 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
18178 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
18179 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
18180 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
18181 def_builtin_const (OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
18184 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
18186 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
18187 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
18189 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
18190 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
18192 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
18193 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
18194 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
18195 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
18196 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
18198 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
18199 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
18200 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
18201 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
18203 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
18204 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
18206 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
18208 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
18209 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
18211 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
18212 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
18213 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
18214 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
18215 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
18217 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
18219 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
18220 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
18221 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
18222 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
18224 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
18225 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
18226 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
18228 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
18229 def_builtin_const (OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
18230 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
18231 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
18233 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
18234 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
18235 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
18237 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
18238 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
18240 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
18241 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
18243 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
18244 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
18245 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
18246 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
18247 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSLLW128
);
18248 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSLLD128
);
18249 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
18251 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
18252 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
18253 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
18254 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
18255 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRLW128
);
18256 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRLD128
);
18257 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
18259 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
18260 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
18261 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi
, IX86_BUILTIN_PSRAW128
);
18262 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si
, IX86_BUILTIN_PSRAD128
);
18264 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
18266 /* Prescott New Instructions. */
18267 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned
, IX86_BUILTIN_MONITOR
);
18268 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned
, IX86_BUILTIN_MWAIT
);
18269 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_lddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
18272 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
18273 def_builtin_const (OPTION_MASK_ISA_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
, IX86_BUILTIN_PALIGNR
);
18276 def_builtin (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_movntdqa", v2di_ftype_pv2di
, IX86_BUILTIN_MOVNTDQA
);
18277 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVSXBW128
);
18278 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVSXBD128
);
18279 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVSXBQ128
);
18280 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVSXWD128
);
18281 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVSXWQ128
);
18282 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVSXDQ128
);
18283 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi
, IX86_BUILTIN_PMOVZXBW128
);
18284 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi
, IX86_BUILTIN_PMOVZXBD128
);
18285 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi
, IX86_BUILTIN_PMOVZXBQ128
);
18286 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi
, IX86_BUILTIN_PMOVZXWD128
);
18287 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi
, IX86_BUILTIN_PMOVZXWQ128
);
18288 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si
, IX86_BUILTIN_PMOVZXDQ128
);
18289 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULDQ128
);
18290 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundpd", v2df_ftype_v2df_int
, IX86_BUILTIN_ROUNDPD
);
18291 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int
, IX86_BUILTIN_ROUNDPS
);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_ROUNDSD
);
18293 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_ROUNDSS
);
18296 ftype
= build_function_type_list (unsigned_type_node
,
18297 unsigned_type_node
,
18298 unsigned_char_type_node
,
18300 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32qi", ftype
, IX86_BUILTIN_CRC32QI
);
18301 ftype
= build_function_type_list (unsigned_type_node
,
18302 unsigned_type_node
,
18303 short_unsigned_type_node
,
18305 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32hi", ftype
, IX86_BUILTIN_CRC32HI
);
18306 ftype
= build_function_type_list (unsigned_type_node
,
18307 unsigned_type_node
,
18308 unsigned_type_node
,
18310 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32si", ftype
, IX86_BUILTIN_CRC32SI
);
18311 ftype
= build_function_type_list (long_long_unsigned_type_node
,
18312 long_long_unsigned_type_node
,
18313 long_long_unsigned_type_node
,
18315 def_builtin_const (OPTION_MASK_ISA_SSE4_2
, "__builtin_ia32_crc32di", ftype
, IX86_BUILTIN_CRC32DI
);
18317 /* AMDFAM10 SSE4A New built-ins */
18318 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
18319 def_builtin (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
18320 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
18321 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
18322 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
18323 def_builtin_const (OPTION_MASK_ISA_SSE4A
, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
18325 /* Access to the vec_init patterns. */
18326 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
18327 integer_type_node
, NULL_TREE
);
18328 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si", ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
18330 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
18331 short_integer_type_node
,
18332 short_integer_type_node
,
18333 short_integer_type_node
, NULL_TREE
);
18334 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi", ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
18336 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
18337 char_type_node
, char_type_node
,
18338 char_type_node
, char_type_node
,
18339 char_type_node
, char_type_node
,
18340 char_type_node
, NULL_TREE
);
18341 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi", ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
18343 /* Access to the vec_extract patterns. */
18344 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
18345 integer_type_node
, NULL_TREE
);
18346 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df", ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
18348 ftype
= build_function_type_list (long_long_integer_type_node
,
18349 V2DI_type_node
, integer_type_node
,
18351 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di", ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
18353 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
18354 integer_type_node
, NULL_TREE
);
18355 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf", ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
18357 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
18358 integer_type_node
, NULL_TREE
);
18359 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si", ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
18361 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
18362 integer_type_node
, NULL_TREE
);
18363 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi", ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
18365 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
18366 integer_type_node
, NULL_TREE
);
18367 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi", ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
18369 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
18370 integer_type_node
, NULL_TREE
);
18371 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si", ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
18373 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
18374 integer_type_node
, NULL_TREE
);
18375 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi", ftype
, IX86_BUILTIN_VEC_EXT_V16QI
);
18377 /* Access to the vec_set patterns. */
18378 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
18380 integer_type_node
, NULL_TREE
);
18381 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
, "__builtin_ia32_vec_set_v2di", ftype
, IX86_BUILTIN_VEC_SET_V2DI
);
18383 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
18385 integer_type_node
, NULL_TREE
);
18386 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf", ftype
, IX86_BUILTIN_VEC_SET_V4SF
);
18388 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
18390 integer_type_node
, NULL_TREE
);
18391 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si", ftype
, IX86_BUILTIN_VEC_SET_V4SI
);
18393 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
18395 integer_type_node
, NULL_TREE
);
18396 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi", ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
18398 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
18400 integer_type_node
, NULL_TREE
);
18401 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, "__builtin_ia32_vec_set_v4hi", ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
18403 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
18405 integer_type_node
, NULL_TREE
);
18406 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi", ftype
, IX86_BUILTIN_VEC_SET_V16QI
);
18410 ix86_init_builtins (void)
18413 ix86_init_mmx_sse_builtins ();
18416 /* Errors in the source file can cause expand_expr to return const0_rtx
18417 where we expect a vector. To avoid crashing, use one of the vector
18418 clear instructions. */
18420 safe_vector_operand (rtx x
, enum machine_mode mode
)
18422 if (x
== const0_rtx
)
18423 x
= CONST0_RTX (mode
);
18427 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18428 4 operands. The third argument must be a constant smaller than 8
18432 ix86_expand_sse_4_operands_builtin (enum insn_code icode
, tree exp
,
18436 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18437 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18438 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18439 rtx op0
= expand_normal (arg0
);
18440 rtx op1
= expand_normal (arg1
);
18441 rtx op2
= expand_normal (arg2
);
18442 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18443 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18444 enum machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
18445 enum machine_mode mode3
= insn_data
[icode
].operand
[3].mode
;
18447 if (VECTOR_MODE_P (mode1
))
18448 op0
= safe_vector_operand (op0
, mode1
);
18449 if (VECTOR_MODE_P (mode2
))
18450 op1
= safe_vector_operand (op1
, mode2
);
18451 if (VECTOR_MODE_P (mode3
))
18452 op2
= safe_vector_operand (op2
, mode3
);
18456 || GET_MODE (target
) != tmode
18457 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18458 target
= gen_reg_rtx (tmode
);
18460 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18461 op0
= copy_to_mode_reg (mode1
, op0
);
18462 if ((optimize
&& !register_operand (op1
, mode2
))
18463 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18464 op1
= copy_to_mode_reg (mode2
, op1
);
18466 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18469 case CODE_FOR_sse4_1_blendvpd
:
18470 case CODE_FOR_sse4_1_blendvps
:
18471 case CODE_FOR_sse4_1_pblendvb
:
18472 op2
= copy_to_mode_reg (mode3
, op2
);
18475 case CODE_FOR_sse4_1_roundsd
:
18476 case CODE_FOR_sse4_1_roundss
:
18477 error ("the third argument must be a 4-bit immediate");
18481 error ("the third argument must be an 8-bit immediate");
18485 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18492 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18495 ix86_expand_crc32 (enum insn_code icode
, tree exp
, rtx target
)
18498 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18499 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18500 rtx op0
= expand_normal (arg0
);
18501 rtx op1
= expand_normal (arg1
);
18502 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18503 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18504 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18508 || GET_MODE (target
) != tmode
18509 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18510 target
= gen_reg_rtx (tmode
);
18512 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18513 op0
= copy_to_mode_reg (mode0
, op0
);
18514 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18516 op1
= copy_to_reg (op1
);
18517 op1
= simplify_gen_subreg (mode1
, op1
, GET_MODE (op1
), 0);
18520 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18527 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18530 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
18533 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18534 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18535 rtx op0
= expand_normal (arg0
);
18536 rtx op1
= expand_normal (arg1
);
18537 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18538 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18539 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18541 if (VECTOR_MODE_P (mode0
))
18542 op0
= safe_vector_operand (op0
, mode0
);
18543 if (VECTOR_MODE_P (mode1
))
18544 op1
= safe_vector_operand (op1
, mode1
);
18546 if (optimize
|| !target
18547 || GET_MODE (target
) != tmode
18548 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18549 target
= gen_reg_rtx (tmode
);
18551 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
18553 rtx x
= gen_reg_rtx (V4SImode
);
18554 emit_insn (gen_sse2_loadd (x
, op1
));
18555 op1
= gen_lowpart (TImode
, x
);
18558 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18559 op0
= copy_to_mode_reg (mode0
, op0
);
18560 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18561 op1
= copy_to_mode_reg (mode1
, op1
);
18563 /* ??? Using ix86_fixup_binary_operands is problematic when
18564 we've got mismatched modes. Fake it. */
18570 if (tmode
== mode0
&& tmode
== mode1
)
18572 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
18576 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
18578 op0
= force_reg (mode0
, op0
);
18579 op1
= force_reg (mode1
, op1
);
18580 target
= gen_reg_rtx (tmode
);
18583 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18590 /* Subroutine of ix86_expand_builtin to take care of stores. */
18593 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
18596 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18597 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18598 rtx op0
= expand_normal (arg0
);
18599 rtx op1
= expand_normal (arg1
);
18600 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
18601 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
18603 if (VECTOR_MODE_P (mode1
))
18604 op1
= safe_vector_operand (op1
, mode1
);
18606 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18607 op1
= copy_to_mode_reg (mode1
, op1
);
18609 pat
= GEN_FCN (icode
) (op0
, op1
);
18615 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18618 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
18619 rtx target
, int do_load
)
18622 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18623 rtx op0
= expand_normal (arg0
);
18624 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18625 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18627 if (optimize
|| !target
18628 || GET_MODE (target
) != tmode
18629 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18630 target
= gen_reg_rtx (tmode
);
18632 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18635 if (VECTOR_MODE_P (mode0
))
18636 op0
= safe_vector_operand (op0
, mode0
);
18638 if ((optimize
&& !register_operand (op0
, mode0
))
18639 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18640 op0
= copy_to_mode_reg (mode0
, op0
);
18645 case CODE_FOR_sse4_1_roundpd
:
18646 case CODE_FOR_sse4_1_roundps
:
18648 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18649 rtx op1
= expand_normal (arg1
);
18650 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18652 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18654 error ("the second argument must be a 4-bit immediate");
18657 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18661 pat
= GEN_FCN (icode
) (target
, op0
);
18671 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18672 sqrtss, rsqrtss, rcpss. */
18675 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
18678 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18679 rtx op1
, op0
= expand_normal (arg0
);
18680 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18681 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18683 if (optimize
|| !target
18684 || GET_MODE (target
) != tmode
18685 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18686 target
= gen_reg_rtx (tmode
);
18688 if (VECTOR_MODE_P (mode0
))
18689 op0
= safe_vector_operand (op0
, mode0
);
18691 if ((optimize
&& !register_operand (op0
, mode0
))
18692 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18693 op0
= copy_to_mode_reg (mode0
, op0
);
18696 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
18697 op1
= copy_to_mode_reg (mode0
, op1
);
18699 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18706 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18709 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
18713 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18714 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18715 rtx op0
= expand_normal (arg0
);
18716 rtx op1
= expand_normal (arg1
);
18718 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
18719 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
18720 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
18721 enum rtx_code comparison
= d
->comparison
;
18723 if (VECTOR_MODE_P (mode0
))
18724 op0
= safe_vector_operand (op0
, mode0
);
18725 if (VECTOR_MODE_P (mode1
))
18726 op1
= safe_vector_operand (op1
, mode1
);
18728 /* Swap operands if we have a comparison that isn't available in
18730 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18732 rtx tmp
= gen_reg_rtx (mode1
);
18733 emit_move_insn (tmp
, op1
);
18738 if (optimize
|| !target
18739 || GET_MODE (target
) != tmode
18740 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
18741 target
= gen_reg_rtx (tmode
);
18743 if ((optimize
&& !register_operand (op0
, mode0
))
18744 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
18745 op0
= copy_to_mode_reg (mode0
, op0
);
18746 if ((optimize
&& !register_operand (op1
, mode1
))
18747 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
18748 op1
= copy_to_mode_reg (mode1
, op1
);
18750 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
18751 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
18758 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18761 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
18765 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18766 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18767 rtx op0
= expand_normal (arg0
);
18768 rtx op1
= expand_normal (arg1
);
18769 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18770 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18771 enum rtx_code comparison
= d
->comparison
;
18773 if (VECTOR_MODE_P (mode0
))
18774 op0
= safe_vector_operand (op0
, mode0
);
18775 if (VECTOR_MODE_P (mode1
))
18776 op1
= safe_vector_operand (op1
, mode1
);
18778 /* Swap operands if we have a comparison that isn't available in
18780 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
18787 target
= gen_reg_rtx (SImode
);
18788 emit_move_insn (target
, const0_rtx
);
18789 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18791 if ((optimize
&& !register_operand (op0
, mode0
))
18792 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18793 op0
= copy_to_mode_reg (mode0
, op0
);
18794 if ((optimize
&& !register_operand (op1
, mode1
))
18795 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18796 op1
= copy_to_mode_reg (mode1
, op1
);
18798 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18802 emit_insn (gen_rtx_SET (VOIDmode
,
18803 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18804 gen_rtx_fmt_ee (comparison
, QImode
,
18808 return SUBREG_REG (target
);
18811 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18814 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
18818 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18819 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18820 rtx op0
= expand_normal (arg0
);
18821 rtx op1
= expand_normal (arg1
);
18822 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
18823 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
18824 enum rtx_code comparison
= d
->comparison
;
18826 if (VECTOR_MODE_P (mode0
))
18827 op0
= safe_vector_operand (op0
, mode0
);
18828 if (VECTOR_MODE_P (mode1
))
18829 op1
= safe_vector_operand (op1
, mode1
);
18831 target
= gen_reg_rtx (SImode
);
18832 emit_move_insn (target
, const0_rtx
);
18833 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18835 if ((optimize
&& !register_operand (op0
, mode0
))
18836 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
18837 op0
= copy_to_mode_reg (mode0
, op0
);
18838 if ((optimize
&& !register_operand (op1
, mode1
))
18839 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
18840 op1
= copy_to_mode_reg (mode1
, op1
);
18842 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
18846 emit_insn (gen_rtx_SET (VOIDmode
,
18847 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18848 gen_rtx_fmt_ee (comparison
, QImode
,
18852 return SUBREG_REG (target
);
18855 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18858 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
18859 tree exp
, rtx target
)
18862 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18863 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18864 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18865 tree arg3
= CALL_EXPR_ARG (exp
, 3);
18866 tree arg4
= CALL_EXPR_ARG (exp
, 4);
18867 rtx scratch0
, scratch1
;
18868 rtx op0
= expand_normal (arg0
);
18869 rtx op1
= expand_normal (arg1
);
18870 rtx op2
= expand_normal (arg2
);
18871 rtx op3
= expand_normal (arg3
);
18872 rtx op4
= expand_normal (arg4
);
18873 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
18875 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
18876 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
18877 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
18878 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
18879 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
18880 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
18881 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
18883 if (VECTOR_MODE_P (modev2
))
18884 op0
= safe_vector_operand (op0
, modev2
);
18885 if (VECTOR_MODE_P (modev4
))
18886 op2
= safe_vector_operand (op2
, modev4
);
18888 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
18889 op0
= copy_to_mode_reg (modev2
, op0
);
18890 if (! (*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modei3
))
18891 op1
= copy_to_mode_reg (modei3
, op1
);
18892 if ((optimize
&& !register_operand (op2
, modev4
))
18893 || !(*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modev4
))
18894 op2
= copy_to_mode_reg (modev4
, op2
);
18895 if (! (*insn_data
[d
->icode
].operand
[5].predicate
) (op3
, modei5
))
18896 op3
= copy_to_mode_reg (modei5
, op3
);
18898 if (! (*insn_data
[d
->icode
].operand
[6].predicate
) (op4
, modeimm
))
18900 error ("the fifth argument must be a 8-bit immediate");
18904 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
18906 if (optimize
|| !target
18907 || GET_MODE (target
) != tmode0
18908 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
18909 target
= gen_reg_rtx (tmode0
);
18911 scratch1
= gen_reg_rtx (tmode1
);
18913 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
18915 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
18917 if (optimize
|| !target
18918 || GET_MODE (target
) != tmode1
18919 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
18920 target
= gen_reg_rtx (tmode1
);
18922 scratch0
= gen_reg_rtx (tmode0
);
18924 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
18928 gcc_assert (d
->flag
);
18930 scratch0
= gen_reg_rtx (tmode0
);
18931 scratch1
= gen_reg_rtx (tmode1
);
18933 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
18943 target
= gen_reg_rtx (SImode
);
18944 emit_move_insn (target
, const0_rtx
);
18945 target
= gen_rtx_SUBREG (QImode
, target
, 0);
18948 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
18949 gen_rtx_fmt_ee (EQ
, QImode
,
18950 gen_rtx_REG ((enum machine_mode
) d
->flag
,
18953 return SUBREG_REG (target
);
18960 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18963 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
18964 tree exp
, rtx target
)
18967 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18968 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18969 tree arg2
= CALL_EXPR_ARG (exp
, 2);
18970 rtx scratch0
, scratch1
;
18971 rtx op0
= expand_normal (arg0
);
18972 rtx op1
= expand_normal (arg1
);
18973 rtx op2
= expand_normal (arg2
);
18974 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
18976 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
18977 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
18978 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
18979 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
18980 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
18982 if (VECTOR_MODE_P (modev2
))
18983 op0
= safe_vector_operand (op0
, modev2
);
18984 if (VECTOR_MODE_P (modev3
))
18985 op1
= safe_vector_operand (op1
, modev3
);
18987 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op0
, modev2
))
18988 op0
= copy_to_mode_reg (modev2
, op0
);
18989 if ((optimize
&& !register_operand (op1
, modev3
))
18990 || !(*insn_data
[d
->icode
].operand
[3].predicate
) (op1
, modev3
))
18991 op1
= copy_to_mode_reg (modev3
, op1
);
18993 if (! (*insn_data
[d
->icode
].operand
[4].predicate
) (op2
, modeimm
))
18995 error ("the third argument must be a 8-bit immediate");
18999 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
19001 if (optimize
|| !target
19002 || GET_MODE (target
) != tmode0
19003 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode0
))
19004 target
= gen_reg_rtx (tmode0
);
19006 scratch1
= gen_reg_rtx (tmode1
);
19008 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
19010 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
19012 if (optimize
|| !target
19013 || GET_MODE (target
) != tmode1
19014 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (target
, tmode1
))
19015 target
= gen_reg_rtx (tmode1
);
19017 scratch0
= gen_reg_rtx (tmode0
);
19019 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
19023 gcc_assert (d
->flag
);
19025 scratch0
= gen_reg_rtx (tmode0
);
19026 scratch1
= gen_reg_rtx (tmode1
);
19028 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
19038 target
= gen_reg_rtx (SImode
);
19039 emit_move_insn (target
, const0_rtx
);
19040 target
= gen_rtx_SUBREG (QImode
, target
, 0);
19043 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
19044 gen_rtx_fmt_ee (EQ
, QImode
,
19045 gen_rtx_REG ((enum machine_mode
) d
->flag
,
19048 return SUBREG_REG (target
);
19054 /* Return the integer constant in ARG. Constrain it to be in the range
19055 of the subparts of VEC_TYPE; issue an error if not. */
19058 get_element_number (tree vec_type
, tree arg
)
19060 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
19062 if (!host_integerp (arg
, 1)
19063 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
19065 error ("selector must be an integer constant in the range 0..%wi", max
);
19072 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19073 ix86_expand_vector_init. We DO have language-level syntax for this, in
19074 the form of (type){ init-list }. Except that since we can't place emms
19075 instructions from inside the compiler, we can't allow the use of MMX
19076 registers unless the user explicitly asks for it. So we do *not* define
19077 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19078 we have builtins invoked by mmintrin.h that gives us license to emit
19079 these sorts of instructions. */
19082 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
19084 enum machine_mode tmode
= TYPE_MODE (type
);
19085 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
19086 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
19087 rtvec v
= rtvec_alloc (n_elt
);
19089 gcc_assert (VECTOR_MODE_P (tmode
));
19090 gcc_assert (call_expr_nargs (exp
) == n_elt
);
19092 for (i
= 0; i
< n_elt
; ++i
)
19094 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
19095 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
19098 if (!target
|| !register_operand (target
, tmode
))
19099 target
= gen_reg_rtx (tmode
);
19101 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
19105 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19106 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19107 had a language-level syntax for referencing vector elements. */
19110 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
19112 enum machine_mode tmode
, mode0
;
19117 arg0
= CALL_EXPR_ARG (exp
, 0);
19118 arg1
= CALL_EXPR_ARG (exp
, 1);
19120 op0
= expand_normal (arg0
);
19121 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
19123 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19124 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
19125 gcc_assert (VECTOR_MODE_P (mode0
));
19127 op0
= force_reg (mode0
, op0
);
19129 if (optimize
|| !target
|| !register_operand (target
, tmode
))
19130 target
= gen_reg_rtx (tmode
);
19132 ix86_expand_vector_extract (true, target
, op0
, elt
);
19137 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19138 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19139 a language-level syntax for referencing vector elements. */
19142 ix86_expand_vec_set_builtin (tree exp
)
19144 enum machine_mode tmode
, mode1
;
19145 tree arg0
, arg1
, arg2
;
19147 rtx op0
, op1
, target
;
19149 arg0
= CALL_EXPR_ARG (exp
, 0);
19150 arg1
= CALL_EXPR_ARG (exp
, 1);
19151 arg2
= CALL_EXPR_ARG (exp
, 2);
19153 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
19154 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
19155 gcc_assert (VECTOR_MODE_P (tmode
));
19157 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
19158 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
19159 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
19161 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
19162 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
19164 op0
= force_reg (tmode
, op0
);
19165 op1
= force_reg (mode1
, op1
);
19167 /* OP0 is the source of these builtin functions and shouldn't be
19168 modified. Create a copy, use it and return it as target. */
19169 target
= gen_reg_rtx (tmode
);
19170 emit_move_insn (target
, op0
);
19171 ix86_expand_vector_set (true, target
, op1
, elt
);
19176 /* Expand an expression EXP that calls a built-in function,
19177 with result going to TARGET if that's convenient
19178 (and in mode MODE if that's convenient).
19179 SUBTARGET may be used as the target for computing one of EXP's operands.
19180 IGNORE is nonzero if the value is to be ignored. */
19183 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
19184 enum machine_mode mode ATTRIBUTE_UNUSED
,
19185 int ignore ATTRIBUTE_UNUSED
)
19187 const struct builtin_description
*d
;
19189 enum insn_code icode
;
19190 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19191 tree arg0
, arg1
, arg2
, arg3
;
19192 rtx op0
, op1
, op2
, op3
, pat
;
19193 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
19194 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
19198 case IX86_BUILTIN_EMMS
:
19199 emit_insn (gen_mmx_emms ());
19202 case IX86_BUILTIN_SFENCE
:
19203 emit_insn (gen_sse_sfence ());
19206 case IX86_BUILTIN_MASKMOVQ
:
19207 case IX86_BUILTIN_MASKMOVDQU
:
19208 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
19209 ? CODE_FOR_mmx_maskmovq
19210 : CODE_FOR_sse2_maskmovdqu
);
19211 /* Note the arg order is different from the operand order. */
19212 arg1
= CALL_EXPR_ARG (exp
, 0);
19213 arg2
= CALL_EXPR_ARG (exp
, 1);
19214 arg0
= CALL_EXPR_ARG (exp
, 2);
19215 op0
= expand_normal (arg0
);
19216 op1
= expand_normal (arg1
);
19217 op2
= expand_normal (arg2
);
19218 mode0
= insn_data
[icode
].operand
[0].mode
;
19219 mode1
= insn_data
[icode
].operand
[1].mode
;
19220 mode2
= insn_data
[icode
].operand
[2].mode
;
19222 op0
= force_reg (Pmode
, op0
);
19223 op0
= gen_rtx_MEM (mode1
, op0
);
19225 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
19226 op0
= copy_to_mode_reg (mode0
, op0
);
19227 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
19228 op1
= copy_to_mode_reg (mode1
, op1
);
19229 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
19230 op2
= copy_to_mode_reg (mode2
, op2
);
19231 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
19237 case IX86_BUILTIN_RSQRTF
:
19238 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
19240 case IX86_BUILTIN_SQRTSS
:
19241 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
19242 case IX86_BUILTIN_RSQRTSS
:
19243 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
19244 case IX86_BUILTIN_RCPSS
:
19245 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
19247 case IX86_BUILTIN_LOADUPS
:
19248 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
19250 case IX86_BUILTIN_STOREUPS
:
19251 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
19253 case IX86_BUILTIN_LOADHPS
:
19254 case IX86_BUILTIN_LOADLPS
:
19255 case IX86_BUILTIN_LOADHPD
:
19256 case IX86_BUILTIN_LOADLPD
:
19257 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
19258 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
19259 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
19260 : CODE_FOR_sse2_loadlpd
);
19261 arg0
= CALL_EXPR_ARG (exp
, 0);
19262 arg1
= CALL_EXPR_ARG (exp
, 1);
19263 op0
= expand_normal (arg0
);
19264 op1
= expand_normal (arg1
);
19265 tmode
= insn_data
[icode
].operand
[0].mode
;
19266 mode0
= insn_data
[icode
].operand
[1].mode
;
19267 mode1
= insn_data
[icode
].operand
[2].mode
;
19269 op0
= force_reg (mode0
, op0
);
19270 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
19271 if (optimize
|| target
== 0
19272 || GET_MODE (target
) != tmode
19273 || !register_operand (target
, tmode
))
19274 target
= gen_reg_rtx (tmode
);
19275 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19281 case IX86_BUILTIN_STOREHPS
:
19282 case IX86_BUILTIN_STORELPS
:
19283 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
19284 : CODE_FOR_sse_storelps
);
19285 arg0
= CALL_EXPR_ARG (exp
, 0);
19286 arg1
= CALL_EXPR_ARG (exp
, 1);
19287 op0
= expand_normal (arg0
);
19288 op1
= expand_normal (arg1
);
19289 mode0
= insn_data
[icode
].operand
[0].mode
;
19290 mode1
= insn_data
[icode
].operand
[1].mode
;
19292 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19293 op1
= force_reg (mode1
, op1
);
19295 pat
= GEN_FCN (icode
) (op0
, op1
);
19301 case IX86_BUILTIN_MOVNTPS
:
19302 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
19303 case IX86_BUILTIN_MOVNTQ
:
19304 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
19306 case IX86_BUILTIN_LDMXCSR
:
19307 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
19308 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19309 emit_move_insn (target
, op0
);
19310 emit_insn (gen_sse_ldmxcsr (target
));
19313 case IX86_BUILTIN_STMXCSR
:
19314 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
19315 emit_insn (gen_sse_stmxcsr (target
));
19316 return copy_to_mode_reg (SImode
, target
);
19318 case IX86_BUILTIN_SHUFPS
:
19319 case IX86_BUILTIN_SHUFPD
:
19320 icode
= (fcode
== IX86_BUILTIN_SHUFPS
19321 ? CODE_FOR_sse_shufps
19322 : CODE_FOR_sse2_shufpd
);
19323 arg0
= CALL_EXPR_ARG (exp
, 0);
19324 arg1
= CALL_EXPR_ARG (exp
, 1);
19325 arg2
= CALL_EXPR_ARG (exp
, 2);
19326 op0
= expand_normal (arg0
);
19327 op1
= expand_normal (arg1
);
19328 op2
= expand_normal (arg2
);
19329 tmode
= insn_data
[icode
].operand
[0].mode
;
19330 mode0
= insn_data
[icode
].operand
[1].mode
;
19331 mode1
= insn_data
[icode
].operand
[2].mode
;
19332 mode2
= insn_data
[icode
].operand
[3].mode
;
19334 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19335 op0
= copy_to_mode_reg (mode0
, op0
);
19336 if ((optimize
&& !register_operand (op1
, mode1
))
19337 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19338 op1
= copy_to_mode_reg (mode1
, op1
);
19339 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19341 /* @@@ better error message */
19342 error ("mask must be an immediate");
19343 return gen_reg_rtx (tmode
);
19345 if (optimize
|| target
== 0
19346 || GET_MODE (target
) != tmode
19347 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19348 target
= gen_reg_rtx (tmode
);
19349 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19355 case IX86_BUILTIN_PSHUFW
:
19356 case IX86_BUILTIN_PSHUFD
:
19357 case IX86_BUILTIN_PSHUFHW
:
19358 case IX86_BUILTIN_PSHUFLW
:
19359 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
19360 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
19361 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
19362 : CODE_FOR_mmx_pshufw
);
19363 arg0
= CALL_EXPR_ARG (exp
, 0);
19364 arg1
= CALL_EXPR_ARG (exp
, 1);
19365 op0
= expand_normal (arg0
);
19366 op1
= expand_normal (arg1
);
19367 tmode
= insn_data
[icode
].operand
[0].mode
;
19368 mode1
= insn_data
[icode
].operand
[1].mode
;
19369 mode2
= insn_data
[icode
].operand
[2].mode
;
19371 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19372 op0
= copy_to_mode_reg (mode1
, op0
);
19373 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19375 /* @@@ better error message */
19376 error ("mask must be an immediate");
19380 || GET_MODE (target
) != tmode
19381 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19382 target
= gen_reg_rtx (tmode
);
19383 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19389 case IX86_BUILTIN_PSLLWI128
:
19390 icode
= CODE_FOR_ashlv8hi3
;
19392 case IX86_BUILTIN_PSLLDI128
:
19393 icode
= CODE_FOR_ashlv4si3
;
19395 case IX86_BUILTIN_PSLLQI128
:
19396 icode
= CODE_FOR_ashlv2di3
;
19398 case IX86_BUILTIN_PSRAWI128
:
19399 icode
= CODE_FOR_ashrv8hi3
;
19401 case IX86_BUILTIN_PSRADI128
:
19402 icode
= CODE_FOR_ashrv4si3
;
19404 case IX86_BUILTIN_PSRLWI128
:
19405 icode
= CODE_FOR_lshrv8hi3
;
19407 case IX86_BUILTIN_PSRLDI128
:
19408 icode
= CODE_FOR_lshrv4si3
;
19410 case IX86_BUILTIN_PSRLQI128
:
19411 icode
= CODE_FOR_lshrv2di3
;
19414 arg0
= CALL_EXPR_ARG (exp
, 0);
19415 arg1
= CALL_EXPR_ARG (exp
, 1);
19416 op0
= expand_normal (arg0
);
19417 op1
= expand_normal (arg1
);
19419 if (!CONST_INT_P (op1
))
19421 error ("shift must be an immediate");
19424 if (INTVAL (op1
) < 0 || INTVAL (op1
) > 255)
19425 op1
= GEN_INT (255);
19427 tmode
= insn_data
[icode
].operand
[0].mode
;
19428 mode1
= insn_data
[icode
].operand
[1].mode
;
19429 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19430 op0
= copy_to_reg (op0
);
19432 target
= gen_reg_rtx (tmode
);
19433 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19439 case IX86_BUILTIN_PSLLW128
:
19440 icode
= CODE_FOR_ashlv8hi3
;
19442 case IX86_BUILTIN_PSLLD128
:
19443 icode
= CODE_FOR_ashlv4si3
;
19445 case IX86_BUILTIN_PSLLQ128
:
19446 icode
= CODE_FOR_ashlv2di3
;
19448 case IX86_BUILTIN_PSRAW128
:
19449 icode
= CODE_FOR_ashrv8hi3
;
19451 case IX86_BUILTIN_PSRAD128
:
19452 icode
= CODE_FOR_ashrv4si3
;
19454 case IX86_BUILTIN_PSRLW128
:
19455 icode
= CODE_FOR_lshrv8hi3
;
19457 case IX86_BUILTIN_PSRLD128
:
19458 icode
= CODE_FOR_lshrv4si3
;
19460 case IX86_BUILTIN_PSRLQ128
:
19461 icode
= CODE_FOR_lshrv2di3
;
19464 arg0
= CALL_EXPR_ARG (exp
, 0);
19465 arg1
= CALL_EXPR_ARG (exp
, 1);
19466 op0
= expand_normal (arg0
);
19467 op1
= expand_normal (arg1
);
19469 tmode
= insn_data
[icode
].operand
[0].mode
;
19470 mode1
= insn_data
[icode
].operand
[1].mode
;
19472 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19473 op0
= copy_to_reg (op0
);
19475 op1
= simplify_gen_subreg (TImode
, op1
, GET_MODE (op1
), 0);
19476 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
19477 op1
= copy_to_reg (op1
);
19479 target
= gen_reg_rtx (tmode
);
19480 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19486 case IX86_BUILTIN_PSLLDQI128
:
19487 case IX86_BUILTIN_PSRLDQI128
:
19488 icode
= (fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
19489 : CODE_FOR_sse2_lshrti3
);
19490 arg0
= CALL_EXPR_ARG (exp
, 0);
19491 arg1
= CALL_EXPR_ARG (exp
, 1);
19492 op0
= expand_normal (arg0
);
19493 op1
= expand_normal (arg1
);
19494 tmode
= insn_data
[icode
].operand
[0].mode
;
19495 mode1
= insn_data
[icode
].operand
[1].mode
;
19496 mode2
= insn_data
[icode
].operand
[2].mode
;
19498 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19500 op0
= copy_to_reg (op0
);
19501 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19503 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19505 error ("shift must be an immediate");
19508 target
= gen_reg_rtx (V2DImode
);
19509 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0),
19516 case IX86_BUILTIN_FEMMS
:
19517 emit_insn (gen_mmx_femms ());
19520 case IX86_BUILTIN_PAVGUSB
:
19521 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
19523 case IX86_BUILTIN_PF2ID
:
19524 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
19526 case IX86_BUILTIN_PFACC
:
19527 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
19529 case IX86_BUILTIN_PFADD
:
19530 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
19532 case IX86_BUILTIN_PFCMPEQ
:
19533 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
19535 case IX86_BUILTIN_PFCMPGE
:
19536 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
19538 case IX86_BUILTIN_PFCMPGT
:
19539 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
19541 case IX86_BUILTIN_PFMAX
:
19542 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
19544 case IX86_BUILTIN_PFMIN
:
19545 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
19547 case IX86_BUILTIN_PFMUL
:
19548 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
19550 case IX86_BUILTIN_PFRCP
:
19551 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
19553 case IX86_BUILTIN_PFRCPIT1
:
19554 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
19556 case IX86_BUILTIN_PFRCPIT2
:
19557 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
19559 case IX86_BUILTIN_PFRSQIT1
:
19560 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
19562 case IX86_BUILTIN_PFRSQRT
:
19563 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
19565 case IX86_BUILTIN_PFSUB
:
19566 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
19568 case IX86_BUILTIN_PFSUBR
:
19569 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
19571 case IX86_BUILTIN_PI2FD
:
19572 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
19574 case IX86_BUILTIN_PMULHRW
:
19575 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
19577 case IX86_BUILTIN_PF2IW
:
19578 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
19580 case IX86_BUILTIN_PFNACC
:
19581 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
19583 case IX86_BUILTIN_PFPNACC
:
19584 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
19586 case IX86_BUILTIN_PI2FW
:
19587 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
19589 case IX86_BUILTIN_PSWAPDSI
:
19590 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
19592 case IX86_BUILTIN_PSWAPDSF
:
19593 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
19595 case IX86_BUILTIN_SQRTSD
:
19596 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
19597 case IX86_BUILTIN_LOADUPD
:
19598 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
19599 case IX86_BUILTIN_STOREUPD
:
19600 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
19602 case IX86_BUILTIN_MFENCE
:
19603 emit_insn (gen_sse2_mfence ());
19605 case IX86_BUILTIN_LFENCE
:
19606 emit_insn (gen_sse2_lfence ());
19609 case IX86_BUILTIN_CLFLUSH
:
19610 arg0
= CALL_EXPR_ARG (exp
, 0);
19611 op0
= expand_normal (arg0
);
19612 icode
= CODE_FOR_sse2_clflush
;
19613 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
19614 op0
= copy_to_mode_reg (Pmode
, op0
);
19616 emit_insn (gen_sse2_clflush (op0
));
19619 case IX86_BUILTIN_MOVNTPD
:
19620 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
19621 case IX86_BUILTIN_MOVNTDQ
:
19622 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
19623 case IX86_BUILTIN_MOVNTI
:
19624 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
19626 case IX86_BUILTIN_LOADDQU
:
19627 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
19628 case IX86_BUILTIN_STOREDQU
:
19629 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
19631 case IX86_BUILTIN_MONITOR
:
19632 arg0
= CALL_EXPR_ARG (exp
, 0);
19633 arg1
= CALL_EXPR_ARG (exp
, 1);
19634 arg2
= CALL_EXPR_ARG (exp
, 2);
19635 op0
= expand_normal (arg0
);
19636 op1
= expand_normal (arg1
);
19637 op2
= expand_normal (arg2
);
19639 op0
= copy_to_mode_reg (Pmode
, op0
);
19641 op1
= copy_to_mode_reg (SImode
, op1
);
19643 op2
= copy_to_mode_reg (SImode
, op2
);
19645 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
19647 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
19650 case IX86_BUILTIN_MWAIT
:
19651 arg0
= CALL_EXPR_ARG (exp
, 0);
19652 arg1
= CALL_EXPR_ARG (exp
, 1);
19653 op0
= expand_normal (arg0
);
19654 op1
= expand_normal (arg1
);
19656 op0
= copy_to_mode_reg (SImode
, op0
);
19658 op1
= copy_to_mode_reg (SImode
, op1
);
19659 emit_insn (gen_sse3_mwait (op0
, op1
));
19662 case IX86_BUILTIN_LDDQU
:
19663 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
19666 case IX86_BUILTIN_PALIGNR
:
19667 case IX86_BUILTIN_PALIGNR128
:
19668 if (fcode
== IX86_BUILTIN_PALIGNR
)
19670 icode
= CODE_FOR_ssse3_palignrdi
;
19675 icode
= CODE_FOR_ssse3_palignrti
;
19678 arg0
= CALL_EXPR_ARG (exp
, 0);
19679 arg1
= CALL_EXPR_ARG (exp
, 1);
19680 arg2
= CALL_EXPR_ARG (exp
, 2);
19681 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19682 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19683 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
19684 tmode
= insn_data
[icode
].operand
[0].mode
;
19685 mode1
= insn_data
[icode
].operand
[1].mode
;
19686 mode2
= insn_data
[icode
].operand
[2].mode
;
19687 mode3
= insn_data
[icode
].operand
[3].mode
;
19689 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19691 op0
= copy_to_reg (op0
);
19692 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
19694 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19696 op1
= copy_to_reg (op1
);
19697 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
19699 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19701 error ("shift must be an immediate");
19704 target
= gen_reg_rtx (mode
);
19705 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
19712 case IX86_BUILTIN_MOVNTDQA
:
19713 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa
, exp
,
19716 case IX86_BUILTIN_MOVNTSD
:
19717 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
19719 case IX86_BUILTIN_MOVNTSS
:
19720 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
19722 case IX86_BUILTIN_INSERTQ
:
19723 case IX86_BUILTIN_EXTRQ
:
19724 icode
= (fcode
== IX86_BUILTIN_EXTRQ
19725 ? CODE_FOR_sse4a_extrq
19726 : CODE_FOR_sse4a_insertq
);
19727 arg0
= CALL_EXPR_ARG (exp
, 0);
19728 arg1
= CALL_EXPR_ARG (exp
, 1);
19729 op0
= expand_normal (arg0
);
19730 op1
= expand_normal (arg1
);
19731 tmode
= insn_data
[icode
].operand
[0].mode
;
19732 mode1
= insn_data
[icode
].operand
[1].mode
;
19733 mode2
= insn_data
[icode
].operand
[2].mode
;
19734 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19735 op0
= copy_to_mode_reg (mode1
, op0
);
19736 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19737 op1
= copy_to_mode_reg (mode2
, op1
);
19738 if (optimize
|| target
== 0
19739 || GET_MODE (target
) != tmode
19740 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19741 target
= gen_reg_rtx (tmode
);
19742 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19748 case IX86_BUILTIN_EXTRQI
:
19749 icode
= CODE_FOR_sse4a_extrqi
;
19750 arg0
= CALL_EXPR_ARG (exp
, 0);
19751 arg1
= CALL_EXPR_ARG (exp
, 1);
19752 arg2
= CALL_EXPR_ARG (exp
, 2);
19753 op0
= expand_normal (arg0
);
19754 op1
= expand_normal (arg1
);
19755 op2
= expand_normal (arg2
);
19756 tmode
= insn_data
[icode
].operand
[0].mode
;
19757 mode1
= insn_data
[icode
].operand
[1].mode
;
19758 mode2
= insn_data
[icode
].operand
[2].mode
;
19759 mode3
= insn_data
[icode
].operand
[3].mode
;
19760 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19761 op0
= copy_to_mode_reg (mode1
, op0
);
19762 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19764 error ("index mask must be an immediate");
19765 return gen_reg_rtx (tmode
);
19767 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19769 error ("length mask must be an immediate");
19770 return gen_reg_rtx (tmode
);
19772 if (optimize
|| target
== 0
19773 || GET_MODE (target
) != tmode
19774 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19775 target
= gen_reg_rtx (tmode
);
19776 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19782 case IX86_BUILTIN_INSERTQI
:
19783 icode
= CODE_FOR_sse4a_insertqi
;
19784 arg0
= CALL_EXPR_ARG (exp
, 0);
19785 arg1
= CALL_EXPR_ARG (exp
, 1);
19786 arg2
= CALL_EXPR_ARG (exp
, 2);
19787 arg3
= CALL_EXPR_ARG (exp
, 3);
19788 op0
= expand_normal (arg0
);
19789 op1
= expand_normal (arg1
);
19790 op2
= expand_normal (arg2
);
19791 op3
= expand_normal (arg3
);
19792 tmode
= insn_data
[icode
].operand
[0].mode
;
19793 mode1
= insn_data
[icode
].operand
[1].mode
;
19794 mode2
= insn_data
[icode
].operand
[2].mode
;
19795 mode3
= insn_data
[icode
].operand
[3].mode
;
19796 mode4
= insn_data
[icode
].operand
[4].mode
;
19798 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19799 op0
= copy_to_mode_reg (mode1
, op0
);
19801 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19802 op1
= copy_to_mode_reg (mode2
, op1
);
19804 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
19806 error ("index mask must be an immediate");
19807 return gen_reg_rtx (tmode
);
19809 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
19811 error ("length mask must be an immediate");
19812 return gen_reg_rtx (tmode
);
19814 if (optimize
|| target
== 0
19815 || GET_MODE (target
) != tmode
19816 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19817 target
= gen_reg_rtx (tmode
);
19818 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
19824 case IX86_BUILTIN_VEC_INIT_V2SI
:
19825 case IX86_BUILTIN_VEC_INIT_V4HI
:
19826 case IX86_BUILTIN_VEC_INIT_V8QI
:
19827 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
19829 case IX86_BUILTIN_VEC_EXT_V2DF
:
19830 case IX86_BUILTIN_VEC_EXT_V2DI
:
19831 case IX86_BUILTIN_VEC_EXT_V4SF
:
19832 case IX86_BUILTIN_VEC_EXT_V4SI
:
19833 case IX86_BUILTIN_VEC_EXT_V8HI
:
19834 case IX86_BUILTIN_VEC_EXT_V2SI
:
19835 case IX86_BUILTIN_VEC_EXT_V4HI
:
19836 case IX86_BUILTIN_VEC_EXT_V16QI
:
19837 return ix86_expand_vec_ext_builtin (exp
, target
);
19839 case IX86_BUILTIN_VEC_SET_V2DI
:
19840 case IX86_BUILTIN_VEC_SET_V4SF
:
19841 case IX86_BUILTIN_VEC_SET_V4SI
:
19842 case IX86_BUILTIN_VEC_SET_V8HI
:
19843 case IX86_BUILTIN_VEC_SET_V4HI
:
19844 case IX86_BUILTIN_VEC_SET_V16QI
:
19845 return ix86_expand_vec_set_builtin (exp
);
19847 case IX86_BUILTIN_INFQ
:
19849 REAL_VALUE_TYPE inf
;
19853 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
19855 tmp
= validize_mem (force_const_mem (mode
, tmp
));
19858 target
= gen_reg_rtx (mode
);
19860 emit_move_insn (target
, tmp
);
19864 case IX86_BUILTIN_FABSQ
:
19865 return ix86_expand_unop_builtin (CODE_FOR_abstf2
, exp
, target
, 0);
19867 case IX86_BUILTIN_COPYSIGNQ
:
19868 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3
, exp
, target
);
19874 for (i
= 0, d
= bdesc_sse_3arg
;
19875 i
< ARRAY_SIZE (bdesc_sse_3arg
);
19877 if (d
->code
== fcode
)
19878 return ix86_expand_sse_4_operands_builtin (d
->icode
, exp
,
19881 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19882 if (d
->code
== fcode
)
19884 /* Compares are treated specially. */
19885 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
19886 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
19887 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
19888 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
19889 return ix86_expand_sse_compare (d
, exp
, target
);
19891 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
19894 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19895 if (d
->code
== fcode
)
19896 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19898 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
19899 if (d
->code
== fcode
)
19900 return ix86_expand_sse_comi (d
, exp
, target
);
19902 for (i
= 0, d
= bdesc_ptest
; i
< ARRAY_SIZE (bdesc_ptest
); i
++, d
++)
19903 if (d
->code
== fcode
)
19904 return ix86_expand_sse_ptest (d
, exp
, target
);
19906 for (i
= 0, d
= bdesc_crc32
; i
< ARRAY_SIZE (bdesc_crc32
); i
++, d
++)
19907 if (d
->code
== fcode
)
19908 return ix86_expand_crc32 (d
->icode
, exp
, target
);
19910 for (i
= 0, d
= bdesc_pcmpestr
;
19911 i
< ARRAY_SIZE (bdesc_pcmpestr
);
19913 if (d
->code
== fcode
)
19914 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
19916 for (i
= 0, d
= bdesc_pcmpistr
;
19917 i
< ARRAY_SIZE (bdesc_pcmpistr
);
19919 if (d
->code
== fcode
)
19920 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
19922 gcc_unreachable ();
19925 /* Returns a function decl for a vectorized version of the builtin function
19926 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19927 if it is not available. */
19930 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
19933 enum machine_mode in_mode
, out_mode
;
19936 if (TREE_CODE (type_out
) != VECTOR_TYPE
19937 || TREE_CODE (type_in
) != VECTOR_TYPE
)
19940 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
19941 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
19942 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
19943 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
19947 case BUILT_IN_SQRT
:
19948 if (out_mode
== DFmode
&& out_n
== 2
19949 && in_mode
== DFmode
&& in_n
== 2)
19950 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
19953 case BUILT_IN_SQRTF
:
19954 if (out_mode
== SFmode
&& out_n
== 4
19955 && in_mode
== SFmode
&& in_n
== 4)
19956 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
19959 case BUILT_IN_LRINT
:
19960 if (out_mode
== SImode
&& out_n
== 4
19961 && in_mode
== DFmode
&& in_n
== 2)
19962 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
19965 case BUILT_IN_LRINTF
:
19966 if (out_mode
== SImode
&& out_n
== 4
19967 && in_mode
== SFmode
&& in_n
== 4)
19968 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
19975 /* Dispatch to a handler for a vectorization library. */
19976 if (ix86_veclib_handler
)
19977 return (*ix86_veclib_handler
)(fn
, type_out
, type_in
);
19982 /* Handler for an ACML-style interface to a library with vectorized
19986 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
19988 char name
[20] = "__vr.._";
19989 tree fntype
, new_fndecl
, args
;
19992 enum machine_mode el_mode
, in_mode
;
19995 /* The ACML is 64bits only and suitable for unsafe math only as
19996 it does not correctly support parts of IEEE with the required
19997 precision such as denormals. */
19999 || !flag_unsafe_math_optimizations
)
20002 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
20003 n
= TYPE_VECTOR_SUBPARTS (type_out
);
20004 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
20005 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
20006 if (el_mode
!= in_mode
20016 case BUILT_IN_LOG2
:
20017 case BUILT_IN_LOG10
:
20020 if (el_mode
!= DFmode
20025 case BUILT_IN_SINF
:
20026 case BUILT_IN_COSF
:
20027 case BUILT_IN_EXPF
:
20028 case BUILT_IN_POWF
:
20029 case BUILT_IN_LOGF
:
20030 case BUILT_IN_LOG2F
:
20031 case BUILT_IN_LOG10F
:
20034 if (el_mode
!= SFmode
20043 bname
= IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls
[fn
]));
20044 sprintf (name
+ 7, "%s", bname
+10);
20047 for (args
= DECL_ARGUMENTS (implicit_built_in_decls
[fn
]); args
;
20048 args
= TREE_CHAIN (args
))
20052 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
20054 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
20056 /* Build a function declaration for the vectorized function. */
20057 new_fndecl
= build_decl (FUNCTION_DECL
, get_identifier (name
), fntype
);
20058 TREE_PUBLIC (new_fndecl
) = 1;
20059 DECL_EXTERNAL (new_fndecl
) = 1;
20060 DECL_IS_NOVOPS (new_fndecl
) = 1;
20061 TREE_READONLY (new_fndecl
) = 1;
20067 /* Returns a decl of a function that implements conversion of the
20068 input vector of type TYPE, or NULL_TREE if it is not available. */
20071 ix86_vectorize_builtin_conversion (unsigned int code
, tree type
)
20073 if (TREE_CODE (type
) != VECTOR_TYPE
)
20079 switch (TYPE_MODE (type
))
20082 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
20087 case FIX_TRUNC_EXPR
:
20088 switch (TYPE_MODE (type
))
20091 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
20101 /* Returns a code for a target-specific builtin that implements
20102 reciprocal of the function, or NULL_TREE if not available. */
20105 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
20106 bool sqrt ATTRIBUTE_UNUSED
)
20108 if (! (TARGET_SSE_MATH
&& TARGET_RECIP
&& !optimize_size
20109 && flag_finite_math_only
&& !flag_trapping_math
20110 && flag_unsafe_math_optimizations
))
20114 /* Machine dependent builtins. */
20117 /* Vectorized version of sqrt to rsqrt conversion. */
20118 case IX86_BUILTIN_SQRTPS
:
20119 return ix86_builtins
[IX86_BUILTIN_RSQRTPS
];
20125 /* Normal builtins. */
20128 /* Sqrt to rsqrt conversion. */
20129 case BUILT_IN_SQRTF
:
20130 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
20137 /* Store OPERAND to the memory after reload is completed. This means
20138 that we can't easily use assign_stack_local. */
20140 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
20144 gcc_assert (reload_completed
);
20145 if (TARGET_RED_ZONE
)
20147 result
= gen_rtx_MEM (mode
,
20148 gen_rtx_PLUS (Pmode
,
20150 GEN_INT (-RED_ZONE_SIZE
)));
20151 emit_move_insn (result
, operand
);
20153 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
20159 operand
= gen_lowpart (DImode
, operand
);
20163 gen_rtx_SET (VOIDmode
,
20164 gen_rtx_MEM (DImode
,
20165 gen_rtx_PRE_DEC (DImode
,
20166 stack_pointer_rtx
)),
20170 gcc_unreachable ();
20172 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20181 split_di (&operand
, 1, operands
, operands
+ 1);
20183 gen_rtx_SET (VOIDmode
,
20184 gen_rtx_MEM (SImode
,
20185 gen_rtx_PRE_DEC (Pmode
,
20186 stack_pointer_rtx
)),
20189 gen_rtx_SET (VOIDmode
,
20190 gen_rtx_MEM (SImode
,
20191 gen_rtx_PRE_DEC (Pmode
,
20192 stack_pointer_rtx
)),
20197 /* Store HImodes as SImodes. */
20198 operand
= gen_lowpart (SImode
, operand
);
20202 gen_rtx_SET (VOIDmode
,
20203 gen_rtx_MEM (GET_MODE (operand
),
20204 gen_rtx_PRE_DEC (SImode
,
20205 stack_pointer_rtx
)),
20209 gcc_unreachable ();
20211 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
20216 /* Free operand from the memory. */
20218 ix86_free_from_memory (enum machine_mode mode
)
20220 if (!TARGET_RED_ZONE
)
20224 if (mode
== DImode
|| TARGET_64BIT
)
20228 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20229 to pop or add instruction if registers are available. */
20230 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20231 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
20236 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20237 QImode must go into class Q_REGS.
20238 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20239 movdf to do mem-to-mem moves through integer regs. */
20241 ix86_preferred_reload_class (rtx x
, enum reg_class regclass
)
20243 enum machine_mode mode
= GET_MODE (x
);
20245 /* We're only allowed to return a subclass of CLASS. Many of the
20246 following checks fail for NO_REGS, so eliminate that early. */
20247 if (regclass
== NO_REGS
)
20250 /* All classes can load zeros. */
20251 if (x
== CONST0_RTX (mode
))
20254 /* Force constants into memory if we are loading a (nonzero) constant into
20255 an MMX or SSE register. This is because there are no MMX/SSE instructions
20256 to load from a constant. */
20258 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
20261 /* Prefer SSE regs only, if we can use them for math. */
20262 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
20263 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20265 /* Floating-point constants need more complex checks. */
20266 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
20268 /* General regs can load everything. */
20269 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
20272 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20273 zero above. We only want to wind up preferring 80387 registers if
20274 we plan on doing computation with them. */
20276 && standard_80387_constant_p (x
))
20278 /* Limit class to non-sse. */
20279 if (regclass
== FLOAT_SSE_REGS
)
20281 if (regclass
== FP_TOP_SSE_REGS
)
20283 if (regclass
== FP_SECOND_SSE_REGS
)
20284 return FP_SECOND_REG
;
20285 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
20292 /* Generally when we see PLUS here, it's the function invariant
20293 (plus soft-fp const_int). Which can only be computed into general
20295 if (GET_CODE (x
) == PLUS
)
20296 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
20298 /* QImode constants are easy to load, but non-constant QImode data
20299 must go into Q_REGS. */
20300 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
20302 if (reg_class_subset_p (regclass
, Q_REGS
))
20304 if (reg_class_subset_p (Q_REGS
, regclass
))
20312 /* Discourage putting floating-point values in SSE registers unless
20313 SSE math is being used, and likewise for the 387 registers. */
20315 ix86_preferred_output_reload_class (rtx x
, enum reg_class regclass
)
20317 enum machine_mode mode
= GET_MODE (x
);
20319 /* Restrict the output reload class to the register bank that we are doing
20320 math on. If we would like not to return a subset of CLASS, reject this
20321 alternative: if reload cannot do this, it will still use its choice. */
20322 mode
= GET_MODE (x
);
20323 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20324 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
20326 if (X87_FLOAT_MODE_P (mode
))
20328 if (regclass
== FP_TOP_SSE_REGS
)
20330 else if (regclass
== FP_SECOND_SSE_REGS
)
20331 return FP_SECOND_REG
;
20333 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
20339 /* If we are copying between general and FP registers, we need a memory
20340 location. The same is true for SSE and MMX registers.
20342 To optimize register_move_cost performance, allow inline variant.
20344 The macro can't work reliably when one of the CLASSES is class containing
20345 registers from multiple units (SSE, MMX, integer). We avoid this by never
20346 combining those units in single alternative in the machine description.
20347 Ensure that this constraint holds to avoid unexpected surprises.
20349 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20350 enforce these sanity checks. */
20353 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20354 enum machine_mode mode
, int strict
)
20356 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
20357 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
20358 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
20359 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
20360 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
20361 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
20363 gcc_assert (!strict
);
20367 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
20370 /* ??? This is a lie. We do have moves between mmx/general, and for
20371 mmx/sse2. But by saying we need secondary memory we discourage the
20372 register allocator from using the mmx registers unless needed. */
20373 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
20376 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20378 /* SSE1 doesn't have any direct moves from other classes. */
20382 /* If the target says that inter-unit moves are more expensive
20383 than moving through memory, then don't generate them. */
20384 if (!TARGET_INTER_UNIT_MOVES
)
20387 /* Between SSE and general, we have moves no larger than word size. */
20388 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20396 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
20397 enum machine_mode mode
, int strict
)
20399 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
20402 /* Return true if the registers in CLASS cannot represent the change from
20403 modes FROM to TO. */
20406 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
20407 enum reg_class regclass
)
20412 /* x87 registers can't do subreg at all, as all values are reformatted
20413 to extended precision. */
20414 if (MAYBE_FLOAT_CLASS_P (regclass
))
20417 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
20419 /* Vector registers do not support QI or HImode loads. If we don't
20420 disallow a change to these modes, reload will assume it's ok to
20421 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20422 the vec_dupv4hi pattern. */
20423 if (GET_MODE_SIZE (from
) < 4)
20426 /* Vector registers do not support subreg with nonzero offsets, which
20427 are otherwise valid for integer registers. Since we can't see
20428 whether we have a nonzero offset from here, prohibit all
20429 nonparadoxical subregs changing size. */
20430 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
20437 /* Return the cost of moving data of mode M between a
20438 register and memory. A value of 2 is the default; this cost is
20439 relative to those in `REGISTER_MOVE_COST'.
20441 This function is used extensively by register_move_cost that is used to
20442 build tables at startup. Make it inline in this case.
20443 When IN is 2, return maximum of in and out move cost.
20445 If moving between registers and memory is more expensive than
20446 between two registers, you should define this macro to express the
20449 Model also increased moving costs of QImode registers in non
20453 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
20457 if (FLOAT_CLASS_P (regclass
))
20475 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
20476 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
20478 if (SSE_CLASS_P (regclass
))
20481 switch (GET_MODE_SIZE (mode
))
20496 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
20497 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
20499 if (MMX_CLASS_P (regclass
))
20502 switch (GET_MODE_SIZE (mode
))
20514 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
20515 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
20517 switch (GET_MODE_SIZE (mode
))
20520 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
20523 return ix86_cost
->int_store
[0];
20524 if (TARGET_PARTIAL_REG_DEPENDENCY
&& !optimize_size
)
20525 cost
= ix86_cost
->movzbl_load
;
20527 cost
= ix86_cost
->int_load
[0];
20529 return MAX (cost
, ix86_cost
->int_store
[0]);
20535 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
20537 return ix86_cost
->movzbl_load
;
20539 return ix86_cost
->int_store
[0] + 4;
20544 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
20545 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
20547 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20548 if (mode
== TFmode
)
20551 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
20553 cost
= ix86_cost
->int_load
[2];
20555 cost
= ix86_cost
->int_store
[2];
20556 return (cost
* (((int) GET_MODE_SIZE (mode
)
20557 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
20562 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
, int in
)
20564 return inline_memory_move_cost (mode
, regclass
, in
);
20568 /* Return the cost of moving data from a register in class CLASS1 to
20569 one in class CLASS2.
20571 It is not required that the cost always equal 2 when FROM is the same as TO;
20572 on some machines it is expensive to move between registers if they are not
20573 general registers. */
20576 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
20577 enum reg_class class2
)
20579 /* In case we require secondary memory, compute cost of the store followed
20580 by load. In order to avoid bad register allocation choices, we need
20581 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20583 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
20587 cost
+= inline_memory_move_cost (mode
, class1
, 2);
20588 cost
+= inline_memory_move_cost (mode
, class2
, 2);
20590 /* In case of copying from general_purpose_register we may emit multiple
20591 stores followed by single load causing memory size mismatch stall.
20592 Count this as arbitrarily high cost of 20. */
20593 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
20596 /* In the case of FP/MMX moves, the registers actually overlap, and we
20597 have to switch modes in order to treat them differently. */
20598 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
20599 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
20605 /* Moves between SSE/MMX and integer unit are expensive. */
20606 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
20607 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20609 /* ??? By keeping returned value relatively high, we limit the number
20610 of moves between integer and MMX/SSE registers for all targets.
20611 Additionally, high value prevents problem with x86_modes_tieable_p(),
20612 where integer modes in MMX/SSE registers are not tieable
20613 because of missing QImode and HImode moves to, from or between
20614 MMX/SSE registers. */
20615 return MAX (ix86_cost
->mmxsse_to_integer
, 8);
20617 if (MAYBE_FLOAT_CLASS_P (class1
))
20618 return ix86_cost
->fp_move
;
20619 if (MAYBE_SSE_CLASS_P (class1
))
20620 return ix86_cost
->sse_move
;
20621 if (MAYBE_MMX_CLASS_P (class1
))
20622 return ix86_cost
->mmx_move
;
20626 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20629 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
20631 /* Flags and only flags can only hold CCmode values. */
20632 if (CC_REGNO_P (regno
))
20633 return GET_MODE_CLASS (mode
) == MODE_CC
;
20634 if (GET_MODE_CLASS (mode
) == MODE_CC
20635 || GET_MODE_CLASS (mode
) == MODE_RANDOM
20636 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
20638 if (FP_REGNO_P (regno
))
20639 return VALID_FP_MODE_P (mode
);
20640 if (SSE_REGNO_P (regno
))
20642 /* We implement the move patterns for all vector modes into and
20643 out of SSE registers, even when no operation instructions
20645 return (VALID_SSE_REG_MODE (mode
)
20646 || VALID_SSE2_REG_MODE (mode
)
20647 || VALID_MMX_REG_MODE (mode
)
20648 || VALID_MMX_REG_MODE_3DNOW (mode
));
20650 if (MMX_REGNO_P (regno
))
20652 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20653 so if the register is available at all, then we can move data of
20654 the given mode into or out of it. */
20655 return (VALID_MMX_REG_MODE (mode
)
20656 || VALID_MMX_REG_MODE_3DNOW (mode
));
20659 if (mode
== QImode
)
20661 /* Take care for QImode values - they can be in non-QI regs,
20662 but then they do cause partial register stalls. */
20663 if (regno
< 4 || TARGET_64BIT
)
20665 if (!TARGET_PARTIAL_REG_STALL
)
20667 return reload_in_progress
|| reload_completed
;
20669 /* We handle both integer and floats in the general purpose registers. */
20670 else if (VALID_INT_MODE_P (mode
))
20672 else if (VALID_FP_MODE_P (mode
))
20674 else if (VALID_DFP_MODE_P (mode
))
20676 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20677 on to use that value in smaller contexts, this can easily force a
20678 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20679 supporting DImode, allow it. */
20680 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
20686 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20687 tieable integer mode. */
20690 ix86_tieable_integer_mode_p (enum machine_mode mode
)
20699 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
20702 return TARGET_64BIT
;
20709 /* Return true if MODE1 is accessible in a register that can hold MODE2
20710 without copying. That is, all register classes that can hold MODE2
20711 can also hold MODE1. */
20714 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
20716 if (mode1
== mode2
)
20719 if (ix86_tieable_integer_mode_p (mode1
)
20720 && ix86_tieable_integer_mode_p (mode2
))
20723 /* MODE2 being XFmode implies fp stack or general regs, which means we
20724 can tie any smaller floating point modes to it. Note that we do not
20725 tie this with TFmode. */
20726 if (mode2
== XFmode
)
20727 return mode1
== SFmode
|| mode1
== DFmode
;
20729 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20730 that we can tie it with SFmode. */
20731 if (mode2
== DFmode
)
20732 return mode1
== SFmode
;
20734 /* If MODE2 is only appropriate for an SSE register, then tie with
20735 any other mode acceptable to SSE registers. */
20736 if (GET_MODE_SIZE (mode2
) == 16
20737 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20738 return (GET_MODE_SIZE (mode1
) == 16
20739 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20741 /* If MODE2 is appropriate for an MMX register, then tie
20742 with any other mode acceptable to MMX registers. */
20743 if (GET_MODE_SIZE (mode2
) == 8
20744 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
20745 return (GET_MODE_SIZE (mode1
) == 8
20746 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
20751 /* Compute a (partial) cost for rtx X. Return true if the complete
20752 cost has been computed, and false if subexpressions should be
20753 scanned. In either case, *TOTAL contains the cost result. */
20756 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int *total
)
20758 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
20759 enum machine_mode mode
= GET_MODE (x
);
20767 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
20769 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
20771 else if (flag_pic
&& SYMBOLIC_CONST (x
)
20773 || (!GET_CODE (x
) != LABEL_REF
20774 && (GET_CODE (x
) != SYMBOL_REF
20775 || !SYMBOL_REF_LOCAL_P (x
)))))
20782 if (mode
== VOIDmode
)
20785 switch (standard_80387_constant_p (x
))
20790 default: /* Other constants */
20795 /* Start with (MEM (SYMBOL_REF)), since that's where
20796 it'll probably end up. Add a penalty for size. */
20797 *total
= (COSTS_N_INSNS (1)
20798 + (flag_pic
!= 0 && !TARGET_64BIT
)
20799 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
20805 /* The zero extensions is often completely free on x86_64, so make
20806 it as cheap as possible. */
20807 if (TARGET_64BIT
&& mode
== DImode
20808 && GET_MODE (XEXP (x
, 0)) == SImode
)
20810 else if (TARGET_ZERO_EXTEND_WITH_AND
)
20811 *total
= ix86_cost
->add
;
20813 *total
= ix86_cost
->movzx
;
20817 *total
= ix86_cost
->movsx
;
20821 if (CONST_INT_P (XEXP (x
, 1))
20822 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
20824 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
20827 *total
= ix86_cost
->add
;
20830 if ((value
== 2 || value
== 3)
20831 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
20833 *total
= ix86_cost
->lea
;
20843 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
20845 if (CONST_INT_P (XEXP (x
, 1)))
20847 if (INTVAL (XEXP (x
, 1)) > 32)
20848 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
20850 *total
= ix86_cost
->shift_const
* 2;
20854 if (GET_CODE (XEXP (x
, 1)) == AND
)
20855 *total
= ix86_cost
->shift_var
* 2;
20857 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
20862 if (CONST_INT_P (XEXP (x
, 1)))
20863 *total
= ix86_cost
->shift_const
;
20865 *total
= ix86_cost
->shift_var
;
20870 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20872 /* ??? SSE scalar cost should be used here. */
20873 *total
= ix86_cost
->fmul
;
20876 else if (X87_FLOAT_MODE_P (mode
))
20878 *total
= ix86_cost
->fmul
;
20881 else if (FLOAT_MODE_P (mode
))
20883 /* ??? SSE vector cost should be used here. */
20884 *total
= ix86_cost
->fmul
;
20889 rtx op0
= XEXP (x
, 0);
20890 rtx op1
= XEXP (x
, 1);
20892 if (CONST_INT_P (XEXP (x
, 1)))
20894 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
20895 for (nbits
= 0; value
!= 0; value
&= value
- 1)
20899 /* This is arbitrary. */
20902 /* Compute costs correctly for widening multiplication. */
20903 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
20904 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
20905 == GET_MODE_SIZE (mode
))
20907 int is_mulwiden
= 0;
20908 enum machine_mode inner_mode
= GET_MODE (op0
);
20910 if (GET_CODE (op0
) == GET_CODE (op1
))
20911 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
20912 else if (CONST_INT_P (op1
))
20914 if (GET_CODE (op0
) == SIGN_EXTEND
)
20915 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
20918 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
20922 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
20925 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
20926 + nbits
* ix86_cost
->mult_bit
20927 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
20936 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20937 /* ??? SSE cost should be used here. */
20938 *total
= ix86_cost
->fdiv
;
20939 else if (X87_FLOAT_MODE_P (mode
))
20940 *total
= ix86_cost
->fdiv
;
20941 else if (FLOAT_MODE_P (mode
))
20942 /* ??? SSE vector cost should be used here. */
20943 *total
= ix86_cost
->fdiv
;
20945 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
20949 if (GET_MODE_CLASS (mode
) == MODE_INT
20950 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
20952 if (GET_CODE (XEXP (x
, 0)) == PLUS
20953 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
20954 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
20955 && CONSTANT_P (XEXP (x
, 1)))
20957 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
20958 if (val
== 2 || val
== 4 || val
== 8)
20960 *total
= ix86_cost
->lea
;
20961 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20962 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
20964 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20968 else if (GET_CODE (XEXP (x
, 0)) == MULT
20969 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
20971 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
20972 if (val
== 2 || val
== 4 || val
== 8)
20974 *total
= ix86_cost
->lea
;
20975 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20976 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20980 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
20982 *total
= ix86_cost
->lea
;
20983 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
20984 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
20985 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
20992 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20994 /* ??? SSE cost should be used here. */
20995 *total
= ix86_cost
->fadd
;
20998 else if (X87_FLOAT_MODE_P (mode
))
21000 *total
= ix86_cost
->fadd
;
21003 else if (FLOAT_MODE_P (mode
))
21005 /* ??? SSE vector cost should be used here. */
21006 *total
= ix86_cost
->fadd
;
21014 if (!TARGET_64BIT
&& mode
== DImode
)
21016 *total
= (ix86_cost
->add
* 2
21017 + (rtx_cost (XEXP (x
, 0), outer_code
)
21018 << (GET_MODE (XEXP (x
, 0)) != DImode
))
21019 + (rtx_cost (XEXP (x
, 1), outer_code
)
21020 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
21026 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21028 /* ??? SSE cost should be used here. */
21029 *total
= ix86_cost
->fchs
;
21032 else if (X87_FLOAT_MODE_P (mode
))
21034 *total
= ix86_cost
->fchs
;
21037 else if (FLOAT_MODE_P (mode
))
21039 /* ??? SSE vector cost should be used here. */
21040 *total
= ix86_cost
->fchs
;
21046 if (!TARGET_64BIT
&& mode
== DImode
)
21047 *total
= ix86_cost
->add
* 2;
21049 *total
= ix86_cost
->add
;
21053 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
21054 && XEXP (XEXP (x
, 0), 1) == const1_rtx
21055 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
21056 && XEXP (x
, 1) == const0_rtx
)
21058 /* This kind of construct is implemented using test[bwl].
21059 Treat it as if we had an AND. */
21060 *total
= (ix86_cost
->add
21061 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
21062 + rtx_cost (const1_rtx
, outer_code
));
21068 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
21073 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21074 /* ??? SSE cost should be used here. */
21075 *total
= ix86_cost
->fabs
;
21076 else if (X87_FLOAT_MODE_P (mode
))
21077 *total
= ix86_cost
->fabs
;
21078 else if (FLOAT_MODE_P (mode
))
21079 /* ??? SSE vector cost should be used here. */
21080 *total
= ix86_cost
->fabs
;
21084 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21085 /* ??? SSE cost should be used here. */
21086 *total
= ix86_cost
->fsqrt
;
21087 else if (X87_FLOAT_MODE_P (mode
))
21088 *total
= ix86_cost
->fsqrt
;
21089 else if (FLOAT_MODE_P (mode
))
21090 /* ??? SSE vector cost should be used here. */
21091 *total
= ix86_cost
->fsqrt
;
21095 if (XINT (x
, 1) == UNSPEC_TP
)
21106 static int current_machopic_label_num
;
21108 /* Given a symbol name and its associated stub, write out the
21109 definition of the stub. */
21112 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
21114 unsigned int length
;
21115 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
21116 int label
= ++current_machopic_label_num
;
21118 /* For 64-bit we shouldn't get here. */
21119 gcc_assert (!TARGET_64BIT
);
21121 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21122 symb
= (*targetm
.strip_name_encoding
) (symb
);
21124 length
= strlen (stub
);
21125 binder_name
= alloca (length
+ 32);
21126 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
21128 length
= strlen (symb
);
21129 symbol_name
= alloca (length
+ 32);
21130 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
21132 sprintf (lazy_ptr_name
, "L%d$lz", label
);
21135 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
21137 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
21139 fprintf (file
, "%s:\n", stub
);
21140 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21144 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
21145 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
21146 fprintf (file
, "\tjmp\t*%%edx\n");
21149 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
21151 fprintf (file
, "%s:\n", binder_name
);
21155 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
21156 fprintf (file
, "\tpushl\t%%eax\n");
21159 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
21161 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
21163 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
21164 fprintf (file
, "%s:\n", lazy_ptr_name
);
21165 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21166 fprintf (file
, "\t.long %s\n", binder_name
);
21170 darwin_x86_file_end (void)
21172 darwin_file_end ();
21175 #endif /* TARGET_MACHO */
21177 /* Order the registers for register allocator. */
21180 x86_order_regs_for_local_alloc (void)
21185 /* First allocate the local general purpose registers. */
21186 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21187 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
21188 reg_alloc_order
[pos
++] = i
;
21190 /* Global general purpose registers. */
21191 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21192 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
21193 reg_alloc_order
[pos
++] = i
;
21195 /* x87 registers come first in case we are doing FP math
21197 if (!TARGET_SSE_MATH
)
21198 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21199 reg_alloc_order
[pos
++] = i
;
21201 /* SSE registers. */
21202 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
21203 reg_alloc_order
[pos
++] = i
;
21204 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
21205 reg_alloc_order
[pos
++] = i
;
21207 /* x87 registers. */
21208 if (TARGET_SSE_MATH
)
21209 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21210 reg_alloc_order
[pos
++] = i
;
21212 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
21213 reg_alloc_order
[pos
++] = i
;
21215 /* Initialize the rest of array as we do not allocate some registers
21217 while (pos
< FIRST_PSEUDO_REGISTER
)
21218 reg_alloc_order
[pos
++] = 0;
21221 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21222 struct attribute_spec.handler. */
21224 ix86_handle_struct_attribute (tree
*node
, tree name
,
21225 tree args ATTRIBUTE_UNUSED
,
21226 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
21229 if (DECL_P (*node
))
21231 if (TREE_CODE (*node
) == TYPE_DECL
)
21232 type
= &TREE_TYPE (*node
);
21237 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
21238 || TREE_CODE (*type
) == UNION_TYPE
)))
21240 warning (OPT_Wattributes
, "%qs attribute ignored",
21241 IDENTIFIER_POINTER (name
));
21242 *no_add_attrs
= true;
21245 else if ((is_attribute_p ("ms_struct", name
)
21246 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
21247 || ((is_attribute_p ("gcc_struct", name
)
21248 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
21250 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
21251 IDENTIFIER_POINTER (name
));
21252 *no_add_attrs
= true;
21259 ix86_ms_bitfield_layout_p (const_tree record_type
)
21261 return (TARGET_MS_BITFIELD_LAYOUT
&&
21262 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
21263 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
21266 /* Returns an expression indicating where the this parameter is
21267 located on entry to the FUNCTION. */
21270 x86_this_parameter (tree function
)
21272 tree type
= TREE_TYPE (function
);
21273 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
21277 const int *parm_regs
;
21279 if (TARGET_64BIT_MS_ABI
)
21280 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
21282 parm_regs
= x86_64_int_parameter_registers
;
21283 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
21286 if (ix86_function_regparm (type
, function
) > 0
21287 && !type_has_variadic_args_p (type
))
21290 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
21292 return gen_rtx_REG (SImode
, regno
);
21295 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
21298 /* Determine whether x86_output_mi_thunk can succeed. */
21301 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
21302 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
21303 HOST_WIDE_INT vcall_offset
, const_tree function
)
21305 /* 64-bit can handle anything. */
21309 /* For 32-bit, everything's fine if we have one free register. */
21310 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
21313 /* Need a free register for vcall_offset. */
21317 /* Need a free register for GOT references. */
21318 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
21321 /* Otherwise ok. */
21325 /* Output the assembler code for a thunk function. THUNK_DECL is the
21326 declaration for the thunk function itself, FUNCTION is the decl for
21327 the target function. DELTA is an immediate constant offset to be
21328 added to THIS. If VCALL_OFFSET is nonzero, the word at
21329 *(*this + vcall_offset) should be added to THIS. */
21332 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
21333 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
21334 HOST_WIDE_INT vcall_offset
, tree function
)
21337 rtx this_param
= x86_this_parameter (function
);
21340 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21341 pull it in now and let DELTA benefit. */
21342 if (REG_P (this_param
))
21343 this_reg
= this_param
;
21344 else if (vcall_offset
)
21346 /* Put the this parameter into %eax. */
21347 xops
[0] = this_param
;
21348 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
21349 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21352 this_reg
= NULL_RTX
;
21354 /* Adjust the this parameter by a fixed constant. */
21357 xops
[0] = GEN_INT (delta
);
21358 xops
[1] = this_reg
? this_reg
: this_param
;
21361 if (!x86_64_general_operand (xops
[0], DImode
))
21363 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21365 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
21367 xops
[1] = this_param
;
21369 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21372 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21375 /* Adjust the this parameter by a value stored in the vtable. */
21379 tmp
= gen_rtx_REG (DImode
, R10_REG
);
21382 int tmp_regno
= 2 /* ECX */;
21383 if (lookup_attribute ("fastcall",
21384 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
21385 tmp_regno
= 0 /* EAX */;
21386 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
21389 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
21392 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21394 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21396 /* Adjust the this parameter. */
21397 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
21398 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
21400 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
21401 xops
[0] = GEN_INT (vcall_offset
);
21403 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
21404 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
21406 xops
[1] = this_reg
;
21408 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
21410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
21413 /* If necessary, drop THIS back to its stack slot. */
21414 if (this_reg
&& this_reg
!= this_param
)
21416 xops
[0] = this_reg
;
21417 xops
[1] = this_param
;
21418 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
21421 xops
[0] = XEXP (DECL_RTL (function
), 0);
21424 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21425 output_asm_insn ("jmp\t%P0", xops
);
21426 /* All thunks should be in the same object as their target,
21427 and thus binds_local_p should be true. */
21428 else if (TARGET_64BIT_MS_ABI
)
21429 gcc_unreachable ();
21432 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
21433 tmp
= gen_rtx_CONST (Pmode
, tmp
);
21434 tmp
= gen_rtx_MEM (QImode
, tmp
);
21436 output_asm_insn ("jmp\t%A0", xops
);
21441 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
21442 output_asm_insn ("jmp\t%P0", xops
);
21447 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
21448 tmp
= (gen_rtx_SYMBOL_REF
21450 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
21451 tmp
= gen_rtx_MEM (QImode
, tmp
);
21453 output_asm_insn ("jmp\t%0", xops
);
21456 #endif /* TARGET_MACHO */
21458 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
21459 output_set_got (tmp
, NULL_RTX
);
21462 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
21463 output_asm_insn ("jmp\t{*}%1", xops
);
21469 x86_file_start (void)
21471 default_file_start ();
21473 darwin_file_start ();
21475 if (X86_FILE_START_VERSION_DIRECTIVE
)
21476 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
21477 if (X86_FILE_START_FLTUSED
)
21478 fputs ("\t.global\t__fltused\n", asm_out_file
);
21479 if (ix86_asm_dialect
== ASM_INTEL
)
21480 fputs ("\t.intel_syntax\n", asm_out_file
);
21484 x86_field_alignment (tree field
, int computed
)
21486 enum machine_mode mode
;
21487 tree type
= TREE_TYPE (field
);
21489 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
21491 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
21492 ? get_inner_array_type (type
) : type
);
21493 if (mode
== DFmode
|| mode
== DCmode
21494 || GET_MODE_CLASS (mode
) == MODE_INT
21495 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
21496 return MIN (32, computed
);
21500 /* Output assembler code to FILE to increment profiler label # LABELNO
21501 for profiling a function entry. */
21503 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
21507 #ifndef NO_PROFILE_COUNTERS
21508 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
21511 if (!TARGET_64BIT_MS_ABI
&& flag_pic
)
21512 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
21514 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
21518 #ifndef NO_PROFILE_COUNTERS
21519 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21520 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
21522 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
21526 #ifndef NO_PROFILE_COUNTERS
21527 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
21528 PROFILE_COUNT_REGISTER
);
21530 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
21534 /* We don't have exact information about the insn sizes, but we may assume
21535 quite safely that we are informed about all 1 byte insns and memory
21536 address sizes. This is enough to eliminate unnecessary padding in
21540 min_insn_size (rtx insn
)
21544 if (!INSN_P (insn
) || !active_insn_p (insn
))
21547 /* Discard alignments we've emit and jump instructions. */
21548 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
21549 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
21552 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
21553 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
21556 /* Important case - calls are always 5 bytes.
21557 It is common to have many calls in the row. */
21559 && symbolic_reference_mentioned_p (PATTERN (insn
))
21560 && !SIBLING_CALL_P (insn
))
21562 if (get_attr_length (insn
) <= 1)
21565 /* For normal instructions we may rely on the sizes of addresses
21566 and the presence of symbol to require 4 bytes of encoding.
21567 This is not the case for jumps where references are PC relative. */
21568 if (!JUMP_P (insn
))
21570 l
= get_attr_length_address (insn
);
21571 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
21580 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21584 ix86_avoid_jump_misspredicts (void)
21586 rtx insn
, start
= get_insns ();
21587 int nbytes
= 0, njumps
= 0;
21590 /* Look for all minimal intervals of instructions containing 4 jumps.
21591 The intervals are bounded by START and INSN. NBYTES is the total
21592 size of instructions in the interval including INSN and not including
21593 START. When the NBYTES is smaller than 16 bytes, it is possible
21594 that the end of START and INSN ends up in the same 16byte page.
21596 The smallest offset in the page INSN can start is the case where START
21597 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21598 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
21600 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
21603 nbytes
+= min_insn_size (insn
);
21605 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
21606 INSN_UID (insn
), min_insn_size (insn
));
21608 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
21609 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
21617 start
= NEXT_INSN (start
);
21618 if ((JUMP_P (start
)
21619 && GET_CODE (PATTERN (start
)) != ADDR_VEC
21620 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
21622 njumps
--, isjump
= 1;
21625 nbytes
-= min_insn_size (start
);
21627 gcc_assert (njumps
>= 0);
21629 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
21630 INSN_UID (start
), INSN_UID (insn
), nbytes
);
21632 if (njumps
== 3 && isjump
&& nbytes
< 16)
21634 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
21637 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
21638 INSN_UID (insn
), padsize
);
21639 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
21644 /* AMD Athlon works faster
21645 when RET is not destination of conditional jump or directly preceded
21646 by other jump instruction. We avoid the penalty by inserting NOP just
21647 before the RET instructions in such cases. */
21649 ix86_pad_returns (void)
21654 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
21656 basic_block bb
= e
->src
;
21657 rtx ret
= BB_END (bb
);
21659 bool replace
= false;
21661 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
21662 || !maybe_hot_bb_p (bb
))
21664 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
21665 if (active_insn_p (prev
) || LABEL_P (prev
))
21667 if (prev
&& LABEL_P (prev
))
21672 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21673 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
21674 && !(e
->flags
& EDGE_FALLTHRU
))
21679 prev
= prev_active_insn (ret
);
21681 && ((JUMP_P (prev
) && any_condjump_p (prev
))
21684 /* Empty functions get branch mispredict even when the jump destination
21685 is not visible to us. */
21686 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
21691 emit_insn_before (gen_return_internal_long (), ret
);
21697 /* Implement machine specific optimizations. We implement padding of returns
21698 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21702 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
21703 ix86_pad_returns ();
21704 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
21705 ix86_avoid_jump_misspredicts ();
21708 /* Return nonzero when QImode register that must be represented via REX prefix
21711 x86_extended_QIreg_mentioned_p (rtx insn
)
21714 extract_insn_cached (insn
);
21715 for (i
= 0; i
< recog_data
.n_operands
; i
++)
21716 if (REG_P (recog_data
.operand
[i
])
21717 && REGNO (recog_data
.operand
[i
]) >= 4)
21722 /* Return nonzero when P points to register encoded via REX prefix.
21723 Called via for_each_rtx. */
21725 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
21727 unsigned int regno
;
21730 regno
= REGNO (*p
);
21731 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
21734 /* Return true when INSN mentions register that must be encoded using REX
21737 x86_extended_reg_mentioned_p (rtx insn
)
21739 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
21742 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21743 optabs would emit if we didn't have TFmode patterns. */
21746 x86_emit_floatuns (rtx operands
[2])
21748 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
21749 enum machine_mode mode
, inmode
;
21751 inmode
= GET_MODE (operands
[1]);
21752 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
21755 in
= force_reg (inmode
, operands
[1]);
21756 mode
= GET_MODE (out
);
21757 neglab
= gen_label_rtx ();
21758 donelab
= gen_label_rtx ();
21759 f0
= gen_reg_rtx (mode
);
21761 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
21763 expand_float (out
, in
, 0);
21765 emit_jump_insn (gen_jump (donelab
));
21768 emit_label (neglab
);
21770 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
21772 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
21774 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
21776 expand_float (f0
, i0
, 0);
21778 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
21780 emit_label (donelab
);
21783 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21784 with all elements equal to VAR. Return true if successful. */
21787 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
21788 rtx target
, rtx val
)
21790 enum machine_mode smode
, wsmode
, wvmode
;
21805 val
= force_reg (GET_MODE_INNER (mode
), val
);
21806 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
21807 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21813 if (TARGET_SSE
|| TARGET_3DNOW_A
)
21815 val
= gen_lowpart (SImode
, val
);
21816 x
= gen_rtx_TRUNCATE (HImode
, val
);
21817 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
21818 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21840 /* Extend HImode to SImode using a paradoxical SUBREG. */
21841 tmp1
= gen_reg_rtx (SImode
);
21842 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
21843 /* Insert the SImode value as low element of V4SImode vector. */
21844 tmp2
= gen_reg_rtx (V4SImode
);
21845 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
21846 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
21847 CONST0_RTX (V4SImode
),
21849 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
21850 /* Cast the V4SImode vector back to a V8HImode vector. */
21851 tmp1
= gen_reg_rtx (V8HImode
);
21852 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
21853 /* Duplicate the low short through the whole low SImode word. */
21854 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
21855 /* Cast the V8HImode vector back to a V4SImode vector. */
21856 tmp2
= gen_reg_rtx (V4SImode
);
21857 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
21858 /* Replicate the low element of the V4SImode vector. */
21859 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
21860 /* Cast the V2SImode back to V8HImode, and store in target. */
21861 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
21872 /* Extend QImode to SImode using a paradoxical SUBREG. */
21873 tmp1
= gen_reg_rtx (SImode
);
21874 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
21875 /* Insert the SImode value as low element of V4SImode vector. */
21876 tmp2
= gen_reg_rtx (V4SImode
);
21877 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
21878 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
21879 CONST0_RTX (V4SImode
),
21881 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
21882 /* Cast the V4SImode vector back to a V16QImode vector. */
21883 tmp1
= gen_reg_rtx (V16QImode
);
21884 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
21885 /* Duplicate the low byte through the whole low SImode word. */
21886 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
21887 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
21888 /* Cast the V16QImode vector back to a V4SImode vector. */
21889 tmp2
= gen_reg_rtx (V4SImode
);
21890 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
21891 /* Replicate the low element of the V4SImode vector. */
21892 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
21893 /* Cast the V2SImode back to V16QImode, and store in target. */
21894 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
21902 /* Replicate the value once into the next wider mode and recurse. */
21903 val
= convert_modes (wsmode
, smode
, val
, true);
21904 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
21905 GEN_INT (GET_MODE_BITSIZE (smode
)),
21906 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
21907 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
21909 x
= gen_reg_rtx (wvmode
);
21910 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
21911 gcc_unreachable ();
21912 emit_move_insn (target
, gen_lowpart (mode
, x
));
21920 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21921 whose ONE_VAR element is VAR, and other elements are zero. Return true
21925 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
21926 rtx target
, rtx var
, int one_var
)
21928 enum machine_mode vsimode
;
21944 var
= force_reg (GET_MODE_INNER (mode
), var
);
21945 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
21946 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
21951 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
21952 new_target
= gen_reg_rtx (mode
);
21954 new_target
= target
;
21955 var
= force_reg (GET_MODE_INNER (mode
), var
);
21956 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
21957 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
21958 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
21961 /* We need to shuffle the value to the correct position, so
21962 create a new pseudo to store the intermediate result. */
21964 /* With SSE2, we can use the integer shuffle insns. */
21965 if (mode
!= V4SFmode
&& TARGET_SSE2
)
21967 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
21969 GEN_INT (one_var
== 1 ? 0 : 1),
21970 GEN_INT (one_var
== 2 ? 0 : 1),
21971 GEN_INT (one_var
== 3 ? 0 : 1)));
21972 if (target
!= new_target
)
21973 emit_move_insn (target
, new_target
);
21977 /* Otherwise convert the intermediate result to V4SFmode and
21978 use the SSE1 shuffle instructions. */
21979 if (mode
!= V4SFmode
)
21981 tmp
= gen_reg_rtx (V4SFmode
);
21982 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
21987 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
21989 GEN_INT (one_var
== 1 ? 0 : 1),
21990 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
21991 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
21993 if (mode
!= V4SFmode
)
21994 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
21995 else if (tmp
!= target
)
21996 emit_move_insn (target
, tmp
);
21998 else if (target
!= new_target
)
21999 emit_move_insn (target
, new_target
);
22004 vsimode
= V4SImode
;
22010 vsimode
= V2SImode
;
22016 /* Zero extend the variable element to SImode and recurse. */
22017 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
22019 x
= gen_reg_rtx (vsimode
);
22020 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
22022 gcc_unreachable ();
22024 emit_move_insn (target
, gen_lowpart (mode
, x
));
22032 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22033 consisting of the values in VALS. It is known that all elements
22034 except ONE_VAR are constants. Return true if successful. */
22037 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
22038 rtx target
, rtx vals
, int one_var
)
22040 rtx var
= XVECEXP (vals
, 0, one_var
);
22041 enum machine_mode wmode
;
22044 const_vec
= copy_rtx (vals
);
22045 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
22046 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
22054 /* For the two element vectors, it's just as easy to use
22055 the general case. */
22071 /* There's no way to set one QImode entry easily. Combine
22072 the variable value with its adjacent constant value, and
22073 promote to an HImode set. */
22074 x
= XVECEXP (vals
, 0, one_var
^ 1);
22077 var
= convert_modes (HImode
, QImode
, var
, true);
22078 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
22079 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
22080 x
= GEN_INT (INTVAL (x
) & 0xff);
22084 var
= convert_modes (HImode
, QImode
, var
, true);
22085 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
22087 if (x
!= const0_rtx
)
22088 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
22089 1, OPTAB_LIB_WIDEN
);
22091 x
= gen_reg_rtx (wmode
);
22092 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
22093 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
22095 emit_move_insn (target
, gen_lowpart (mode
, x
));
22102 emit_move_insn (target
, const_vec
);
22103 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
22107 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22108 all values variable, and none identical. */
22111 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
22112 rtx target
, rtx vals
)
22114 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
22115 rtx op0
= NULL
, op1
= NULL
;
22116 bool use_vec_concat
= false;
22122 if (!mmx_ok
&& !TARGET_SSE
)
22128 /* For the two element vectors, we always implement VEC_CONCAT. */
22129 op0
= XVECEXP (vals
, 0, 0);
22130 op1
= XVECEXP (vals
, 0, 1);
22131 use_vec_concat
= true;
22135 half_mode
= V2SFmode
;
22138 half_mode
= V2SImode
;
22144 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22145 Recurse to load the two halves. */
22147 op0
= gen_reg_rtx (half_mode
);
22148 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
22149 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
22151 op1
= gen_reg_rtx (half_mode
);
22152 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
22153 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
22155 use_vec_concat
= true;
22166 gcc_unreachable ();
22169 if (use_vec_concat
)
22171 if (!register_operand (op0
, half_mode
))
22172 op0
= force_reg (half_mode
, op0
);
22173 if (!register_operand (op1
, half_mode
))
22174 op1
= force_reg (half_mode
, op1
);
22176 emit_insn (gen_rtx_SET (VOIDmode
, target
,
22177 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
22181 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
22182 enum machine_mode inner_mode
;
22183 rtx words
[4], shift
;
22185 inner_mode
= GET_MODE_INNER (mode
);
22186 n_elts
= GET_MODE_NUNITS (mode
);
22187 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
22188 n_elt_per_word
= n_elts
/ n_words
;
22189 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
22191 for (i
= 0; i
< n_words
; ++i
)
22193 rtx word
= NULL_RTX
;
22195 for (j
= 0; j
< n_elt_per_word
; ++j
)
22197 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
22198 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
22204 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
22205 word
, 1, OPTAB_LIB_WIDEN
);
22206 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
22207 word
, 1, OPTAB_LIB_WIDEN
);
22215 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
22216 else if (n_words
== 2)
22218 rtx tmp
= gen_reg_rtx (mode
);
22219 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
22220 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
22221 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
22222 emit_move_insn (target
, tmp
);
22224 else if (n_words
== 4)
22226 rtx tmp
= gen_reg_rtx (V4SImode
);
22227 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
22228 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
22229 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
22232 gcc_unreachable ();
22236 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22237 instructions unless MMX_OK is true. */
22240 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
22242 enum machine_mode mode
= GET_MODE (target
);
22243 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22244 int n_elts
= GET_MODE_NUNITS (mode
);
22245 int n_var
= 0, one_var
= -1;
22246 bool all_same
= true, all_const_zero
= true;
22250 for (i
= 0; i
< n_elts
; ++i
)
22252 x
= XVECEXP (vals
, 0, i
);
22253 if (!CONSTANT_P (x
))
22254 n_var
++, one_var
= i
;
22255 else if (x
!= CONST0_RTX (inner_mode
))
22256 all_const_zero
= false;
22257 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
22261 /* Constants are best loaded from the constant pool. */
22264 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
22268 /* If all values are identical, broadcast the value. */
22270 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
22271 XVECEXP (vals
, 0, 0)))
22274 /* Values where only one field is non-constant are best loaded from
22275 the pool and overwritten via move later. */
22279 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
22280 XVECEXP (vals
, 0, one_var
),
22284 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
22288 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
22292 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
22294 enum machine_mode mode
= GET_MODE (target
);
22295 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22296 bool use_vec_merge
= false;
22305 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
22306 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
22308 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
22310 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
22311 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22317 use_vec_merge
= TARGET_SSE4_1
;
22325 /* For the two element vectors, we implement a VEC_CONCAT with
22326 the extraction of the other element. */
22328 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
22329 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
22332 op0
= val
, op1
= tmp
;
22334 op0
= tmp
, op1
= val
;
22336 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
22337 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22342 use_vec_merge
= TARGET_SSE4_1
;
22349 use_vec_merge
= true;
22353 /* tmp = target = A B C D */
22354 tmp
= copy_to_reg (target
);
22355 /* target = A A B B */
22356 emit_insn (gen_sse_unpcklps (target
, target
, target
));
22357 /* target = X A B B */
22358 ix86_expand_vector_set (false, target
, val
, 0);
22359 /* target = A X C D */
22360 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22361 GEN_INT (1), GEN_INT (0),
22362 GEN_INT (2+4), GEN_INT (3+4)));
22366 /* tmp = target = A B C D */
22367 tmp
= copy_to_reg (target
);
22368 /* tmp = X B C D */
22369 ix86_expand_vector_set (false, tmp
, val
, 0);
22370 /* target = A B X D */
22371 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22372 GEN_INT (0), GEN_INT (1),
22373 GEN_INT (0+4), GEN_INT (3+4)));
22377 /* tmp = target = A B C D */
22378 tmp
= copy_to_reg (target
);
22379 /* tmp = X B C D */
22380 ix86_expand_vector_set (false, tmp
, val
, 0);
22381 /* target = A B X D */
22382 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
22383 GEN_INT (0), GEN_INT (1),
22384 GEN_INT (2+4), GEN_INT (0+4)));
22388 gcc_unreachable ();
22393 use_vec_merge
= TARGET_SSE4_1
;
22397 /* Element 0 handled by vec_merge below. */
22400 use_vec_merge
= true;
22406 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22407 store into element 0, then shuffle them back. */
22411 order
[0] = GEN_INT (elt
);
22412 order
[1] = const1_rtx
;
22413 order
[2] = const2_rtx
;
22414 order
[3] = GEN_INT (3);
22415 order
[elt
] = const0_rtx
;
22417 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22418 order
[1], order
[2], order
[3]));
22420 ix86_expand_vector_set (false, target
, val
, 0);
22422 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
22423 order
[1], order
[2], order
[3]));
22427 /* For SSE1, we have to reuse the V4SF code. */
22428 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
22429 gen_lowpart (SFmode
, val
), elt
);
22434 use_vec_merge
= TARGET_SSE2
;
22437 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22441 use_vec_merge
= TARGET_SSE4_1
;
22451 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
22452 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
22453 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22457 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22459 emit_move_insn (mem
, target
);
22461 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22462 emit_move_insn (tmp
, val
);
22464 emit_move_insn (target
, mem
);
22469 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
22471 enum machine_mode mode
= GET_MODE (vec
);
22472 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
22473 bool use_vec_extr
= false;
22486 use_vec_extr
= true;
22490 use_vec_extr
= TARGET_SSE4_1
;
22502 tmp
= gen_reg_rtx (mode
);
22503 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
22504 GEN_INT (elt
), GEN_INT (elt
),
22505 GEN_INT (elt
+4), GEN_INT (elt
+4)));
22509 tmp
= gen_reg_rtx (mode
);
22510 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
22514 gcc_unreachable ();
22517 use_vec_extr
= true;
22522 use_vec_extr
= TARGET_SSE4_1
;
22536 tmp
= gen_reg_rtx (mode
);
22537 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
22538 GEN_INT (elt
), GEN_INT (elt
),
22539 GEN_INT (elt
), GEN_INT (elt
)));
22543 tmp
= gen_reg_rtx (mode
);
22544 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
22548 gcc_unreachable ();
22551 use_vec_extr
= true;
22556 /* For SSE1, we have to reuse the V4SF code. */
22557 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
22558 gen_lowpart (V4SFmode
, vec
), elt
);
22564 use_vec_extr
= TARGET_SSE2
;
22567 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
22571 use_vec_extr
= TARGET_SSE4_1
;
22575 /* ??? Could extract the appropriate HImode element and shift. */
22582 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
22583 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
22585 /* Let the rtl optimizers know about the zero extension performed. */
22586 if (inner_mode
== QImode
|| inner_mode
== HImode
)
22588 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
22589 target
= gen_lowpart (SImode
, target
);
22592 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
22596 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
22598 emit_move_insn (mem
, vec
);
22600 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
22601 emit_move_insn (target
, tmp
);
22605 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22606 pattern to reduce; DEST is the destination; IN is the input vector. */
22609 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
22611 rtx tmp1
, tmp2
, tmp3
;
22613 tmp1
= gen_reg_rtx (V4SFmode
);
22614 tmp2
= gen_reg_rtx (V4SFmode
);
22615 tmp3
= gen_reg_rtx (V4SFmode
);
22617 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
22618 emit_insn (fn (tmp2
, tmp1
, in
));
22620 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
22621 GEN_INT (1), GEN_INT (1),
22622 GEN_INT (1+4), GEN_INT (1+4)));
22623 emit_insn (fn (dest
, tmp2
, tmp3
));
22626 /* Target hook for scalar_mode_supported_p. */
22628 ix86_scalar_mode_supported_p (enum machine_mode mode
)
22630 if (DECIMAL_FLOAT_MODE_P (mode
))
22632 else if (mode
== TFmode
)
22633 return TARGET_64BIT
;
22635 return default_scalar_mode_supported_p (mode
);
22638 /* Implements target hook vector_mode_supported_p. */
22640 ix86_vector_mode_supported_p (enum machine_mode mode
)
22642 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
22644 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
22646 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
22648 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
22653 /* Target hook for c_mode_for_suffix. */
22654 static enum machine_mode
22655 ix86_c_mode_for_suffix (char suffix
)
22657 if (TARGET_64BIT
&& suffix
== 'q')
22659 if (TARGET_MMX
&& suffix
== 'w')
22665 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22667 We do this in the new i386 backend to maintain source compatibility
22668 with the old cc0-based compiler. */
22671 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
22672 tree inputs ATTRIBUTE_UNUSED
,
22675 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
22677 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
22682 /* Implements target vector targetm.asm.encode_section_info. This
22683 is not used by netware. */
22685 static void ATTRIBUTE_UNUSED
22686 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
22688 default_encode_section_info (decl
, rtl
, first
);
22690 if (TREE_CODE (decl
) == VAR_DECL
22691 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
22692 && ix86_in_large_data_p (decl
))
22693 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
22696 /* Worker function for REVERSE_CONDITION. */
22699 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
22701 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
22702 ? reverse_condition (code
)
22703 : reverse_condition_maybe_unordered (code
));
22706 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22710 output_387_reg_move (rtx insn
, rtx
*operands
)
22712 if (REG_P (operands
[0]))
22714 if (REG_P (operands
[1])
22715 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22717 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
22718 return output_387_ffreep (operands
, 0);
22719 return "fstp\t%y0";
22721 if (STACK_TOP_P (operands
[0]))
22722 return "fld%z1\t%y1";
22725 else if (MEM_P (operands
[0]))
22727 gcc_assert (REG_P (operands
[1]));
22728 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
22729 return "fstp%z0\t%y0";
22732 /* There is no non-popping store to memory for XFmode.
22733 So if we need one, follow the store with a load. */
22734 if (GET_MODE (operands
[0]) == XFmode
)
22735 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22737 return "fst%z0\t%y0";
22744 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22745 FP status register is set. */
22748 ix86_emit_fp_unordered_jump (rtx label
)
22750 rtx reg
= gen_reg_rtx (HImode
);
22753 emit_insn (gen_x86_fnstsw_1 (reg
));
22755 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
22757 emit_insn (gen_x86_sahf_1 (reg
));
22759 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
22760 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
22764 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
22766 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22767 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
22770 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
22771 gen_rtx_LABEL_REF (VOIDmode
, label
),
22773 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
22775 emit_jump_insn (temp
);
22776 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22779 /* Output code to perform a log1p XFmode calculation. */
22781 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
22783 rtx label1
= gen_label_rtx ();
22784 rtx label2
= gen_label_rtx ();
22786 rtx tmp
= gen_reg_rtx (XFmode
);
22787 rtx tmp2
= gen_reg_rtx (XFmode
);
22789 emit_insn (gen_absxf2 (tmp
, op1
));
22790 emit_insn (gen_cmpxf (tmp
,
22791 CONST_DOUBLE_FROM_REAL_VALUE (
22792 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
22794 emit_jump_insn (gen_bge (label1
));
22796 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22797 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
22798 emit_jump (label2
);
22800 emit_label (label1
);
22801 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
22802 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
22803 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
22804 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
22806 emit_label (label2
);
22809 /* Output code to perform a Newton-Rhapson approximation of a single precision
22810 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22812 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
22814 rtx x0
, x1
, e0
, e1
, two
;
22816 x0
= gen_reg_rtx (mode
);
22817 e0
= gen_reg_rtx (mode
);
22818 e1
= gen_reg_rtx (mode
);
22819 x1
= gen_reg_rtx (mode
);
22821 two
= CONST_DOUBLE_FROM_REAL_VALUE (dconst2
, SFmode
);
22823 if (VECTOR_MODE_P (mode
))
22824 two
= ix86_build_const_vector (SFmode
, true, two
);
22826 two
= force_reg (mode
, two
);
22828 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22830 /* x0 = 1./b estimate */
22831 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22832 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
22835 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
22836 gen_rtx_MULT (mode
, x0
, b
)));
22838 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
22839 gen_rtx_MINUS (mode
, two
, e0
)));
22841 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
22842 gen_rtx_MULT (mode
, x0
, e1
)));
22844 emit_insn (gen_rtx_SET (VOIDmode
, res
,
22845 gen_rtx_MULT (mode
, a
, x1
)));
22848 /* Output code to perform a Newton-Rhapson approximation of a
22849 single precision floating point [reciprocal] square root. */
22851 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
22854 rtx x0
, e0
, e1
, e2
, e3
, three
, half
, zero
, mask
;
22856 x0
= gen_reg_rtx (mode
);
22857 e0
= gen_reg_rtx (mode
);
22858 e1
= gen_reg_rtx (mode
);
22859 e2
= gen_reg_rtx (mode
);
22860 e3
= gen_reg_rtx (mode
);
22862 three
= CONST_DOUBLE_FROM_REAL_VALUE (dconst3
, SFmode
);
22863 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, SFmode
);
22865 mask
= gen_reg_rtx (mode
);
22867 if (VECTOR_MODE_P (mode
))
22869 three
= ix86_build_const_vector (SFmode
, true, three
);
22870 half
= ix86_build_const_vector (SFmode
, true, half
);
22873 three
= force_reg (mode
, three
);
22874 half
= force_reg (mode
, half
);
22876 zero
= force_reg (mode
, CONST0_RTX(mode
));
22878 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22879 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22881 /* Compare a to zero. */
22882 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
22883 gen_rtx_NE (mode
, a
, zero
)));
22885 /* x0 = 1./sqrt(a) estimate */
22886 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22887 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
22889 /* Filter out infinity. */
22890 if (VECTOR_MODE_P (mode
))
22891 emit_insn (gen_rtx_SET (VOIDmode
, gen_lowpart (V4SFmode
, x0
),
22893 gen_lowpart (V4SFmode
, x0
),
22894 gen_lowpart (V4SFmode
, mask
))));
22896 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
22897 gen_rtx_AND (mode
, x0
, mask
)));
22900 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
22901 gen_rtx_MULT (mode
, x0
, a
)));
22903 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
22904 gen_rtx_MULT (mode
, e0
, x0
)));
22906 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
22907 gen_rtx_MINUS (mode
, three
, e1
)));
22910 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
22911 gen_rtx_MULT (mode
, half
, x0
)));
22914 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
22915 gen_rtx_MULT (mode
, half
, e0
)));
22916 /* ret = e2 * e3 */
22917 emit_insn (gen_rtx_SET (VOIDmode
, res
,
22918 gen_rtx_MULT (mode
, e2
, e3
)));
22921 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22923 static void ATTRIBUTE_UNUSED
22924 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
22927 /* With Binutils 2.15, the "@unwind" marker must be specified on
22928 every occurrence of the ".eh_frame" section, not just the first
22931 && strcmp (name
, ".eh_frame") == 0)
22933 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
22934 flags
& SECTION_WRITE
? "aw" : "a");
22937 default_elf_asm_named_section (name
, flags
, decl
);
22940 /* Return the mangling of TYPE if it is an extended fundamental type. */
22942 static const char *
22943 ix86_mangle_type (const_tree type
)
22945 type
= TYPE_MAIN_VARIANT (type
);
22947 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
22948 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
22951 switch (TYPE_MODE (type
))
22954 /* __float128 is "g". */
22957 /* "long double" or __float80 is "e". */
22964 /* For 32-bit code we can save PIC register setup by using
22965 __stack_chk_fail_local hidden function instead of calling
22966 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22967 register, so it is better to call __stack_chk_fail directly. */
22970 ix86_stack_protect_fail (void)
22972 return TARGET_64BIT
22973 ? default_external_stack_protect_fail ()
22974 : default_hidden_stack_protect_fail ();
22977 /* Select a format to encode pointers in exception handling data. CODE
22978 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22979 true if the symbol may be affected by dynamic relocations.
22981 ??? All x86 object file formats are capable of representing this.
22982 After all, the relocation needed is the same as for the call insn.
22983 Whether or not a particular assembler allows us to enter such, I
22984 guess we'll have to see. */
22986 asm_preferred_eh_data_format (int code
, int global
)
22990 int type
= DW_EH_PE_sdata8
;
22992 || ix86_cmodel
== CM_SMALL_PIC
22993 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
22994 type
= DW_EH_PE_sdata4
;
22995 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
22997 if (ix86_cmodel
== CM_SMALL
22998 || (ix86_cmodel
== CM_MEDIUM
&& code
))
22999 return DW_EH_PE_udata4
;
23000 return DW_EH_PE_absptr
;
23003 /* Expand copysign from SIGN to the positive value ABS_VALUE
23004 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
23007 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
23009 enum machine_mode mode
= GET_MODE (sign
);
23010 rtx sgn
= gen_reg_rtx (mode
);
23011 if (mask
== NULL_RTX
)
23013 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
23014 if (!VECTOR_MODE_P (mode
))
23016 /* We need to generate a scalar mode mask in this case. */
23017 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23018 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23019 mask
= gen_reg_rtx (mode
);
23020 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23024 mask
= gen_rtx_NOT (mode
, mask
);
23025 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
23026 gen_rtx_AND (mode
, mask
, sign
)));
23027 emit_insn (gen_rtx_SET (VOIDmode
, result
,
23028 gen_rtx_IOR (mode
, abs_value
, sgn
)));
23031 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23032 mask for masking out the sign-bit is stored in *SMASK, if that is
23035 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
23037 enum machine_mode mode
= GET_MODE (op0
);
23040 xa
= gen_reg_rtx (mode
);
23041 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
23042 if (!VECTOR_MODE_P (mode
))
23044 /* We need to generate a scalar mode mask in this case. */
23045 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
23046 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
23047 mask
= gen_reg_rtx (mode
);
23048 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
23050 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
23051 gen_rtx_AND (mode
, op0
, mask
)));
23059 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23060 swapping the operands if SWAP_OPERANDS is true. The expanded
23061 code is a forward jump to a newly created label in case the
23062 comparison is true. The generated label rtx is returned. */
23064 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
23065 bool swap_operands
)
23076 label
= gen_label_rtx ();
23077 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
23078 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23079 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
23080 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
23081 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23082 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
23083 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23084 JUMP_LABEL (tmp
) = label
;
23089 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23090 using comparison code CODE. Operands are swapped for the comparison if
23091 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23093 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
23094 bool swap_operands
)
23096 enum machine_mode mode
= GET_MODE (op0
);
23097 rtx mask
= gen_reg_rtx (mode
);
23106 if (mode
== DFmode
)
23107 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
23108 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23110 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
23111 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
23116 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23117 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
23119 ix86_gen_TWO52 (enum machine_mode mode
)
23121 REAL_VALUE_TYPE TWO52r
;
23124 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
23125 TWO52
= const_double_from_real_value (TWO52r
, mode
);
23126 TWO52
= force_reg (mode
, TWO52
);
23131 /* Expand SSE sequence for computing lround from OP1 storing
23134 ix86_expand_lround (rtx op0
, rtx op1
)
23136 /* C code for the stuff we're doing below:
23137 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23140 enum machine_mode mode
= GET_MODE (op1
);
23141 const struct real_format
*fmt
;
23142 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23145 /* load nextafter (0.5, 0.0) */
23146 fmt
= REAL_MODE_FORMAT (mode
);
23147 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
23148 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23150 /* adj = copysign (0.5, op1) */
23151 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23152 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
23154 /* adj = op1 + adj */
23155 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
23157 /* op0 = (imode)adj */
23158 expand_fix (op0
, adj
, 0);
23161 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
23164 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
23166 /* C code for the stuff we're doing below (for do_floor):
23168 xi -= (double)xi > op1 ? 1 : 0;
23171 enum machine_mode fmode
= GET_MODE (op1
);
23172 enum machine_mode imode
= GET_MODE (op0
);
23173 rtx ireg
, freg
, label
, tmp
;
23175 /* reg = (long)op1 */
23176 ireg
= gen_reg_rtx (imode
);
23177 expand_fix (ireg
, op1
, 0);
23179 /* freg = (double)reg */
23180 freg
= gen_reg_rtx (fmode
);
23181 expand_float (freg
, ireg
, 0);
23183 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23184 label
= ix86_expand_sse_compare_and_jump (UNLE
,
23185 freg
, op1
, !do_floor
);
23186 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
23187 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
23188 emit_move_insn (ireg
, tmp
);
23190 emit_label (label
);
23191 LABEL_NUSES (label
) = 1;
23193 emit_move_insn (op0
, ireg
);
23196 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23197 result in OPERAND0. */
23199 ix86_expand_rint (rtx operand0
, rtx operand1
)
23201 /* C code for the stuff we're doing below:
23202 xa = fabs (operand1);
23203 if (!isless (xa, 2**52))
23205 xa = xa + 2**52 - 2**52;
23206 return copysign (xa, operand1);
23208 enum machine_mode mode
= GET_MODE (operand0
);
23209 rtx res
, xa
, label
, TWO52
, mask
;
23211 res
= gen_reg_rtx (mode
);
23212 emit_move_insn (res
, operand1
);
23214 /* xa = abs (operand1) */
23215 xa
= ix86_expand_sse_fabs (res
, &mask
);
23217 /* if (!isless (xa, TWO52)) goto label; */
23218 TWO52
= ix86_gen_TWO52 (mode
);
23219 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23221 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23222 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23224 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
23226 emit_label (label
);
23227 LABEL_NUSES (label
) = 1;
23229 emit_move_insn (operand0
, res
);
23232 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23235 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
23237 /* C code for the stuff we expand below.
23238 double xa = fabs (x), x2;
23239 if (!isless (xa, TWO52))
23241 xa = xa + TWO52 - TWO52;
23242 x2 = copysign (xa, x);
23251 enum machine_mode mode
= GET_MODE (operand0
);
23252 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
23254 TWO52
= ix86_gen_TWO52 (mode
);
23256 /* Temporary for holding the result, initialized to the input
23257 operand to ease control flow. */
23258 res
= gen_reg_rtx (mode
);
23259 emit_move_insn (res
, operand1
);
23261 /* xa = abs (operand1) */
23262 xa
= ix86_expand_sse_fabs (res
, &mask
);
23264 /* if (!isless (xa, TWO52)) goto label; */
23265 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23267 /* xa = xa + TWO52 - TWO52; */
23268 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23269 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
23271 /* xa = copysign (xa, operand1) */
23272 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
23274 /* generate 1.0 or -1.0 */
23275 one
= force_reg (mode
,
23276 const_double_from_real_value (do_floor
23277 ? dconst1
: dconstm1
, mode
));
23279 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23280 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23281 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23282 gen_rtx_AND (mode
, one
, tmp
)));
23283 /* We always need to subtract here to preserve signed zero. */
23284 tmp
= expand_simple_binop (mode
, MINUS
,
23285 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23286 emit_move_insn (res
, tmp
);
23288 emit_label (label
);
23289 LABEL_NUSES (label
) = 1;
23291 emit_move_insn (operand0
, res
);
23294 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23297 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
23299 /* C code for the stuff we expand below.
23300 double xa = fabs (x), x2;
23301 if (!isless (xa, TWO52))
23303 x2 = (double)(long)x;
23310 if (HONOR_SIGNED_ZEROS (mode))
23311 return copysign (x2, x);
23314 enum machine_mode mode
= GET_MODE (operand0
);
23315 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
23317 TWO52
= ix86_gen_TWO52 (mode
);
23319 /* Temporary for holding the result, initialized to the input
23320 operand to ease control flow. */
23321 res
= gen_reg_rtx (mode
);
23322 emit_move_insn (res
, operand1
);
23324 /* xa = abs (operand1) */
23325 xa
= ix86_expand_sse_fabs (res
, &mask
);
23327 /* if (!isless (xa, TWO52)) goto label; */
23328 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23330 /* xa = (double)(long)x */
23331 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23332 expand_fix (xi
, res
, 0);
23333 expand_float (xa
, xi
, 0);
23336 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23338 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23339 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
23340 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23341 gen_rtx_AND (mode
, one
, tmp
)));
23342 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
23343 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23344 emit_move_insn (res
, tmp
);
23346 if (HONOR_SIGNED_ZEROS (mode
))
23347 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23349 emit_label (label
);
23350 LABEL_NUSES (label
) = 1;
23352 emit_move_insn (operand0
, res
);
23355 /* Expand SSE sequence for computing round from OPERAND1 storing
23356 into OPERAND0. Sequence that works without relying on DImode truncation
23357 via cvttsd2siq that is only available on 64bit targets. */
23359 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
23361 /* C code for the stuff we expand below.
23362 double xa = fabs (x), xa2, x2;
23363 if (!isless (xa, TWO52))
23365 Using the absolute value and copying back sign makes
23366 -0.0 -> -0.0 correct.
23367 xa2 = xa + TWO52 - TWO52;
23372 else if (dxa > 0.5)
23374 x2 = copysign (xa2, x);
23377 enum machine_mode mode
= GET_MODE (operand0
);
23378 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
23380 TWO52
= ix86_gen_TWO52 (mode
);
23382 /* Temporary for holding the result, initialized to the input
23383 operand to ease control flow. */
23384 res
= gen_reg_rtx (mode
);
23385 emit_move_insn (res
, operand1
);
23387 /* xa = abs (operand1) */
23388 xa
= ix86_expand_sse_fabs (res
, &mask
);
23390 /* if (!isless (xa, TWO52)) goto label; */
23391 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23393 /* xa2 = xa + TWO52 - TWO52; */
23394 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23395 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
23397 /* dxa = xa2 - xa; */
23398 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
23400 /* generate 0.5, 1.0 and -0.5 */
23401 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
23402 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23403 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
23407 tmp
= gen_reg_rtx (mode
);
23408 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23409 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
23410 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23411 gen_rtx_AND (mode
, one
, tmp
)));
23412 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23413 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23414 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
23415 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
23416 gen_rtx_AND (mode
, one
, tmp
)));
23417 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
23419 /* res = copysign (xa2, operand1) */
23420 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
23422 emit_label (label
);
23423 LABEL_NUSES (label
) = 1;
23425 emit_move_insn (operand0
, res
);
23428 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23431 ix86_expand_trunc (rtx operand0
, rtx operand1
)
23433 /* C code for SSE variant we expand below.
23434 double xa = fabs (x), x2;
23435 if (!isless (xa, TWO52))
23437 x2 = (double)(long)x;
23438 if (HONOR_SIGNED_ZEROS (mode))
23439 return copysign (x2, x);
23442 enum machine_mode mode
= GET_MODE (operand0
);
23443 rtx xa
, xi
, TWO52
, label
, res
, mask
;
23445 TWO52
= ix86_gen_TWO52 (mode
);
23447 /* Temporary for holding the result, initialized to the input
23448 operand to ease control flow. */
23449 res
= gen_reg_rtx (mode
);
23450 emit_move_insn (res
, operand1
);
23452 /* xa = abs (operand1) */
23453 xa
= ix86_expand_sse_fabs (res
, &mask
);
23455 /* if (!isless (xa, TWO52)) goto label; */
23456 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23458 /* x = (double)(long)x */
23459 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23460 expand_fix (xi
, res
, 0);
23461 expand_float (res
, xi
, 0);
23463 if (HONOR_SIGNED_ZEROS (mode
))
23464 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
23466 emit_label (label
);
23467 LABEL_NUSES (label
) = 1;
23469 emit_move_insn (operand0
, res
);
23472 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23475 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
23477 enum machine_mode mode
= GET_MODE (operand0
);
23478 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
23480 /* C code for SSE variant we expand below.
23481 double xa = fabs (x), x2;
23482 if (!isless (xa, TWO52))
23484 xa2 = xa + TWO52 - TWO52;
23488 x2 = copysign (xa2, x);
23492 TWO52
= ix86_gen_TWO52 (mode
);
23494 /* Temporary for holding the result, initialized to the input
23495 operand to ease control flow. */
23496 res
= gen_reg_rtx (mode
);
23497 emit_move_insn (res
, operand1
);
23499 /* xa = abs (operand1) */
23500 xa
= ix86_expand_sse_fabs (res
, &smask
);
23502 /* if (!isless (xa, TWO52)) goto label; */
23503 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23505 /* res = xa + TWO52 - TWO52; */
23506 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
23507 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
23508 emit_move_insn (res
, tmp
);
23511 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
23513 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23514 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
23515 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
23516 gen_rtx_AND (mode
, mask
, one
)));
23517 tmp
= expand_simple_binop (mode
, MINUS
,
23518 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
23519 emit_move_insn (res
, tmp
);
23521 /* res = copysign (res, operand1) */
23522 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
23524 emit_label (label
);
23525 LABEL_NUSES (label
) = 1;
23527 emit_move_insn (operand0
, res
);
23530 /* Expand SSE sequence for computing round from OPERAND1 storing
23533 ix86_expand_round (rtx operand0
, rtx operand1
)
23535 /* C code for the stuff we're doing below:
23536 double xa = fabs (x);
23537 if (!isless (xa, TWO52))
23539 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23540 return copysign (xa, x);
23542 enum machine_mode mode
= GET_MODE (operand0
);
23543 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
23544 const struct real_format
*fmt
;
23545 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
23547 /* Temporary for holding the result, initialized to the input
23548 operand to ease control flow. */
23549 res
= gen_reg_rtx (mode
);
23550 emit_move_insn (res
, operand1
);
23552 TWO52
= ix86_gen_TWO52 (mode
);
23553 xa
= ix86_expand_sse_fabs (res
, &mask
);
23554 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
23556 /* load nextafter (0.5, 0.0) */
23557 fmt
= REAL_MODE_FORMAT (mode
);
23558 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
23559 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
23561 /* xa = xa + 0.5 */
23562 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
23563 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
23565 /* xa = (double)(int64_t)xa */
23566 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
23567 expand_fix (xi
, xa
, 0);
23568 expand_float (xa
, xi
, 0);
23570 /* res = copysign (xa, operand1) */
23571 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
23573 emit_label (label
);
23574 LABEL_NUSES (label
) = 1;
23576 emit_move_insn (operand0
, res
);
23580 /* Table of valid machine attributes. */
23581 static const struct attribute_spec ix86_attribute_table
[] =
23583 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23584 /* Stdcall attribute says callee is responsible for popping arguments
23585 if they are not variable. */
23586 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23587 /* Fastcall attribute says callee is responsible for popping arguments
23588 if they are not variable. */
23589 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23590 /* Cdecl attribute says the callee is a normal C declaration */
23591 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23592 /* Regparm attribute specifies how many integer arguments are to be
23593 passed in registers. */
23594 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
23595 /* Sseregparm attribute says we are using x86_64 calling conventions
23596 for FP arguments. */
23597 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
23598 /* force_align_arg_pointer says this function realigns the stack at entry. */
23599 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
23600 false, true, true, ix86_handle_cconv_attribute
},
23601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23602 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
23603 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
23604 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
23606 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23607 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
23608 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23609 SUBTARGET_ATTRIBUTE_TABLE
,
23611 { NULL
, 0, 0, false, false, false, NULL
}
/* Initialize the GCC target structure.  Each TARGET_* macro below overrides
   a default hook; TARGET_INITIALIZER (used at the end of the file) collects
   them all into the targetm vector.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* Subtargets may provide their own section-info encoder.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
23790 struct gcc_target targetm
= TARGET_INITIALIZER
;
23792 #include "gt-i386.h"