/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "tree-flow.h"
66 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Default stack-checking limit when the target headers do not provide
   one; -1 disables the limit.  (The extraction this file came from had
   dropped the matching #endif -- restored here.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables:
   QI=0, HI=1, SI=2, DI=3, anything else (the "other" column)=4.
   (The trailing ": 4)" arm was dropped by the extraction -- restored.)  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */

/* Size-tuning cost unit: we assume COSTS_N_INSNS is defined as (N)*4
   and an addition is 2 bytes, so a "cost of N" in bytes is N*2.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor for the unused (64-bit) slot of a
   32-bit-only cost table: always punt to the library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
87 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
88 COSTS_N_BYTES (2), /* cost of an add instruction */
89 COSTS_N_BYTES (3), /* cost of a lea instruction */
90 COSTS_N_BYTES (2), /* variable shift costs */
91 COSTS_N_BYTES (3), /* constant shift costs */
92 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
93 COSTS_N_BYTES (3), /* HI */
94 COSTS_N_BYTES (3), /* SI */
95 COSTS_N_BYTES (3), /* DI */
96 COSTS_N_BYTES (5)}, /* other */
97 0, /* cost of multiply per each bit set */
98 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
99 COSTS_N_BYTES (3), /* HI */
100 COSTS_N_BYTES (3), /* SI */
101 COSTS_N_BYTES (3), /* DI */
102 COSTS_N_BYTES (5)}, /* other */
103 COSTS_N_BYTES (3), /* cost of movsx */
104 COSTS_N_BYTES (3), /* cost of movzx */
105 0, /* "large" insn */
107 2, /* cost for loading QImode using movzbl */
108 {2, 2, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 2, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 2}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {2, 2, 2}, /* cost of storing fp registers
116 in SFmode, DFmode and XFmode */
117 3, /* cost of moving MMX register */
118 {3, 3}, /* cost of loading MMX registers
119 in SImode and DImode */
120 {3, 3}, /* cost of storing MMX registers
121 in SImode and DImode */
122 3, /* cost of moving SSE register */
123 {3, 3, 3}, /* cost of loading SSE registers
124 in SImode, DImode and TImode */
125 {3, 3, 3}, /* cost of storing SSE registers
126 in SImode, DImode and TImode */
127 3, /* MMX or SSE register to integer */
128 0, /* size of l1 cache */
129 0, /* size of l2 cache */
130 0, /* size of prefetch block */
131 0, /* number of parallel prefetches */
133 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
134 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
135 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
136 COSTS_N_BYTES (2), /* cost of FABS instruction. */
137 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
138 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
139 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
140 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
141 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
142 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
143 1, /* scalar_stmt_cost. */
144 1, /* scalar load_cost. */
145 1, /* scalar_store_cost. */
146 1, /* vec_stmt_cost. */
147 1, /* vec_to_scalar_cost. */
148 1, /* scalar_to_vec_cost. */
149 1, /* vec_align_load_cost. */
150 1, /* vec_unalign_load_cost. */
151 1, /* vec_store_cost. */
152 1, /* cond_taken_branch_cost. */
153 1, /* cond_not_taken_branch_cost. */
156 /* Processor costs (relative to an add) */
158 struct processor_costs i386_cost
= { /* 386 specific costs */
159 COSTS_N_INSNS (1), /* cost of an add instruction */
160 COSTS_N_INSNS (1), /* cost of a lea instruction */
161 COSTS_N_INSNS (3), /* variable shift costs */
162 COSTS_N_INSNS (2), /* constant shift costs */
163 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
164 COSTS_N_INSNS (6), /* HI */
165 COSTS_N_INSNS (6), /* SI */
166 COSTS_N_INSNS (6), /* DI */
167 COSTS_N_INSNS (6)}, /* other */
168 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
169 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
170 COSTS_N_INSNS (23), /* HI */
171 COSTS_N_INSNS (23), /* SI */
172 COSTS_N_INSNS (23), /* DI */
173 COSTS_N_INSNS (23)}, /* other */
174 COSTS_N_INSNS (3), /* cost of movsx */
175 COSTS_N_INSNS (2), /* cost of movzx */
176 15, /* "large" insn */
178 4, /* cost for loading QImode using movzbl */
179 {2, 4, 2}, /* cost of loading integer registers
180 in QImode, HImode and SImode.
181 Relative to reg-reg move (2). */
182 {2, 4, 2}, /* cost of storing integer registers */
183 2, /* cost of reg,reg fld/fst */
184 {8, 8, 8}, /* cost of loading fp registers
185 in SFmode, DFmode and XFmode */
186 {8, 8, 8}, /* cost of storing fp registers
187 in SFmode, DFmode and XFmode */
188 2, /* cost of moving MMX register */
189 {4, 8}, /* cost of loading MMX registers
190 in SImode and DImode */
191 {4, 8}, /* cost of storing MMX registers
192 in SImode and DImode */
193 2, /* cost of moving SSE register */
194 {4, 8, 16}, /* cost of loading SSE registers
195 in SImode, DImode and TImode */
196 {4, 8, 16}, /* cost of storing SSE registers
197 in SImode, DImode and TImode */
198 3, /* MMX or SSE register to integer */
199 0, /* size of l1 cache */
200 0, /* size of l2 cache */
201 0, /* size of prefetch block */
202 0, /* number of parallel prefetches */
204 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
205 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
206 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
207 COSTS_N_INSNS (22), /* cost of FABS instruction. */
208 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
209 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
210 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
211 DUMMY_STRINGOP_ALGS
},
212 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
213 DUMMY_STRINGOP_ALGS
},
214 1, /* scalar_stmt_cost. */
215 1, /* scalar load_cost. */
216 1, /* scalar_store_cost. */
217 1, /* vec_stmt_cost. */
218 1, /* vec_to_scalar_cost. */
219 1, /* scalar_to_vec_cost. */
220 1, /* vec_align_load_cost. */
221 2, /* vec_unalign_load_cost. */
222 1, /* vec_store_cost. */
223 3, /* cond_taken_branch_cost. */
224 1, /* cond_not_taken_branch_cost. */
228 struct processor_costs i486_cost
= { /* 486 specific costs */
229 COSTS_N_INSNS (1), /* cost of an add instruction */
230 COSTS_N_INSNS (1), /* cost of a lea instruction */
231 COSTS_N_INSNS (3), /* variable shift costs */
232 COSTS_N_INSNS (2), /* constant shift costs */
233 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
234 COSTS_N_INSNS (12), /* HI */
235 COSTS_N_INSNS (12), /* SI */
236 COSTS_N_INSNS (12), /* DI */
237 COSTS_N_INSNS (12)}, /* other */
238 1, /* cost of multiply per each bit set */
239 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
240 COSTS_N_INSNS (40), /* HI */
241 COSTS_N_INSNS (40), /* SI */
242 COSTS_N_INSNS (40), /* DI */
243 COSTS_N_INSNS (40)}, /* other */
244 COSTS_N_INSNS (3), /* cost of movsx */
245 COSTS_N_INSNS (2), /* cost of movzx */
246 15, /* "large" insn */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {4, 8, 16}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {4, 8, 16}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 3, /* MMX or SSE register to integer */
269 4, /* size of l1 cache. 486 has 8kB cache
270 shared for code and data, so 4kB is
271 not really precise. */
272 4, /* size of l2 cache */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (3), /* cost of FABS instruction. */
280 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
282 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
283 DUMMY_STRINGOP_ALGS
},
284 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
285 DUMMY_STRINGOP_ALGS
},
286 1, /* scalar_stmt_cost. */
287 1, /* scalar load_cost. */
288 1, /* scalar_store_cost. */
289 1, /* vec_stmt_cost. */
290 1, /* vec_to_scalar_cost. */
291 1, /* scalar_to_vec_cost. */
292 1, /* vec_align_load_cost. */
293 2, /* vec_unalign_load_cost. */
294 1, /* vec_store_cost. */
295 3, /* cond_taken_branch_cost. */
296 1, /* cond_not_taken_branch_cost. */
300 struct processor_costs pentium_cost
= {
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (4), /* variable shift costs */
304 COSTS_N_INSNS (1), /* constant shift costs */
305 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (11), /* HI */
307 COSTS_N_INSNS (11), /* SI */
308 COSTS_N_INSNS (11), /* DI */
309 COSTS_N_INSNS (11)}, /* other */
310 0, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (25), /* HI */
313 COSTS_N_INSNS (25), /* SI */
314 COSTS_N_INSNS (25), /* DI */
315 COSTS_N_INSNS (25)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 8, /* "large" insn */
320 6, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {2, 2, 6}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {4, 4, 6}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 8, /* cost of moving MMX register */
331 {8, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {8, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 8, /* size of l1 cache. */
342 8, /* size of l2 cache */
343 0, /* size of prefetch block */
344 0, /* number of parallel prefetches */
346 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
347 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
348 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
349 COSTS_N_INSNS (1), /* cost of FABS instruction. */
350 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
351 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
352 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
353 DUMMY_STRINGOP_ALGS
},
354 {{libcall
, {{-1, rep_prefix_4_byte
, false}}},
355 DUMMY_STRINGOP_ALGS
},
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
370 struct processor_costs pentiumpro_cost
= {
371 COSTS_N_INSNS (1), /* cost of an add instruction */
372 COSTS_N_INSNS (1), /* cost of a lea instruction */
373 COSTS_N_INSNS (1), /* variable shift costs */
374 COSTS_N_INSNS (1), /* constant shift costs */
375 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
376 COSTS_N_INSNS (4), /* HI */
377 COSTS_N_INSNS (4), /* SI */
378 COSTS_N_INSNS (4), /* DI */
379 COSTS_N_INSNS (4)}, /* other */
380 0, /* cost of multiply per each bit set */
381 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
382 COSTS_N_INSNS (17), /* HI */
383 COSTS_N_INSNS (17), /* SI */
384 COSTS_N_INSNS (17), /* DI */
385 COSTS_N_INSNS (17)}, /* other */
386 COSTS_N_INSNS (1), /* cost of movsx */
387 COSTS_N_INSNS (1), /* cost of movzx */
388 8, /* "large" insn */
390 2, /* cost for loading QImode using movzbl */
391 {4, 4, 4}, /* cost of loading integer registers
392 in QImode, HImode and SImode.
393 Relative to reg-reg move (2). */
394 {2, 2, 2}, /* cost of storing integer registers */
395 2, /* cost of reg,reg fld/fst */
396 {2, 2, 6}, /* cost of loading fp registers
397 in SFmode, DFmode and XFmode */
398 {4, 4, 6}, /* cost of storing fp registers
399 in SFmode, DFmode and XFmode */
400 2, /* cost of moving MMX register */
401 {2, 2}, /* cost of loading MMX registers
402 in SImode and DImode */
403 {2, 2}, /* cost of storing MMX registers
404 in SImode and DImode */
405 2, /* cost of moving SSE register */
406 {2, 2, 8}, /* cost of loading SSE registers
407 in SImode, DImode and TImode */
408 {2, 2, 8}, /* cost of storing SSE registers
409 in SImode, DImode and TImode */
410 3, /* MMX or SSE register to integer */
411 8, /* size of l1 cache. */
412 256, /* size of l2 cache */
413 32, /* size of prefetch block */
414 6, /* number of parallel prefetches */
416 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
417 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
418 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
419 COSTS_N_INSNS (2), /* cost of FABS instruction. */
420 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
421 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
422 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
423 (we ensure the alignment). For small blocks inline loop is still a
424 noticeable win, for bigger blocks either rep movsl or rep movsb is
425 way to go. Rep movsb has apparently more expensive startup time in CPU,
426 but after 4K the difference is down in the noise. */
427 {{rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
428 {8192, rep_prefix_4_byte
, false},
429 {-1, rep_prefix_1_byte
, false}}},
430 DUMMY_STRINGOP_ALGS
},
431 {{rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
432 {8192, rep_prefix_4_byte
, false},
433 {-1, libcall
, false}}},
434 DUMMY_STRINGOP_ALGS
},
435 1, /* scalar_stmt_cost. */
436 1, /* scalar load_cost. */
437 1, /* scalar_store_cost. */
438 1, /* vec_stmt_cost. */
439 1, /* vec_to_scalar_cost. */
440 1, /* scalar_to_vec_cost. */
441 1, /* vec_align_load_cost. */
442 2, /* vec_unalign_load_cost. */
443 1, /* vec_store_cost. */
444 3, /* cond_taken_branch_cost. */
445 1, /* cond_not_taken_branch_cost. */
449 struct processor_costs geode_cost
= {
450 COSTS_N_INSNS (1), /* cost of an add instruction */
451 COSTS_N_INSNS (1), /* cost of a lea instruction */
452 COSTS_N_INSNS (2), /* variable shift costs */
453 COSTS_N_INSNS (1), /* constant shift costs */
454 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
455 COSTS_N_INSNS (4), /* HI */
456 COSTS_N_INSNS (7), /* SI */
457 COSTS_N_INSNS (7), /* DI */
458 COSTS_N_INSNS (7)}, /* other */
459 0, /* cost of multiply per each bit set */
460 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
461 COSTS_N_INSNS (23), /* HI */
462 COSTS_N_INSNS (39), /* SI */
463 COSTS_N_INSNS (39), /* DI */
464 COSTS_N_INSNS (39)}, /* other */
465 COSTS_N_INSNS (1), /* cost of movsx */
466 COSTS_N_INSNS (1), /* cost of movzx */
467 8, /* "large" insn */
469 1, /* cost for loading QImode using movzbl */
470 {1, 1, 1}, /* cost of loading integer registers
471 in QImode, HImode and SImode.
472 Relative to reg-reg move (2). */
473 {1, 1, 1}, /* cost of storing integer registers */
474 1, /* cost of reg,reg fld/fst */
475 {1, 1, 1}, /* cost of loading fp registers
476 in SFmode, DFmode and XFmode */
477 {4, 6, 6}, /* cost of storing fp registers
478 in SFmode, DFmode and XFmode */
480 1, /* cost of moving MMX register */
481 {1, 1}, /* cost of loading MMX registers
482 in SImode and DImode */
483 {1, 1}, /* cost of storing MMX registers
484 in SImode and DImode */
485 1, /* cost of moving SSE register */
486 {1, 1, 1}, /* cost of loading SSE registers
487 in SImode, DImode and TImode */
488 {1, 1, 1}, /* cost of storing SSE registers
489 in SImode, DImode and TImode */
490 1, /* MMX or SSE register to integer */
491 64, /* size of l1 cache. */
492 128, /* size of l2 cache. */
493 32, /* size of prefetch block */
494 1, /* number of parallel prefetches */
496 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
497 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
498 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
501 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
502 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
503 DUMMY_STRINGOP_ALGS
},
504 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
505 DUMMY_STRINGOP_ALGS
},
506 1, /* scalar_stmt_cost. */
507 1, /* scalar load_cost. */
508 1, /* scalar_store_cost. */
509 1, /* vec_stmt_cost. */
510 1, /* vec_to_scalar_cost. */
511 1, /* scalar_to_vec_cost. */
512 1, /* vec_align_load_cost. */
513 2, /* vec_unalign_load_cost. */
514 1, /* vec_store_cost. */
515 3, /* cond_taken_branch_cost. */
516 1, /* cond_not_taken_branch_cost. */
520 struct processor_costs k6_cost
= {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (2), /* cost of a lea instruction */
523 COSTS_N_INSNS (1), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (3), /* HI */
527 COSTS_N_INSNS (3), /* SI */
528 COSTS_N_INSNS (3), /* DI */
529 COSTS_N_INSNS (3)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (18), /* HI */
533 COSTS_N_INSNS (18), /* SI */
534 COSTS_N_INSNS (18), /* DI */
535 COSTS_N_INSNS (18)}, /* other */
536 COSTS_N_INSNS (2), /* cost of movsx */
537 COSTS_N_INSNS (2), /* cost of movzx */
538 8, /* "large" insn */
540 3, /* cost for loading QImode using movzbl */
541 {4, 5, 4}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {2, 3, 2}, /* cost of storing integer registers */
545 4, /* cost of reg,reg fld/fst */
546 {6, 6, 6}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 4, 4}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
550 2, /* cost of moving MMX register */
551 {2, 2}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {2, 2}, /* cost of storing MMX registers
554 in SImode and DImode */
555 2, /* cost of moving SSE register */
556 {2, 2, 8}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {2, 2, 8}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 6, /* MMX or SSE register to integer */
561 32, /* size of l1 cache. */
562 32, /* size of l2 cache. Some models
563 have integrated l2 cache, but
564 optimizing for k6 is not important
565 enough to worry about that. */
566 32, /* size of prefetch block */
567 1, /* number of parallel prefetches */
569 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
570 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
571 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
572 COSTS_N_INSNS (2), /* cost of FABS instruction. */
573 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
574 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
575 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
576 DUMMY_STRINGOP_ALGS
},
577 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
578 DUMMY_STRINGOP_ALGS
},
579 1, /* scalar_stmt_cost. */
580 1, /* scalar load_cost. */
581 1, /* scalar_store_cost. */
582 1, /* vec_stmt_cost. */
583 1, /* vec_to_scalar_cost. */
584 1, /* scalar_to_vec_cost. */
585 1, /* vec_align_load_cost. */
586 2, /* vec_unalign_load_cost. */
587 1, /* vec_store_cost. */
588 3, /* cond_taken_branch_cost. */
589 1, /* cond_not_taken_branch_cost. */
593 struct processor_costs athlon_cost
= {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (5), /* HI */
600 COSTS_N_INSNS (5), /* SI */
601 COSTS_N_INSNS (5), /* DI */
602 COSTS_N_INSNS (5)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (26), /* HI */
606 COSTS_N_INSNS (42), /* SI */
607 COSTS_N_INSNS (74), /* DI */
608 COSTS_N_INSNS (74)}, /* other */
609 COSTS_N_INSNS (1), /* cost of movsx */
610 COSTS_N_INSNS (1), /* cost of movzx */
611 8, /* "large" insn */
613 4, /* cost for loading QImode using movzbl */
614 {3, 4, 3}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {3, 4, 3}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {4, 4, 12}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {6, 6, 8}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {4, 4}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {4, 4}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {4, 4, 6}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {4, 4, 5}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 5, /* MMX or SSE register to integer */
634 64, /* size of l1 cache. */
635 256, /* size of l2 cache. */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
639 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
649 DUMMY_STRINGOP_ALGS
},
650 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
},
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
666 struct processor_costs k8_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (2), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (4), /* HI */
673 COSTS_N_INSNS (3), /* SI */
674 COSTS_N_INSNS (4), /* DI */
675 COSTS_N_INSNS (5)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (26), /* HI */
679 COSTS_N_INSNS (42), /* SI */
680 COSTS_N_INSNS (74), /* DI */
681 COSTS_N_INSNS (74)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 8, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {3, 4, 3}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {3, 4, 3}, /* cost of storing integer registers */
691 4, /* cost of reg,reg fld/fst */
692 {4, 4, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {6, 6, 8}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 2, /* cost of moving MMX register */
697 {3, 3}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {4, 4}, /* cost of storing MMX registers
700 in SImode and DImode */
701 2, /* cost of moving SSE register */
702 {4, 3, 6}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {4, 4, 5}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 5, /* MMX or SSE register to integer */
707 64, /* size of l1 cache. */
708 512, /* size of l2 cache. */
709 64, /* size of prefetch block */
710 /* New AMD processors never drop prefetches; if they cannot be performed
711 immediately, they are queued. We set number of simultaneous prefetches
712 to a large constant to reflect this (it probably is not a good idea not
713 to limit number of prefetches at all, as their execution also takes some
715 100, /* number of parallel prefetches */
717 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
718 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
719 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
720 COSTS_N_INSNS (2), /* cost of FABS instruction. */
721 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
722 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
723 /* K8 has optimized REP instruction for medium sized blocks, but for very
724 small blocks it is better to use loop. For large blocks, libcall can
725 do nontemporary accesses and beat inline considerably. */
726 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
727 {-1, rep_prefix_4_byte
, false}}},
728 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
729 {-1, libcall
, false}}}},
730 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
731 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
732 {libcall
, {{48, unrolled_loop
, false},
733 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
734 4, /* scalar_stmt_cost. */
735 2, /* scalar load_cost. */
736 2, /* scalar_store_cost. */
737 5, /* vec_stmt_cost. */
738 0, /* vec_to_scalar_cost. */
739 2, /* scalar_to_vec_cost. */
740 2, /* vec_align_load_cost. */
741 3, /* vec_unalign_load_cost. */
742 3, /* vec_store_cost. */
743 3, /* cond_taken_branch_cost. */
744 2, /* cond_not_taken_branch_cost. */
747 struct processor_costs amdfam10_cost
= {
748 COSTS_N_INSNS (1), /* cost of an add instruction */
749 COSTS_N_INSNS (2), /* cost of a lea instruction */
750 COSTS_N_INSNS (1), /* variable shift costs */
751 COSTS_N_INSNS (1), /* constant shift costs */
752 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
753 COSTS_N_INSNS (4), /* HI */
754 COSTS_N_INSNS (3), /* SI */
755 COSTS_N_INSNS (4), /* DI */
756 COSTS_N_INSNS (5)}, /* other */
757 0, /* cost of multiply per each bit set */
758 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
759 COSTS_N_INSNS (35), /* HI */
760 COSTS_N_INSNS (51), /* SI */
761 COSTS_N_INSNS (83), /* DI */
762 COSTS_N_INSNS (83)}, /* other */
763 COSTS_N_INSNS (1), /* cost of movsx */
764 COSTS_N_INSNS (1), /* cost of movzx */
765 8, /* "large" insn */
767 4, /* cost for loading QImode using movzbl */
768 {3, 4, 3}, /* cost of loading integer registers
769 in QImode, HImode and SImode.
770 Relative to reg-reg move (2). */
771 {3, 4, 3}, /* cost of storing integer registers */
772 4, /* cost of reg,reg fld/fst */
773 {4, 4, 12}, /* cost of loading fp registers
774 in SFmode, DFmode and XFmode */
775 {6, 6, 8}, /* cost of storing fp registers
776 in SFmode, DFmode and XFmode */
777 2, /* cost of moving MMX register */
778 {3, 3}, /* cost of loading MMX registers
779 in SImode and DImode */
780 {4, 4}, /* cost of storing MMX registers
781 in SImode and DImode */
782 2, /* cost of moving SSE register */
783 {4, 4, 3}, /* cost of loading SSE registers
784 in SImode, DImode and TImode */
785 {4, 4, 5}, /* cost of storing SSE registers
786 in SImode, DImode and TImode */
787 3, /* MMX or SSE register to integer */
789 MOVD reg64, xmmreg Double FSTORE 4
790 MOVD reg32, xmmreg Double FSTORE 4
792 MOVD reg64, xmmreg Double FADD 3
794 MOVD reg32, xmmreg Double FADD 3
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set number of simultaneous prefetches
801 to a large constant to reflect this (it probably is not a good idea not
802 to limit number of prefetches at all, as their execution also takes some
804 100, /* number of parallel prefetches */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
814 very small blocks it is better to use loop. For large blocks, libcall can
815 do nontemporary accesses and beat inline considerably. */
816 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
817 {-1, rep_prefix_4_byte
, false}}},
818 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
819 {-1, libcall
, false}}}},
820 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
821 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
822 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
823 {-1, libcall
, false}}}},
824 4, /* scalar_stmt_cost. */
825 2, /* scalar load_cost. */
826 2, /* scalar_store_cost. */
827 6, /* vec_stmt_cost. */
828 0, /* vec_to_scalar_cost. */
829 2, /* scalar_to_vec_cost. */
830 2, /* vec_align_load_cost. */
831 2, /* vec_unalign_load_cost. */
832 2, /* vec_store_cost. */
833 2, /* cond_taken_branch_cost. */
834 1, /* cond_not_taken_branch_cost. */
837 struct processor_costs bdver1_cost
= {
838 COSTS_N_INSNS (1), /* cost of an add instruction */
839 COSTS_N_INSNS (1), /* cost of a lea instruction */
840 COSTS_N_INSNS (1), /* variable shift costs */
841 COSTS_N_INSNS (1), /* constant shift costs */
842 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
843 COSTS_N_INSNS (4), /* HI */
844 COSTS_N_INSNS (4), /* SI */
845 COSTS_N_INSNS (6), /* DI */
846 COSTS_N_INSNS (6)}, /* other */
847 0, /* cost of multiply per each bit set */
848 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
849 COSTS_N_INSNS (35), /* HI */
850 COSTS_N_INSNS (51), /* SI */
851 COSTS_N_INSNS (83), /* DI */
852 COSTS_N_INSNS (83)}, /* other */
853 COSTS_N_INSNS (1), /* cost of movsx */
854 COSTS_N_INSNS (1), /* cost of movzx */
855 8, /* "large" insn */
857 4, /* cost for loading QImode using movzbl */
858 {5, 5, 4}, /* cost of loading integer registers
859 in QImode, HImode and SImode.
860 Relative to reg-reg move (2). */
861 {4, 4, 4}, /* cost of storing integer registers */
862 2, /* cost of reg,reg fld/fst */
863 {5, 5, 12}, /* cost of loading fp registers
864 in SFmode, DFmode and XFmode */
865 {4, 4, 8}, /* cost of storing fp registers
866 in SFmode, DFmode and XFmode */
867 2, /* cost of moving MMX register */
868 {4, 4}, /* cost of loading MMX registers
869 in SImode and DImode */
870 {4, 4}, /* cost of storing MMX registers
871 in SImode and DImode */
872 2, /* cost of moving SSE register */
873 {4, 4, 4}, /* cost of loading SSE registers
874 in SImode, DImode and TImode */
875 {4, 4, 4}, /* cost of storing SSE registers
876 in SImode, DImode and TImode */
877 2, /* MMX or SSE register to integer */
879 MOVD reg64, xmmreg Double FSTORE 4
880 MOVD reg32, xmmreg Double FSTORE 4
882 MOVD reg64, xmmreg Double FADD 3
884 MOVD reg32, xmmreg Double FADD 3
886 16, /* size of l1 cache. */
887 2048, /* size of l2 cache. */
888 64, /* size of prefetch block */
889 /* New AMD processors never drop prefetches; if they cannot be performed
890 immediately, they are queued. We set number of simultaneous prefetches
891 to a large constant to reflect this (it probably is not a good idea not
892 to limit number of prefetches at all, as their execution also takes some
894 100, /* number of parallel prefetches */
896 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
897 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
898 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
899 COSTS_N_INSNS (2), /* cost of FABS instruction. */
900 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
901 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporary accesses and beat inline considerably. */
906 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
907 {-1, rep_prefix_4_byte
, false}}},
908 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
909 {-1, libcall
, false}}}},
910 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
911 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
912 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
913 {-1, libcall
, false}}}},
914 6, /* scalar_stmt_cost. */
915 4, /* scalar load_cost. */
916 4, /* scalar_store_cost. */
917 6, /* vec_stmt_cost. */
918 0, /* vec_to_scalar_cost. */
919 2, /* scalar_to_vec_cost. */
920 4, /* vec_align_load_cost. */
921 4, /* vec_unalign_load_cost. */
922 4, /* vec_store_cost. */
923 2, /* cond_taken_branch_cost. */
924 1, /* cond_not_taken_branch_cost. */
927 struct processor_costs bdver2_cost
= {
928 COSTS_N_INSNS (1), /* cost of an add instruction */
929 COSTS_N_INSNS (1), /* cost of a lea instruction */
930 COSTS_N_INSNS (1), /* variable shift costs */
931 COSTS_N_INSNS (1), /* constant shift costs */
932 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
933 COSTS_N_INSNS (4), /* HI */
934 COSTS_N_INSNS (4), /* SI */
935 COSTS_N_INSNS (6), /* DI */
936 COSTS_N_INSNS (6)}, /* other */
937 0, /* cost of multiply per each bit set */
938 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
939 COSTS_N_INSNS (35), /* HI */
940 COSTS_N_INSNS (51), /* SI */
941 COSTS_N_INSNS (83), /* DI */
942 COSTS_N_INSNS (83)}, /* other */
943 COSTS_N_INSNS (1), /* cost of movsx */
944 COSTS_N_INSNS (1), /* cost of movzx */
945 8, /* "large" insn */
947 4, /* cost for loading QImode using movzbl */
948 {5, 5, 4}, /* cost of loading integer registers
949 in QImode, HImode and SImode.
950 Relative to reg-reg move (2). */
951 {4, 4, 4}, /* cost of storing integer registers */
952 2, /* cost of reg,reg fld/fst */
953 {5, 5, 12}, /* cost of loading fp registers
954 in SFmode, DFmode and XFmode */
955 {4, 4, 8}, /* cost of storing fp registers
956 in SFmode, DFmode and XFmode */
957 2, /* cost of moving MMX register */
958 {4, 4}, /* cost of loading MMX registers
959 in SImode and DImode */
960 {4, 4}, /* cost of storing MMX registers
961 in SImode and DImode */
962 2, /* cost of moving SSE register */
963 {4, 4, 4}, /* cost of loading SSE registers
964 in SImode, DImode and TImode */
965 {4, 4, 4}, /* cost of storing SSE registers
966 in SImode, DImode and TImode */
967 2, /* MMX or SSE register to integer */
969 MOVD reg64, xmmreg Double FSTORE 4
970 MOVD reg32, xmmreg Double FSTORE 4
972 MOVD reg64, xmmreg Double FADD 3
974 MOVD reg32, xmmreg Double FADD 3
976 16, /* size of l1 cache. */
977 2048, /* size of l2 cache. */
978 64, /* size of prefetch block */
979 /* New AMD processors never drop prefetches; if they cannot be performed
980 immediately, they are queued. We set number of simultaneous prefetches
981 to a large constant to reflect this (it probably is not a good idea not
982 to limit number of prefetches at all, as their execution also takes some
984 100, /* number of parallel prefetches */
986 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
987 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
988 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
989 COSTS_N_INSNS (2), /* cost of FABS instruction. */
990 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
991 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
993 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
994 very small blocks it is better to use loop. For large blocks, libcall
995 can do nontemporary accesses and beat inline considerably. */
996 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
997 {-1, rep_prefix_4_byte
, false}}},
998 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
999 {-1, libcall
, false}}}},
1000 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1001 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1002 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1003 {-1, libcall
, false}}}},
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 2, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
1017 struct processor_costs bdver3_cost
= {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 16, /* size of l1 cache. */
1059 2048, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 /* New AMD processors never drop prefetches; if they cannot be performed
1062 immediately, they are queued. We set number of simultaneous prefetches
1063 to a large constant to reflect this (it probably is not a good idea not
1064 to limit number of prefetches at all, as their execution also takes some
1066 100, /* number of parallel prefetches */
1067 2, /* Branch cost */
1068 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1069 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1070 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1071 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1072 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1073 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1075 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1076 very small blocks it is better to use loop. For large blocks, libcall
1077 can do nontemporary accesses and beat inline considerably. */
1078 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1079 {-1, rep_prefix_4_byte
, false}}},
1080 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1081 {-1, libcall
, false}}}},
1082 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1083 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1084 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}},
1086 6, /* scalar_stmt_cost. */
1087 4, /* scalar load_cost. */
1088 4, /* scalar_store_cost. */
1089 6, /* vec_stmt_cost. */
1090 0, /* vec_to_scalar_cost. */
1091 2, /* scalar_to_vec_cost. */
1092 4, /* vec_align_load_cost. */
1093 4, /* vec_unalign_load_cost. */
1094 4, /* vec_store_cost. */
1095 2, /* cond_taken_branch_cost. */
1096 1, /* cond_not_taken_branch_cost. */
1099 struct processor_costs btver1_cost
= {
1100 COSTS_N_INSNS (1), /* cost of an add instruction */
1101 COSTS_N_INSNS (2), /* cost of a lea instruction */
1102 COSTS_N_INSNS (1), /* variable shift costs */
1103 COSTS_N_INSNS (1), /* constant shift costs */
1104 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1105 COSTS_N_INSNS (4), /* HI */
1106 COSTS_N_INSNS (3), /* SI */
1107 COSTS_N_INSNS (4), /* DI */
1108 COSTS_N_INSNS (5)}, /* other */
1109 0, /* cost of multiply per each bit set */
1110 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1111 COSTS_N_INSNS (35), /* HI */
1112 COSTS_N_INSNS (51), /* SI */
1113 COSTS_N_INSNS (83), /* DI */
1114 COSTS_N_INSNS (83)}, /* other */
1115 COSTS_N_INSNS (1), /* cost of movsx */
1116 COSTS_N_INSNS (1), /* cost of movzx */
1117 8, /* "large" insn */
1119 4, /* cost for loading QImode using movzbl */
1120 {3, 4, 3}, /* cost of loading integer registers
1121 in QImode, HImode and SImode.
1122 Relative to reg-reg move (2). */
1123 {3, 4, 3}, /* cost of storing integer registers */
1124 4, /* cost of reg,reg fld/fst */
1125 {4, 4, 12}, /* cost of loading fp registers
1126 in SFmode, DFmode and XFmode */
1127 {6, 6, 8}, /* cost of storing fp registers
1128 in SFmode, DFmode and XFmode */
1129 2, /* cost of moving MMX register */
1130 {3, 3}, /* cost of loading MMX registers
1131 in SImode and DImode */
1132 {4, 4}, /* cost of storing MMX registers
1133 in SImode and DImode */
1134 2, /* cost of moving SSE register */
1135 {4, 4, 3}, /* cost of loading SSE registers
1136 in SImode, DImode and TImode */
1137 {4, 4, 5}, /* cost of storing SSE registers
1138 in SImode, DImode and TImode */
1139 3, /* MMX or SSE register to integer */
1141 MOVD reg64, xmmreg Double FSTORE 4
1142 MOVD reg32, xmmreg Double FSTORE 4
1144 MOVD reg64, xmmreg Double FADD 3
1146 MOVD reg32, xmmreg Double FADD 3
1148 32, /* size of l1 cache. */
1149 512, /* size of l2 cache. */
1150 64, /* size of prefetch block */
1151 100, /* number of parallel prefetches */
1152 2, /* Branch cost */
1153 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1154 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1155 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1156 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1157 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1158 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1160 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1161 very small blocks it is better to use loop. For large blocks, libcall can
1162 do nontemporary accesses and beat inline considerably. */
1163 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1164 {-1, rep_prefix_4_byte
, false}}},
1165 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1166 {-1, libcall
, false}}}},
1167 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1168 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1169 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1170 {-1, libcall
, false}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 struct processor_costs btver2_cost
= {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (2), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (3), /* SI */
1192 COSTS_N_INSNS (4), /* DI */
1193 COSTS_N_INSNS (5)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1204 4, /* cost for loading QImode using movzbl */
1205 {3, 4, 3}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {3, 4, 3}, /* cost of storing integer registers */
1209 4, /* cost of reg,reg fld/fst */
1210 {4, 4, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {6, 6, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {3, 3}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 3}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 5}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 3, /* MMX or SSE register to integer */
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1229 MOVD reg64, xmmreg Double FADD 3
1231 MOVD reg32, xmmreg Double FADD 3
1233 32, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 100, /* number of parallel prefetches */
1237 2, /* Branch cost */
1238 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1239 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1240 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1241 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1242 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1243 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1245 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1246 {-1, rep_prefix_4_byte
, false}}},
1247 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1248 {-1, libcall
, false}}}},
1249 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1250 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1251 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1252 {-1, libcall
, false}}}},
1253 4, /* scalar_stmt_cost. */
1254 2, /* scalar load_cost. */
1255 2, /* scalar_store_cost. */
1256 6, /* vec_stmt_cost. */
1257 0, /* vec_to_scalar_cost. */
1258 2, /* scalar_to_vec_cost. */
1259 2, /* vec_align_load_cost. */
1260 2, /* vec_unalign_load_cost. */
1261 2, /* vec_store_cost. */
1262 2, /* cond_taken_branch_cost. */
1263 1, /* cond_not_taken_branch_cost. */
1267 struct processor_costs pentium4_cost
= {
1268 COSTS_N_INSNS (1), /* cost of an add instruction */
1269 COSTS_N_INSNS (3), /* cost of a lea instruction */
1270 COSTS_N_INSNS (4), /* variable shift costs */
1271 COSTS_N_INSNS (4), /* constant shift costs */
1272 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1273 COSTS_N_INSNS (15), /* HI */
1274 COSTS_N_INSNS (15), /* SI */
1275 COSTS_N_INSNS (15), /* DI */
1276 COSTS_N_INSNS (15)}, /* other */
1277 0, /* cost of multiply per each bit set */
1278 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1279 COSTS_N_INSNS (56), /* HI */
1280 COSTS_N_INSNS (56), /* SI */
1281 COSTS_N_INSNS (56), /* DI */
1282 COSTS_N_INSNS (56)}, /* other */
1283 COSTS_N_INSNS (1), /* cost of movsx */
1284 COSTS_N_INSNS (1), /* cost of movzx */
1285 16, /* "large" insn */
1287 2, /* cost for loading QImode using movzbl */
1288 {4, 5, 4}, /* cost of loading integer registers
1289 in QImode, HImode and SImode.
1290 Relative to reg-reg move (2). */
1291 {2, 3, 2}, /* cost of storing integer registers */
1292 2, /* cost of reg,reg fld/fst */
1293 {2, 2, 6}, /* cost of loading fp registers
1294 in SFmode, DFmode and XFmode */
1295 {4, 4, 6}, /* cost of storing fp registers
1296 in SFmode, DFmode and XFmode */
1297 2, /* cost of moving MMX register */
1298 {2, 2}, /* cost of loading MMX registers
1299 in SImode and DImode */
1300 {2, 2}, /* cost of storing MMX registers
1301 in SImode and DImode */
1302 12, /* cost of moving SSE register */
1303 {12, 12, 12}, /* cost of loading SSE registers
1304 in SImode, DImode and TImode */
1305 {2, 2, 8}, /* cost of storing SSE registers
1306 in SImode, DImode and TImode */
1307 10, /* MMX or SSE register to integer */
1308 8, /* size of l1 cache. */
1309 256, /* size of l2 cache. */
1310 64, /* size of prefetch block */
1311 6, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1319 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1320 DUMMY_STRINGOP_ALGS
},
1321 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1322 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1323 DUMMY_STRINGOP_ALGS
},
1324 1, /* scalar_stmt_cost. */
1325 1, /* scalar load_cost. */
1326 1, /* scalar_store_cost. */
1327 1, /* vec_stmt_cost. */
1328 1, /* vec_to_scalar_cost. */
1329 1, /* scalar_to_vec_cost. */
1330 1, /* vec_align_load_cost. */
1331 2, /* vec_unalign_load_cost. */
1332 1, /* vec_store_cost. */
1333 3, /* cond_taken_branch_cost. */
1334 1, /* cond_not_taken_branch_cost. */
1338 struct processor_costs nocona_cost
= {
1339 COSTS_N_INSNS (1), /* cost of an add instruction */
1340 COSTS_N_INSNS (1), /* cost of a lea instruction */
1341 COSTS_N_INSNS (1), /* variable shift costs */
1342 COSTS_N_INSNS (1), /* constant shift costs */
1343 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1344 COSTS_N_INSNS (10), /* HI */
1345 COSTS_N_INSNS (10), /* SI */
1346 COSTS_N_INSNS (10), /* DI */
1347 COSTS_N_INSNS (10)}, /* other */
1348 0, /* cost of multiply per each bit set */
1349 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1350 COSTS_N_INSNS (66), /* HI */
1351 COSTS_N_INSNS (66), /* SI */
1352 COSTS_N_INSNS (66), /* DI */
1353 COSTS_N_INSNS (66)}, /* other */
1354 COSTS_N_INSNS (1), /* cost of movsx */
1355 COSTS_N_INSNS (1), /* cost of movzx */
1356 16, /* "large" insn */
1357 17, /* MOVE_RATIO */
1358 4, /* cost for loading QImode using movzbl */
1359 {4, 4, 4}, /* cost of loading integer registers
1360 in QImode, HImode and SImode.
1361 Relative to reg-reg move (2). */
1362 {4, 4, 4}, /* cost of storing integer registers */
1363 3, /* cost of reg,reg fld/fst */
1364 {12, 12, 12}, /* cost of loading fp registers
1365 in SFmode, DFmode and XFmode */
1366 {4, 4, 4}, /* cost of storing fp registers
1367 in SFmode, DFmode and XFmode */
1368 6, /* cost of moving MMX register */
1369 {12, 12}, /* cost of loading MMX registers
1370 in SImode and DImode */
1371 {12, 12}, /* cost of storing MMX registers
1372 in SImode and DImode */
1373 6, /* cost of moving SSE register */
1374 {12, 12, 12}, /* cost of loading SSE registers
1375 in SImode, DImode and TImode */
1376 {12, 12, 12}, /* cost of storing SSE registers
1377 in SImode, DImode and TImode */
1378 8, /* MMX or SSE register to integer */
1379 8, /* size of l1 cache. */
1380 1024, /* size of l2 cache. */
1381 128, /* size of prefetch block */
1382 8, /* number of parallel prefetches */
1383 1, /* Branch cost */
1384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1385 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1386 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1387 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1388 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1389 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1390 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1391 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1392 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}},
1393 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1394 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1395 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1396 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1397 1, /* scalar_stmt_cost. */
1398 1, /* scalar load_cost. */
1399 1, /* scalar_store_cost. */
1400 1, /* vec_stmt_cost. */
1401 1, /* vec_to_scalar_cost. */
1402 1, /* scalar_to_vec_cost. */
1403 1, /* vec_align_load_cost. */
1404 2, /* vec_unalign_load_cost. */
1405 1, /* vec_store_cost. */
1406 3, /* cond_taken_branch_cost. */
1407 1, /* cond_not_taken_branch_cost. */
1411 struct processor_costs atom_cost
= {
1412 COSTS_N_INSNS (1), /* cost of an add instruction */
1413 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1414 COSTS_N_INSNS (1), /* variable shift costs */
1415 COSTS_N_INSNS (1), /* constant shift costs */
1416 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1417 COSTS_N_INSNS (4), /* HI */
1418 COSTS_N_INSNS (3), /* SI */
1419 COSTS_N_INSNS (4), /* DI */
1420 COSTS_N_INSNS (2)}, /* other */
1421 0, /* cost of multiply per each bit set */
1422 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1423 COSTS_N_INSNS (26), /* HI */
1424 COSTS_N_INSNS (42), /* SI */
1425 COSTS_N_INSNS (74), /* DI */
1426 COSTS_N_INSNS (74)}, /* other */
1427 COSTS_N_INSNS (1), /* cost of movsx */
1428 COSTS_N_INSNS (1), /* cost of movzx */
1429 8, /* "large" insn */
1430 17, /* MOVE_RATIO */
1431 4, /* cost for loading QImode using movzbl */
1432 {4, 4, 4}, /* cost of loading integer registers
1433 in QImode, HImode and SImode.
1434 Relative to reg-reg move (2). */
1435 {4, 4, 4}, /* cost of storing integer registers */
1436 4, /* cost of reg,reg fld/fst */
1437 {12, 12, 12}, /* cost of loading fp registers
1438 in SFmode, DFmode and XFmode */
1439 {6, 6, 8}, /* cost of storing fp registers
1440 in SFmode, DFmode and XFmode */
1441 2, /* cost of moving MMX register */
1442 {8, 8}, /* cost of loading MMX registers
1443 in SImode and DImode */
1444 {8, 8}, /* cost of storing MMX registers
1445 in SImode and DImode */
1446 2, /* cost of moving SSE register */
1447 {8, 8, 8}, /* cost of loading SSE registers
1448 in SImode, DImode and TImode */
1449 {8, 8, 8}, /* cost of storing SSE registers
1450 in SImode, DImode and TImode */
1451 5, /* MMX or SSE register to integer */
1452 32, /* size of l1 cache. */
1453 256, /* size of l2 cache. */
1454 64, /* size of prefetch block */
1455 6, /* number of parallel prefetches */
1456 3, /* Branch cost */
1457 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1458 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1459 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1460 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1461 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1462 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1463 {{libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1464 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1465 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1466 {{libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1467 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1468 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1469 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1470 1, /* scalar_stmt_cost. */
1471 1, /* scalar load_cost. */
1472 1, /* scalar_store_cost. */
1473 1, /* vec_stmt_cost. */
1474 1, /* vec_to_scalar_cost. */
1475 1, /* scalar_to_vec_cost. */
1476 1, /* vec_align_load_cost. */
1477 2, /* vec_unalign_load_cost. */
1478 1, /* vec_store_cost. */
1479 3, /* cond_taken_branch_cost. */
1480 1, /* cond_not_taken_branch_cost. */
1483 /* Generic64 should produce code tuned for Nocona and K8. */
1485 struct processor_costs generic64_cost
= {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 /* On all chips taken into consideration lea is 2 cycles and more. With
1488 this cost however our current implementation of synth_mult results in
1489 use of unnecessary temporary registers causing regression on several
1490 SPECfp benchmarks. */
1491 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1492 COSTS_N_INSNS (1), /* variable shift costs */
1493 COSTS_N_INSNS (1), /* constant shift costs */
1494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1495 COSTS_N_INSNS (4), /* HI */
1496 COSTS_N_INSNS (3), /* SI */
1497 COSTS_N_INSNS (4), /* DI */
1498 COSTS_N_INSNS (2)}, /* other */
1499 0, /* cost of multiply per each bit set */
1500 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1501 COSTS_N_INSNS (26), /* HI */
1502 COSTS_N_INSNS (42), /* SI */
1503 COSTS_N_INSNS (74), /* DI */
1504 COSTS_N_INSNS (74)}, /* other */
1505 COSTS_N_INSNS (1), /* cost of movsx */
1506 COSTS_N_INSNS (1), /* cost of movzx */
1507 8, /* "large" insn */
1508 17, /* MOVE_RATIO */
1509 4, /* cost for loading QImode using movzbl */
1510 {4, 4, 4}, /* cost of loading integer registers
1511 in QImode, HImode and SImode.
1512 Relative to reg-reg move (2). */
1513 {4, 4, 4}, /* cost of storing integer registers */
1514 4, /* cost of reg,reg fld/fst */
1515 {12, 12, 12}, /* cost of loading fp registers
1516 in SFmode, DFmode and XFmode */
1517 {6, 6, 8}, /* cost of storing fp registers
1518 in SFmode, DFmode and XFmode */
1519 2, /* cost of moving MMX register */
1520 {8, 8}, /* cost of loading MMX registers
1521 in SImode and DImode */
1522 {8, 8}, /* cost of storing MMX registers
1523 in SImode and DImode */
1524 2, /* cost of moving SSE register */
1525 {8, 8, 8}, /* cost of loading SSE registers
1526 in SImode, DImode and TImode */
1527 {8, 8, 8}, /* cost of storing SSE registers
1528 in SImode, DImode and TImode */
1529 5, /* MMX or SSE register to integer */
1530 32, /* size of l1 cache. */
1531 512, /* size of l2 cache. */
1532 64, /* size of prefetch block */
1533 6, /* number of parallel prefetches */
1534 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1535 value is increased to perhaps more appropriate value of 5. */
1536 3, /* Branch cost */
1537 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1538 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1539 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1540 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1541 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1542 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1543 {DUMMY_STRINGOP_ALGS
,
1544 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1545 {-1, libcall
, false}}}},
1546 {DUMMY_STRINGOP_ALGS
,
1547 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1548 {-1, libcall
, false}}}},
1549 1, /* scalar_stmt_cost. */
1550 1, /* scalar load_cost. */
1551 1, /* scalar_store_cost. */
1552 1, /* vec_stmt_cost. */
1553 1, /* vec_to_scalar_cost. */
1554 1, /* scalar_to_vec_cost. */
1555 1, /* vec_align_load_cost. */
1556 2, /* vec_unalign_load_cost. */
1557 1, /* vec_store_cost. */
1558 3, /* cond_taken_branch_cost. */
1559 1, /* cond_not_taken_branch_cost. */
1562 /* core_cost should produce code tuned for Core familly of CPUs. */
1564 struct processor_costs core_cost
= {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 /* On all chips taken into consideration lea is 2 cycles and more. With
1567 this cost however our current implementation of synth_mult results in
1568 use of unnecessary temporary registers causing regression on several
1569 SPECfp benchmarks. */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 64, /* size of l1 cache. */
1610 512, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 /* FIXME perhaps more appropriate value is 5. */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1622 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1623 {-1, libcall
, false}}}},
1624 {{libcall
, {{6, loop_1_byte
, true},
1626 {8192, rep_prefix_4_byte
, true},
1627 {-1, libcall
, false}}},
1628 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1629 {-1, libcall
, false}}}},
1630 1, /* scalar_stmt_cost. */
1631 1, /* scalar load_cost. */
1632 1, /* scalar_store_cost. */
1633 1, /* vec_stmt_cost. */
1634 1, /* vec_to_scalar_cost. */
1635 1, /* scalar_to_vec_cost. */
1636 1, /* vec_align_load_cost. */
1637 2, /* vec_unalign_load_cost. */
1638 1, /* vec_store_cost. */
1639 3, /* cond_taken_branch_cost. */
1640 1, /* cond_not_taken_branch_cost. */
1643 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1646 struct processor_costs generic32_cost
= {
1647 COSTS_N_INSNS (1), /* cost of an add instruction */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 256, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1699 {-1, libcall
, false}}},
1700 DUMMY_STRINGOP_ALGS
},
1701 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1702 {-1, libcall
, false}}},
1703 DUMMY_STRINGOP_ALGS
},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Set by -mtune. */
1718 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1720 /* Set by -mtune or -Os. */
1721 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  One bit per processor; the
   composite m_* masks OR together related processors.  Composite masks
   are defined after their components for readability (previously
   m_BTVER preceded m_BTVER1/m_BTVER2; harmless for macros, since
   expansion happens at use, but inconsistent with the m_BDVER group).  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_CORE2I7 (m_CORE2 | m_COREI7)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1759 /* Feature tests against the various tunings. */
1760 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1762 /* Feature tests against the various tunings used to create ix86_tune_features
1763 based on the processor mask. */
1764 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1765 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1766 negatively, so enabling for Generic64 seems like good code size
1767 tradeoff. We can't enable it for 32bit generic because it does not
1768 work well with PPro base chips. */
1769 m_386
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1771 /* X86_TUNE_PUSH_MEMORY */
1772 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1774 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1777 /* X86_TUNE_UNROLL_STRLEN */
1778 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1780 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1781 on simulation result. But after P4 was made, no performance benefit
1782 was observed with branch hints. It also increases the code size.
1783 As a result, icc never generates branch hints. */
1786 /* X86_TUNE_DOUBLE_WITH_ADD */
1789 /* X86_TUNE_USE_SAHF */
1790 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1792 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1793 partial dependencies. */
1794 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1796 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1797 register stalls on Generic32 compilation setting as well. However
1798 in current implementation the partial register stalls are not eliminated
1799 very well - they can be introduced via subregs synthesized by combine
1800 and can happen in caller/callee saving sequences. Because this option
1801 pays back little on PPro based chips and is in conflict with partial reg
1802 dependencies used by Athlon/P4 based chips, it is better to leave it off
1803 for generic32 for now. */
1806 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1807 m_CORE2I7
| m_GENERIC
,
1809 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1810 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1811 m_CORE2I7
| m_GENERIC
,
1813 /* X86_TUNE_USE_HIMODE_FIOP */
1814 m_386
| m_486
| m_K6_GEODE
,
1816 /* X86_TUNE_USE_SIMODE_FIOP */
1817 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1819 /* X86_TUNE_USE_MOV0 */
1822 /* X86_TUNE_USE_CLTD */
1823 ~(m_PENT
| m_ATOM
| m_K6
),
1825 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1828 /* X86_TUNE_SPLIT_LONG_MOVES */
1831 /* X86_TUNE_READ_MODIFY_WRITE */
1834 /* X86_TUNE_READ_MODIFY */
1837 /* X86_TUNE_PROMOTE_QIMODE */
1838 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1840 /* X86_TUNE_FAST_PREFIX */
1841 ~(m_386
| m_486
| m_PENT
),
1843 /* X86_TUNE_SINGLE_STRINGOP */
1844 m_386
| m_P4_NOCONA
,
1846 /* X86_TUNE_QIMODE_MATH */
1849 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1850 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1851 might be considered for Generic32 if our scheme for avoiding partial
1852 stalls was more effective. */
1855 /* X86_TUNE_PROMOTE_QI_REGS */
1858 /* X86_TUNE_PROMOTE_HI_REGS */
1861 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1862 over esp addition. */
1863 m_386
| m_486
| m_PENT
| m_PPRO
,
1865 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1866 over esp addition. */
1869 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1870 over esp subtraction. */
1871 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1873 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1874 over esp subtraction. */
1875 m_PENT
| m_K6_GEODE
,
1877 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1878 for DFmode copies */
1879 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1881 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1882 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1884 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1885 conflict here in between PPro/Pentium4 based chips that thread 128bit
1886 SSE registers as single units versus K8 based chips that divide SSE
1887 registers to two 64bit halves. This knob promotes all store destinations
1888 to be 128bit to allow register renaming on 128bit SSE units, but usually
1889 results in one extra microop on 64bit SSE units. Experimental results
1890 shows that disabling this option on P4 brings over 20% SPECfp regression,
1891 while enabling it on K8 brings roughly 2.4% regression that can be partly
1892 masked by careful scheduling of moves. */
1893 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
1895 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1896 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
1898 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1901 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1904 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1905 are resolved on SSE register parts instead of whole registers, so we may
1906 maintain just lower part of scalar values in proper format leaving the
1907 upper part undefined. */
1910 /* X86_TUNE_SSE_TYPELESS_STORES */
1913 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1914 m_PPRO
| m_P4_NOCONA
,
1916 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1917 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1919 /* X86_TUNE_PROLOGUE_USING_MOVE */
1920 m_PPRO
| m_ATHLON_K8
,
1922 /* X86_TUNE_EPILOGUE_USING_MOVE */
1923 m_PPRO
| m_ATHLON_K8
,
1925 /* X86_TUNE_SHIFT1 */
1928 /* X86_TUNE_USE_FFREEP */
1931 /* X86_TUNE_INTER_UNIT_MOVES */
1932 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1934 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1935 ~(m_AMDFAM10
| m_BDVER
),
1937 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1938 than 4 branch instructions in the 16 byte window. */
1939 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1941 /* X86_TUNE_SCHEDULE */
1942 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1944 /* X86_TUNE_USE_BT */
1945 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1947 /* X86_TUNE_USE_INCDEC */
1948 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
1950 /* X86_TUNE_PAD_RETURNS */
1951 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
1953 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
1956 /* X86_TUNE_EXT_80387_CONSTANTS */
1957 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
1959 /* X86_TUNE_AVOID_VECTOR_DECODE */
1960 m_CORE2I7
| m_K8
| m_GENERIC64
,
1962 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1963 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1966 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1967 vector path on AMD machines. */
1968 m_CORE2I7
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1970 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1972 m_CORE2I7
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1974 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1978 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1979 but one byte longer. */
1982 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1983 operand that cannot be represented using a modRM byte. The XOR
1984 replacement is long decoded, so this split helps here as well. */
1987 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1989 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
1991 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1992 from integer to FP. */
1995 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1996 with a subsequent conditional jump instruction into a single
1997 compare-and-branch uop. */
2000 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2001 will impact LEA instruction selection. */
2004 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2008 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2009 at -O3. For the moment, the prefetching seems badly tuned for Intel
2011 m_K6_GEODE
| m_AMD_MULTIPLE
,
2013 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2014 the auto-vectorizer. */
2017 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2018 during reassociation of integer computation. */
2021 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2022 during reassociation of fp computation. */
2025 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2026 regs instead of memory. */
2027 m_COREI7
| m_CORE2I7
,
2029 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2030 a conditional move. */
2034 /* Feature tests against the various architecture variations. */
2035 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2037 /* Feature tests against the various architecture variations, used to create
2038 ix86_arch_features based on the processor mask. */
2039 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2040 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2041 ~(m_386
| m_486
| m_PENT
| m_K6
),
2043 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2046 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2049 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2052 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2056 static const unsigned int x86_accumulate_outgoing_args
2057 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
2059 static const unsigned int x86_arch_always_fancy_math_387
2060 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2062 static const unsigned int x86_avx256_split_unaligned_load
2063 = m_COREI7
| m_GENERIC
;
2065 static const unsigned int x86_avx256_split_unaligned_store
2066 = m_COREI7
| m_BDVER
| m_GENERIC
;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit the fast (but longer) prologue
   variant.  */
#define FAST_PROLOGUE_INSN_COUNT 20
2073 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2074 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2075 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2076 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2078 /* Array of the smallest class containing reg number REGNO, indexed by
2079 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2081 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2083 /* ax, dx, cx, bx */
2084 AREG
, DREG
, CREG
, BREG
,
2085 /* si, di, bp, sp */
2086 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2088 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2089 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2092 /* flags, fpsr, fpcr, frame */
2093 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2095 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2098 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2101 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2102 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2103 /* SSE REX registers */
2104 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2108 /* The "default" register map used in 32bit mode. */
2110 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2112 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2113 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2114 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2115 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2116 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2117 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2118 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2121 /* The "default" register map used in 64bit mode. */
2123 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2125 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2126 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2127 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2128 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2129 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2130 8,9,10,11,12,13,14,15, /* extended integer registers */
2131 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2134 /* Define the register numbers to be used in Dwarf debugging information.
2135 The SVR4 reference port C compiler uses the following register numbers
2136 in its Dwarf output code:
2137 0 for %eax (gcc regno = 0)
2138 1 for %ecx (gcc regno = 2)
2139 2 for %edx (gcc regno = 1)
2140 3 for %ebx (gcc regno = 3)
2141 4 for %esp (gcc regno = 7)
2142 5 for %ebp (gcc regno = 6)
2143 6 for %esi (gcc regno = 4)
2144 7 for %edi (gcc regno = 5)
2145 The following three DWARF register numbers are never generated by
2146 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2147 believes these numbers have these meanings.
2148 8 for %eip (no gcc equivalent)
2149 9 for %eflags (gcc regno = 17)
2150 10 for %trapno (no gcc equivalent)
2151 It is not at all clear how we should number the FP stack registers
2152 for the x86 architecture. If the version of SDB on x86/svr4 were
2153 a bit less brain dead with respect to floating-point then we would
2154 have a precedent to follow with respect to DWARF register numbers
2155 for x86 FP registers, but the SDB on x86/svr4 is so completely
2156 broken with respect to FP registers that it is hardly worth thinking
2157 of it as something to strive for compatibility with.
2158 The version of x86/svr4 SDB I have at the moment does (partially)
2159 seem to believe that DWARF register number 11 is associated with
2160 the x86 register %st(0), but that's about all. Higher DWARF
2161 register numbers don't seem to be associated with anything in
2162 particular, and even for DWARF regno 11, SDB only seems to under-
2163 stand that it should say that a variable lives in %st(0) (when
2164 asked via an `=' command) if we said it was in DWARF regno 11,
2165 but SDB still prints garbage when asked for the value of the
2166 variable in question (via a `/' command).
2167 (Also note that the labels SDB prints for various FP stack regs
2168 when doing an `x' command are all wrong.)
2169 Note that these problems generally don't affect the native SVR4
2170 C compiler because it doesn't allow the use of -O with -g and
2171 because when it is *not* optimizing, it allocates a memory
2172 location for each floating-point variable, and the memory
2173 location is what gets described in the DWARF AT_location
2174 attribute for the variable in question.
2175 Regardless of the severe mental illness of the x86/svr4 SDB, we
2176 do something sensible here and we use the following DWARF
2177 register numbers. Note that these are all stack-top-relative
2179 11 for %st(0) (gcc regno = 8)
2180 12 for %st(1) (gcc regno = 9)
2181 13 for %st(2) (gcc regno = 10)
2182 14 for %st(3) (gcc regno = 11)
2183 15 for %st(4) (gcc regno = 12)
2184 16 for %st(5) (gcc regno = 13)
2185 17 for %st(6) (gcc regno = 14)
2186 18 for %st(7) (gcc regno = 15)
2188 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2190 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2191 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2192 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2194 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2195 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2196 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2199 /* Define parameter passing and return registers. */
2201 static int const x86_64_int_parameter_registers
[6] =
2203 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2206 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2208 CX_REG
, DX_REG
, R8_REG
, R9_REG
2211 static int const x86_64_int_return_registers
[4] =
2213 AX_REG
, DX_REG
, DI_REG
, SI_REG
2216 /* Define the structure for the machine field in struct function. */
2218 struct GTY(()) stack_local_entry
{
2219 unsigned short mode
;
2222 struct stack_local_entry
*next
;
2225 /* Structure describing stack frame layout.
2226 Stack grows downward:
2232 saved static chain if ix86_static_chain_on_stack
2234 saved frame pointer if frame_pointer_needed
2235 <- HARD_FRAME_POINTER
2241 <- sse_regs_save_offset
2244 [va_arg registers] |
2248 [padding2] | = to_allocate
2257 int outgoing_arguments_size
;
2259 /* The offsets relative to ARG_POINTER. */
2260 HOST_WIDE_INT frame_pointer_offset
;
2261 HOST_WIDE_INT hard_frame_pointer_offset
;
2262 HOST_WIDE_INT stack_pointer_offset
;
2263 HOST_WIDE_INT hfp_save_offset
;
2264 HOST_WIDE_INT reg_save_offset
;
2265 HOST_WIDE_INT sse_reg_save_offset
;
2267 /* When save_regs_using_mov is set, emit prologue using
2268 move instead of push instructions. */
2269 bool save_regs_using_mov
;
2272 /* Which cpu are we scheduling for. */
2273 enum attr_cpu ix86_schedule
;
2275 /* Which cpu are we optimizing for. */
2276 enum processor_type ix86_tune
;
2278 /* Which instruction set architecture to use. */
2279 enum processor_type ix86_arch
;
2281 /* True if processor has SSE prefetch instruction. */
2282 unsigned char x86_prefetch_sse
;
2284 /* -mstackrealign option */
2285 static const char ix86_force_align_arg_pointer_string
[]
2286 = "force_align_arg_pointer";
2288 static rtx (*ix86_gen_leave
) (void);
2289 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2290 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2291 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2292 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2293 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2294 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2295 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2296 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2297 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2298 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2299 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2301 /* Preferred alignment for stack boundary in bits. */
2302 unsigned int ix86_preferred_stack_boundary
;
2304 /* Alignment for incoming stack boundary in bits specified at
2306 static unsigned int ix86_user_incoming_stack_boundary
;
2308 /* Default alignment for incoming stack boundary in bits. */
2309 static unsigned int ix86_default_incoming_stack_boundary
;
2311 /* Alignment for incoming stack boundary in bits. */
2312 unsigned int ix86_incoming_stack_boundary
;
2314 /* Calling abi specific va_list type nodes. */
2315 static GTY(()) tree sysv_va_list_type_node
;
2316 static GTY(()) tree ms_va_list_type_node
;
2318 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2319 char internal_label_prefix
[16];
2320 int internal_label_prefix_len
;
2322 /* Fence to use after loop using movnt. */
2325 /* Register class used for passing given 64bit part of the argument.
2326 These represent classes as documented by the PS ABI, with the exception
2327 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2328 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2330 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2331 whenever possible (upper half does contain padding). */
2332 enum x86_64_reg_class
2335 X86_64_INTEGER_CLASS
,
2336 X86_64_INTEGERSI_CLASS
,
2343 X86_64_COMPLEX_X87_CLASS
,
2347 #define MAX_CLASSES 4
2349 /* Table of constants used by fldpi, fldln2, etc.... */
2350 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2351 static bool ext_80387_constants_init
= 0;
2354 static struct machine_function
* ix86_init_machine_status (void);
2355 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2356 static bool ix86_function_value_regno_p (const unsigned int);
2357 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2359 static rtx
ix86_static_chain (const_tree
, bool);
2360 static int ix86_function_regparm (const_tree
, const_tree
);
2361 static void ix86_compute_frame_layout (struct ix86_frame
*);
2362 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2364 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2365 static tree
ix86_canonical_va_list_type (tree
);
2366 static void predict_jump (int);
2367 static unsigned int split_stack_prologue_scratch_regno (void);
2368 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
/* Indices of the -march=/-mtune= strings saved in target options.  */
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
2377 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2378 const char *, enum fpmath_unit
, bool);
2379 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2380 static void ix86_function_specific_save (struct cl_target_option
*);
2381 static void ix86_function_specific_restore (struct cl_target_option
*);
2382 static void ix86_function_specific_print (FILE *, int,
2383 struct cl_target_option
*);
2384 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2385 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2386 struct gcc_options
*);
2387 static bool ix86_can_inline_p (tree
, tree
);
2388 static void ix86_set_current_function (tree
);
2389 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2391 static enum calling_abi
ix86_function_abi (const_tree
);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2402 /* Vectorization library interface and handlers. */
2403 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2405 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2406 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2408 /* Processor target table, indexed by processor number */
2411 const struct processor_costs
*cost
; /* Processor costs */
2412 const int align_loop
; /* Default alignments. */
2413 const int align_loop_max_skip
;
2414 const int align_jump
;
2415 const int align_jump_max_skip
;
2416 const int align_func
;
2419 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2421 {&i386_cost
, 4, 3, 4, 3, 4},
2422 {&i486_cost
, 16, 15, 16, 15, 16},
2423 {&pentium_cost
, 16, 7, 16, 7, 16},
2424 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2425 {&geode_cost
, 0, 0, 0, 0, 0},
2426 {&k6_cost
, 32, 7, 32, 7, 32},
2427 {&athlon_cost
, 16, 7, 16, 7, 16},
2428 {&pentium4_cost
, 0, 0, 0, 0, 0},
2429 {&k8_cost
, 16, 7, 16, 7, 16},
2430 {&nocona_cost
, 0, 0, 0, 0, 0},
2432 {&core_cost
, 16, 10, 16, 10, 16},
2434 {&core_cost
, 16, 10, 16, 10, 16},
2435 {&generic32_cost
, 16, 7, 16, 7, 16},
2436 {&generic64_cost
, 16, 10, 16, 10, 16},
2437 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2438 {&bdver1_cost
, 32, 24, 32, 7, 32},
2439 {&bdver2_cost
, 32, 24, 32, 7, 32},
2440 {&bdver3_cost
, 32, 24, 32, 7, 32},
2441 {&btver1_cost
, 32, 24, 32, 7, 32},
2442 {&btver2_cost
, 32, 24, 32, 7, 32},
2443 {&atom_cost
, 16, 15, 16, 7, 16}
2446 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2479 gate_insert_vzeroupper (void)
2481 return TARGET_VZEROUPPER
;
2485 rest_of_handle_insert_vzeroupper (void)
2489 /* vzeroupper instructions are inserted immediately after reload to
2490 account for possible spills from 256bit registers. The pass
2491 reuses mode switching infrastructure by re-running mode insertion
2492 pass, so disable entities that have already been processed. */
2493 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2494 ix86_optimize_mode_switching
[i
] = 0;
2496 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2498 /* Call optimize_mode_switching. */
2499 pass_mode_switching
.pass
.execute ();
2503 struct rtl_opt_pass pass_insert_vzeroupper
=
2507 "vzeroupper", /* name */
2508 OPTGROUP_NONE
, /* optinfo_flags */
2509 gate_insert_vzeroupper
, /* gate */
2510 rest_of_handle_insert_vzeroupper
, /* execute */
2513 0, /* static_pass_number */
2514 TV_NONE
, /* tv_id */
2515 0, /* properties_required */
2516 0, /* properties_provided */
2517 0, /* properties_destroyed */
2518 0, /* todo_flags_start */
2519 TODO_df_finish
| TODO_verify_rtl_sharing
|
2520 0, /* todo_flags_finish */
2524 /* Return true if a red-zone is in use. */
2527 ix86_using_red_zone (void)
2529 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2532 /* Return a string that documents the current -m options. The caller is
2533 responsible for freeing the string. */
2536 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2537 const char *tune
, enum fpmath_unit fpmath
,
2540 struct ix86_target_opts
2542 const char *option
; /* option string */
2543 HOST_WIDE_INT mask
; /* isa mask options */
2546 /* This table is ordered so that options like -msse4.2 that imply
2547 preceding options while match those first. */
2548 static struct ix86_target_opts isa_opts
[] =
2550 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2551 { "-mfma", OPTION_MASK_ISA_FMA
},
2552 { "-mxop", OPTION_MASK_ISA_XOP
},
2553 { "-mlwp", OPTION_MASK_ISA_LWP
},
2554 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2555 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2556 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2557 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2558 { "-msse3", OPTION_MASK_ISA_SSE3
},
2559 { "-msse2", OPTION_MASK_ISA_SSE2
},
2560 { "-msse", OPTION_MASK_ISA_SSE
},
2561 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2562 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2563 { "-mmmx", OPTION_MASK_ISA_MMX
},
2564 { "-mabm", OPTION_MASK_ISA_ABM
},
2565 { "-mbmi", OPTION_MASK_ISA_BMI
},
2566 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2567 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2568 { "-mhle", OPTION_MASK_ISA_HLE
},
2569 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2570 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2571 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2572 { "-madx", OPTION_MASK_ISA_ADX
},
2573 { "-mtbm", OPTION_MASK_ISA_TBM
},
2574 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2575 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2576 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2577 { "-maes", OPTION_MASK_ISA_AES
},
2578 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2579 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2580 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2581 { "-mf16c", OPTION_MASK_ISA_F16C
},
2582 { "-mrtm", OPTION_MASK_ISA_RTM
},
2583 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2584 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2588 static struct ix86_target_opts flag_opts
[] =
2590 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2591 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2592 { "-m80387", MASK_80387
},
2593 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2594 { "-malign-double", MASK_ALIGN_DOUBLE
},
2595 { "-mcld", MASK_CLD
},
2596 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2597 { "-mieee-fp", MASK_IEEE_FP
},
2598 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2599 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2600 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2601 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2602 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2603 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2604 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2605 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2606 { "-mrecip", MASK_RECIP
},
2607 { "-mrtd", MASK_RTD
},
2608 { "-msseregparm", MASK_SSEREGPARM
},
2609 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2610 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2611 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2612 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2613 { "-mvzeroupper", MASK_VZEROUPPER
},
2614 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2615 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2616 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2619 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2622 char target_other
[40];
2632 memset (opts
, '\0', sizeof (opts
));
2634 /* Add -march= option. */
2637 opts
[num
][0] = "-march=";
2638 opts
[num
++][1] = arch
;
2641 /* Add -mtune= option. */
2644 opts
[num
][0] = "-mtune=";
2645 opts
[num
++][1] = tune
;
2648 /* Add -m32/-m64/-mx32. */
2649 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2651 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2655 isa
&= ~ (OPTION_MASK_ISA_64BIT
2656 | OPTION_MASK_ABI_64
2657 | OPTION_MASK_ABI_X32
);
2661 opts
[num
++][0] = abi
;
2663 /* Pick out the options in isa options. */
2664 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2666 if ((isa
& isa_opts
[i
].mask
) != 0)
2668 opts
[num
++][0] = isa_opts
[i
].option
;
2669 isa
&= ~ isa_opts
[i
].mask
;
2673 if (isa
&& add_nl_p
)
2675 opts
[num
++][0] = isa_other
;
2676 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2680 /* Add flag options. */
2681 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2683 if ((flags
& flag_opts
[i
].mask
) != 0)
2685 opts
[num
++][0] = flag_opts
[i
].option
;
2686 flags
&= ~ flag_opts
[i
].mask
;
2690 if (flags
&& add_nl_p
)
2692 opts
[num
++][0] = target_other
;
2693 sprintf (target_other
, "(other flags: %#x)", flags
);
2696 /* Add -fpmath= option. */
2699 opts
[num
][0] = "-mfpmath=";
2700 switch ((int) fpmath
)
2703 opts
[num
++][1] = "387";
2707 opts
[num
++][1] = "sse";
2710 case FPMATH_387
| FPMATH_SSE
:
2711 opts
[num
++][1] = "sse+387";
2723 gcc_assert (num
< ARRAY_SIZE (opts
));
2725 /* Size the string. */
2727 sep_len
= (add_nl_p
) ? 3 : 1;
2728 for (i
= 0; i
< num
; i
++)
2731 for (j
= 0; j
< 2; j
++)
2733 len
+= strlen (opts
[i
][j
]);
2736 /* Build the string. */
2737 ret
= ptr
= (char *) xmalloc (len
);
2740 for (i
= 0; i
< num
; i
++)
2744 for (j
= 0; j
< 2; j
++)
2745 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2752 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2760 for (j
= 0; j
< 2; j
++)
2763 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2765 line_len
+= len2
[j
];
2770 gcc_assert (ret
+ len
>= ptr
);
2775 /* Return true, if profiling code should be emitted before
2776 prologue. Otherwise it returns false.
2777 Note: For x86 with "hotfix" it is sorried. */
2779 ix86_profile_before_prologue (void)
2781 return flag_fentry
!= 0;
2784 /* Function that is callable from the debugger to print the current
2787 ix86_debug_options (void)
2789 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2790 ix86_arch_string
, ix86_tune_string
,
2795 fprintf (stderr
, "%s\n\n", opts
);
2799 fputs ("<no options>\n\n", stderr
);
2804 /* Override various settings based on options. If MAIN_ARGS_P, the
2805 options are from the command line, otherwise they are from
2809 ix86_option_override_internal (bool main_args_p
)
2812 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2813 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2818 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2819 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2820 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2821 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2822 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2823 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2824 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2825 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2826 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2827 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2828 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2829 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2830 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2831 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2832 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2833 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2834 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2835 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2836 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2837 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2838 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2839 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2840 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2841 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2842 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2843 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2844 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2845 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2846 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2847 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2848 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2849 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2850 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2851 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2852 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2853 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2854 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2855 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2856 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2857 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2859 /* if this reaches 64, need to widen struct pta flags below */
2863 const char *const name
; /* processor name or nickname. */
2864 const enum processor_type processor
;
2865 const enum attr_cpu schedule
;
2866 const unsigned HOST_WIDE_INT flags
;
2868 const processor_alias_table
[] =
2870 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2871 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2872 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2873 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2874 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2875 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2876 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2877 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2878 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2879 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2880 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2881 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2882 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2883 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2884 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2885 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2886 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2887 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2888 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2889 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2890 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2891 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2892 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2893 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2894 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2895 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2896 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2897 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2898 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2899 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2900 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
2901 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2902 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2903 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
2904 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2905 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2906 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2907 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2908 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
2909 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2910 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2911 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2912 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2913 {"core-avx2", PROCESSOR_COREI7
, CPU_COREI7
,
2914 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2915 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2916 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2917 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2918 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2920 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2921 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2922 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2923 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2924 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2925 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2926 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2927 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2928 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2929 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2930 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2931 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2932 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2933 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2934 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2935 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2936 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2937 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2938 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2939 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2940 {"k8", PROCESSOR_K8
, CPU_K8
,
2941 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2942 | PTA_SSE2
| PTA_NO_SAHF
},
2943 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2944 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2945 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2946 {"opteron", PROCESSOR_K8
, CPU_K8
,
2947 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2948 | PTA_SSE2
| PTA_NO_SAHF
},
2949 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2950 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2951 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2952 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2953 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2954 | PTA_SSE2
| PTA_NO_SAHF
},
2955 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2956 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2957 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2958 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2959 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2960 | PTA_SSE2
| PTA_NO_SAHF
},
2961 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2962 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2963 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2964 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2965 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2966 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2967 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2968 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2969 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2970 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2971 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2972 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2973 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2974 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2975 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2976 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2977 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2978 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2979 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2980 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2981 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2982 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2983 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2985 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2986 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2987 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2988 | PTA_FXSR
| PTA_XSAVE
},
2989 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
2990 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2991 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2992 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2993 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2994 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2996 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
2997 PTA_HLE
/* flags are only used for -march switch. */ },
2998 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3000 | PTA_HLE
/* flags are only used for -march switch. */ },
3003 /* -mrecip options. */
3006 const char *string
; /* option name */
3007 unsigned int mask
; /* mask bits to set */
3009 const recip_options
[] =
3011 { "all", RECIP_MASK_ALL
},
3012 { "none", RECIP_MASK_NONE
},
3013 { "div", RECIP_MASK_DIV
},
3014 { "sqrt", RECIP_MASK_SQRT
},
3015 { "vec-div", RECIP_MASK_VEC_DIV
},
3016 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3019 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3021 /* Set up prefix/suffix so the error messages refer to either the command
3022 line argument, or the attribute(target). */
3031 prefix
= "option(\"";
3036 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3037 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3038 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3039 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3040 #ifdef TARGET_BI_ARCH
3043 #if TARGET_BI_ARCH == 1
3044 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3045 is on and OPTION_MASK_ABI_X32 is off. We turn off
3046 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3049 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3051 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3052 on and OPTION_MASK_ABI_64 is off. We turn off
3053 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3056 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3063 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3064 OPTION_MASK_ABI_64 for TARGET_X32. */
3065 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3066 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3068 else if (TARGET_LP64
)
3070 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3071 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3072 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3073 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3076 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3077 SUBTARGET_OVERRIDE_OPTIONS
;
3080 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3081 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3084 /* -fPIC is the default for x86_64. */
3085 if (TARGET_MACHO
&& TARGET_64BIT
)
3088 /* Need to check -mtune=generic first. */
3089 if (ix86_tune_string
)
3091 if (!strcmp (ix86_tune_string
, "generic")
3092 || !strcmp (ix86_tune_string
, "i686")
3093 /* As special support for cross compilers we read -mtune=native
3094 as -mtune=generic. With native compilers we won't see the
3095 -mtune=native, as it was changed by the driver. */
3096 || !strcmp (ix86_tune_string
, "native"))
3099 ix86_tune_string
= "generic64";
3101 ix86_tune_string
= "generic32";
3103 /* If this call is for setting the option attribute, allow the
3104 generic32/generic64 that was previously set. */
3105 else if (!main_args_p
3106 && (!strcmp (ix86_tune_string
, "generic32")
3107 || !strcmp (ix86_tune_string
, "generic64")))
3109 else if (!strncmp (ix86_tune_string
, "generic", 7))
3110 error ("bad value (%s) for %stune=%s %s",
3111 ix86_tune_string
, prefix
, suffix
, sw
);
3112 else if (!strcmp (ix86_tune_string
, "x86-64"))
3113 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3114 "%stune=k8%s or %stune=generic%s instead as appropriate",
3115 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3119 if (ix86_arch_string
)
3120 ix86_tune_string
= ix86_arch_string
;
3121 if (!ix86_tune_string
)
3123 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3124 ix86_tune_defaulted
= 1;
3127 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3128 need to use a sensible tune option. */
3129 if (!strcmp (ix86_tune_string
, "generic")
3130 || !strcmp (ix86_tune_string
, "x86-64")
3131 || !strcmp (ix86_tune_string
, "i686"))
3134 ix86_tune_string
= "generic64";
3136 ix86_tune_string
= "generic32";
3140 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3142 /* rep; movq isn't available in 32-bit code. */
3143 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3144 ix86_stringop_alg
= no_stringop
;
3147 if (!ix86_arch_string
)
3148 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3150 ix86_arch_specified
= 1;
3152 if (global_options_set
.x_ix86_pmode
)
3154 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3155 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3156 error ("address mode %qs not supported in the %s bit mode",
3157 TARGET_64BIT
? "short" : "long",
3158 TARGET_64BIT
? "64" : "32");
3161 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3163 if (!global_options_set
.x_ix86_abi
)
3164 ix86_abi
= DEFAULT_ABI
;
3166 if (global_options_set
.x_ix86_cmodel
)
3168 switch (ix86_cmodel
)
3173 ix86_cmodel
= CM_SMALL_PIC
;
3175 error ("code model %qs not supported in the %s bit mode",
3182 ix86_cmodel
= CM_MEDIUM_PIC
;
3184 error ("code model %qs not supported in the %s bit mode",
3186 else if (TARGET_X32
)
3187 error ("code model %qs not supported in x32 mode",
3194 ix86_cmodel
= CM_LARGE_PIC
;
3196 error ("code model %qs not supported in the %s bit mode",
3198 else if (TARGET_X32
)
3199 error ("code model %qs not supported in x32 mode",
3205 error ("code model %s does not support PIC mode", "32");
3207 error ("code model %qs not supported in the %s bit mode",
3214 error ("code model %s does not support PIC mode", "kernel");
3215 ix86_cmodel
= CM_32
;
3218 error ("code model %qs not supported in the %s bit mode",
3228 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3229 use of rip-relative addressing. This eliminates fixups that
3230 would otherwise be needed if this object is to be placed in a
3231 DLL, and is essentially just as efficient as direct addressing. */
3232 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3233 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3234 else if (TARGET_64BIT
)
3235 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3237 ix86_cmodel
= CM_32
;
3239 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3241 error ("-masm=intel not supported in this configuration");
3242 ix86_asm_dialect
= ASM_ATT
;
3244 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3245 sorry ("%i-bit mode not compiled in",
3246 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3248 for (i
= 0; i
< pta_size
; i
++)
3249 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3251 ix86_schedule
= processor_alias_table
[i
].schedule
;
3252 ix86_arch
= processor_alias_table
[i
].processor
;
3253 /* Default cpu tuning to the architecture. */
3254 ix86_tune
= ix86_arch
;
3256 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3257 error ("CPU you selected does not support x86-64 "
3260 if (processor_alias_table
[i
].flags
& PTA_MMX
3261 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3262 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3263 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3264 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3265 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3266 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3267 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3268 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3269 if (processor_alias_table
[i
].flags
& PTA_SSE
3270 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3271 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3272 if (processor_alias_table
[i
].flags
& PTA_SSE2
3273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3274 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3275 if (processor_alias_table
[i
].flags
& PTA_SSE3
3276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3277 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3278 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3280 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3281 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3284 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3286 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3287 if (processor_alias_table
[i
].flags
& PTA_AVX
3288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3289 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3290 if (processor_alias_table
[i
].flags
& PTA_AVX2
3291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3292 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3293 if (processor_alias_table
[i
].flags
& PTA_FMA
3294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3295 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3296 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3298 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3299 if (processor_alias_table
[i
].flags
& PTA_FMA4
3300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3301 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3302 if (processor_alias_table
[i
].flags
& PTA_XOP
3303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3304 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3305 if (processor_alias_table
[i
].flags
& PTA_LWP
3306 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3307 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3308 if (processor_alias_table
[i
].flags
& PTA_ABM
3309 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3310 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3311 if (processor_alias_table
[i
].flags
& PTA_BMI
3312 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3313 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3314 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3315 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3316 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3317 if (processor_alias_table
[i
].flags
& PTA_TBM
3318 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3319 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3320 if (processor_alias_table
[i
].flags
& PTA_BMI2
3321 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3322 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3323 if (processor_alias_table
[i
].flags
& PTA_CX16
3324 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3325 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3326 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3327 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3328 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3329 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3330 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3331 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3332 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3333 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3334 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3335 if (processor_alias_table
[i
].flags
& PTA_AES
3336 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3337 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3338 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3339 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3340 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3341 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3342 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3343 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3344 if (processor_alias_table
[i
].flags
& PTA_RDRND
3345 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3346 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3347 if (processor_alias_table
[i
].flags
& PTA_F16C
3348 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3349 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3350 if (processor_alias_table
[i
].flags
& PTA_RTM
3351 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3352 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3353 if (processor_alias_table
[i
].flags
& PTA_HLE
3354 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3355 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3356 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3357 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3358 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3359 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3360 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3361 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3362 if (processor_alias_table
[i
].flags
& PTA_ADX
3363 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3364 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3365 if (processor_alias_table
[i
].flags
& PTA_FXSR
3366 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3367 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3368 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3369 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3370 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3371 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3372 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3373 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3374 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3375 x86_prefetch_sse
= true;
3380 if (!strcmp (ix86_arch_string
, "generic"))
3381 error ("generic CPU can be used only for %stune=%s %s",
3382 prefix
, suffix
, sw
);
3383 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3384 error ("bad value (%s) for %sarch=%s %s",
3385 ix86_arch_string
, prefix
, suffix
, sw
);
3387 ix86_arch_mask
= 1u << ix86_arch
;
3388 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3389 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3391 for (i
= 0; i
< pta_size
; i
++)
3392 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3394 ix86_schedule
= processor_alias_table
[i
].schedule
;
3395 ix86_tune
= processor_alias_table
[i
].processor
;
3398 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3400 if (ix86_tune_defaulted
)
3402 ix86_tune_string
= "x86-64";
3403 for (i
= 0; i
< pta_size
; i
++)
3404 if (! strcmp (ix86_tune_string
,
3405 processor_alias_table
[i
].name
))
3407 ix86_schedule
= processor_alias_table
[i
].schedule
;
3408 ix86_tune
= processor_alias_table
[i
].processor
;
3411 error ("CPU you selected does not support x86-64 "
3417 /* Adjust tuning when compiling for 32-bit ABI. */
3420 case PROCESSOR_GENERIC64
:
3421 ix86_tune
= PROCESSOR_GENERIC32
;
3422 ix86_schedule
= CPU_PENTIUMPRO
;
3429 /* Intel CPUs have always interpreted SSE prefetch instructions as
3430 NOPs; so, we can enable SSE prefetch instructions even when
3431 -mtune (rather than -march) points us to a processor that has them.
3432 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3433 higher processors. */
3435 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3436 x86_prefetch_sse
= true;
3440 if (ix86_tune_specified
&& i
== pta_size
)
3441 error ("bad value (%s) for %stune=%s %s",
3442 ix86_tune_string
, prefix
, suffix
, sw
);
3444 ix86_tune_mask
= 1u << ix86_tune
;
3445 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3446 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3448 #ifndef USE_IX86_FRAME_POINTER
3449 #define USE_IX86_FRAME_POINTER 0
3452 #ifndef USE_X86_64_FRAME_POINTER
3453 #define USE_X86_64_FRAME_POINTER 0
3456 /* Set the default values for switches whose default depends on TARGET_64BIT
3457 in case they weren't overwritten by command line options. */
3460 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3461 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3462 if (flag_asynchronous_unwind_tables
== 2)
3463 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3464 if (flag_pcc_struct_return
== 2)
3465 flag_pcc_struct_return
= 0;
3469 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3470 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3471 if (flag_asynchronous_unwind_tables
== 2)
3472 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3473 if (flag_pcc_struct_return
== 2)
3474 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3477 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3479 ix86_cost
= &ix86_size_cost
;
3481 ix86_cost
= ix86_tune_cost
;
3483 /* Arrange to set up i386_stack_locals for all functions. */
3484 init_machine_status
= ix86_init_machine_status
;
3486 /* Validate -mregparm= value. */
3487 if (global_options_set
.x_ix86_regparm
)
3490 warning (0, "-mregparm is ignored in 64-bit mode");
3491 if (ix86_regparm
> REGPARM_MAX
)
3493 error ("-mregparm=%d is not between 0 and %d",
3494 ix86_regparm
, REGPARM_MAX
);
3499 ix86_regparm
= REGPARM_MAX
;
3501 /* Default align_* from the processor table. */
3502 if (align_loops
== 0)
3504 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3505 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3507 if (align_jumps
== 0)
3509 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3510 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3512 if (align_functions
== 0)
3514 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3517 /* Provide default for -mbranch-cost= value. */
3518 if (!global_options_set
.x_ix86_branch_cost
)
3519 ix86_branch_cost
= ix86_cost
->branch_cost
;
3523 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3525 /* Enable by default the SSE and MMX builtins. Do allow the user to
3526 explicitly disable any of these. In particular, disabling SSE and
3527 MMX for kernel code is extremely useful. */
3528 if (!ix86_arch_specified
)
3530 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3531 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3534 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3538 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3540 if (!ix86_arch_specified
)
3542 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3544 /* i386 ABI does not specify red zone. It still makes sense to use it
3545 when programmer takes care to stack from being destroyed. */
3546 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3547 target_flags
|= MASK_NO_RED_ZONE
;
3550 /* Keep nonleaf frame pointers. */
3551 if (flag_omit_frame_pointer
)
3552 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3553 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3554 flag_omit_frame_pointer
= 1;
3556 /* If we're doing fast math, we don't care about comparison order
3557 wrt NaNs. This lets us use a shorter comparison sequence. */
3558 if (flag_finite_math_only
)
3559 target_flags
&= ~MASK_IEEE_FP
;
3561 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3562 since the insns won't need emulation. */
3563 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3564 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3566 /* Likewise, if the target doesn't have a 387, or we've specified
3567 software floating point, don't use 387 inline intrinsics. */
3569 target_flags
|= MASK_NO_FANCY_MATH_387
;
3571 /* Turn on MMX builtins for -msse. */
3573 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3575 /* Enable SSE prefetch. */
3576 if (TARGET_SSE
|| TARGET_PRFCHW
)
3577 x86_prefetch_sse
= true;
3579 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3580 if (TARGET_SSE4_2
|| TARGET_ABM
)
3581 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3583 /* Turn on lzcnt instruction for -mabm. */
3585 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3587 /* Validate -mpreferred-stack-boundary= value or default it to
3588 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3589 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3590 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3592 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3593 int max
= (TARGET_SEH
? 4 : 12);
3595 if (ix86_preferred_stack_boundary_arg
< min
3596 || ix86_preferred_stack_boundary_arg
> max
)
3599 error ("-mpreferred-stack-boundary is not supported "
3602 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3603 ix86_preferred_stack_boundary_arg
, min
, max
);
3606 ix86_preferred_stack_boundary
3607 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3610 /* Set the default value for -mstackrealign. */
3611 if (ix86_force_align_arg_pointer
== -1)
3612 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3614 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3616 /* Validate -mincoming-stack-boundary= value or default it to
3617 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3618 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3619 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3621 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3622 || ix86_incoming_stack_boundary_arg
> 12)
3623 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3624 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3627 ix86_user_incoming_stack_boundary
3628 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3629 ix86_incoming_stack_boundary
3630 = ix86_user_incoming_stack_boundary
;
3634 /* Accept -msseregparm only if at least SSE support is enabled. */
3635 if (TARGET_SSEREGPARM
3637 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3639 if (global_options_set
.x_ix86_fpmath
)
3641 if (ix86_fpmath
& FPMATH_SSE
)
3645 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3646 ix86_fpmath
= FPMATH_387
;
3648 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3650 warning (0, "387 instruction set disabled, using SSE arithmetics");
3651 ix86_fpmath
= FPMATH_SSE
;
3656 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3658 /* If the i387 is disabled, then do not return values in it. */
3660 target_flags
&= ~MASK_FLOAT_RETURNS
;
3662 /* Use external vectorized library in vectorizing intrinsics. */
3663 if (global_options_set
.x_ix86_veclibabi_type
)
3664 switch (ix86_veclibabi_type
)
3666 case ix86_veclibabi_type_svml
:
3667 ix86_veclib_handler
= ix86_veclibabi_svml
;
3670 case ix86_veclibabi_type_acml
:
3671 ix86_veclib_handler
= ix86_veclibabi_acml
;
3678 if ((!USE_IX86_FRAME_POINTER
3679 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3680 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3682 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3684 /* ??? Unwind info is not correct around the CFG unless either a frame
3685 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3686 unwind info generation to be aware of the CFG and propagating states
3688 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3689 || flag_exceptions
|| flag_non_call_exceptions
)
3690 && flag_omit_frame_pointer
3691 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3693 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3694 warning (0, "unwind tables currently require either a frame pointer "
3695 "or %saccumulate-outgoing-args%s for correctness",
3697 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3700 /* If stack probes are required, the space used for large function
3701 arguments on the stack must also be probed, so enable
3702 -maccumulate-outgoing-args so this happens in the prologue. */
3703 if (TARGET_STACK_PROBE
3704 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3706 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3707 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3708 "for correctness", prefix
, suffix
);
3709 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3712 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3715 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3716 p
= strchr (internal_label_prefix
, 'X');
3717 internal_label_prefix_len
= p
- internal_label_prefix
;
3721 /* When scheduling description is not available, disable scheduler pass
3722 so it won't slow down the compilation and make x87 code slower. */
3723 if (!TARGET_SCHEDULE
)
3724 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3726 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3727 ix86_tune_cost
->simultaneous_prefetches
,
3728 global_options
.x_param_values
,
3729 global_options_set
.x_param_values
);
3730 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3731 ix86_tune_cost
->prefetch_block
,
3732 global_options
.x_param_values
,
3733 global_options_set
.x_param_values
);
3734 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3735 ix86_tune_cost
->l1_cache_size
,
3736 global_options
.x_param_values
,
3737 global_options_set
.x_param_values
);
3738 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3739 ix86_tune_cost
->l2_cache_size
,
3740 global_options
.x_param_values
,
3741 global_options_set
.x_param_values
);
3743 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3744 if (flag_prefetch_loop_arrays
< 0
3746 && (optimize
>= 3 || flag_profile_use
)
3747 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3748 flag_prefetch_loop_arrays
= 1;
3750 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3751 can be optimized to ap = __builtin_next_arg (0). */
3752 if (!TARGET_64BIT
&& !flag_split_stack
)
3753 targetm
.expand_builtin_va_start
= NULL
;
3757 ix86_gen_leave
= gen_leave_rex64
;
3758 if (Pmode
== DImode
)
3760 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3761 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3762 ix86_gen_tls_local_dynamic_base_64
3763 = gen_tls_local_dynamic_base_64_di
;
3767 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3768 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3769 ix86_gen_tls_local_dynamic_base_64
3770 = gen_tls_local_dynamic_base_64_si
;
3775 ix86_gen_leave
= gen_leave
;
3776 ix86_gen_monitor
= gen_sse3_monitor
;
3779 if (Pmode
== DImode
)
3781 ix86_gen_add3
= gen_adddi3
;
3782 ix86_gen_sub3
= gen_subdi3
;
3783 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3784 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3785 ix86_gen_andsp
= gen_anddi3
;
3786 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3787 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3788 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3792 ix86_gen_add3
= gen_addsi3
;
3793 ix86_gen_sub3
= gen_subsi3
;
3794 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3795 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3796 ix86_gen_andsp
= gen_andsi3
;
3797 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3798 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3799 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3803 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3805 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3808 if (!TARGET_64BIT
&& flag_pic
)
3810 if (flag_fentry
> 0)
3811 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3815 else if (TARGET_SEH
)
3817 if (flag_fentry
== 0)
3818 sorry ("-mno-fentry isn%'t compatible with SEH");
3821 else if (flag_fentry
< 0)
3823 #if defined(PROFILE_BEFORE_PROLOGUE)
3832 /* When not optimize for size, enable vzeroupper optimization for
3833 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3834 AVX unaligned load/store. */
3837 if (flag_expensive_optimizations
3838 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3839 target_flags
|= MASK_VZEROUPPER
;
3840 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3841 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3842 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3843 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3844 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3845 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3846 /* Enable 128-bit AVX instruction generation
3847 for the auto-vectorizer. */
3848 if (TARGET_AVX128_OPTIMAL
3849 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3850 target_flags
|= MASK_PREFER_AVX128
;
3855 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3856 target_flags
&= ~MASK_VZEROUPPER
;
3859 if (ix86_recip_name
)
3861 char *p
= ASTRDUP (ix86_recip_name
);
3863 unsigned int mask
, i
;
3866 while ((q
= strtok (p
, ",")) != NULL
)
3877 if (!strcmp (q
, "default"))
3878 mask
= RECIP_MASK_ALL
;
3881 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3882 if (!strcmp (q
, recip_options
[i
].string
))
3884 mask
= recip_options
[i
].mask
;
3888 if (i
== ARRAY_SIZE (recip_options
))
3890 error ("unknown option for -mrecip=%s", q
);
3892 mask
= RECIP_MASK_NONE
;
3896 recip_mask_explicit
|= mask
;
3898 recip_mask
&= ~mask
;
3905 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3906 else if (target_flags_explicit
& MASK_RECIP
)
3907 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3909 /* Default long double to 64-bit for Bionic. */
3910 if (TARGET_HAS_BIONIC
3911 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
3912 target_flags
|= MASK_LONG_DOUBLE_64
;
3914 /* Save the initial options in case the user does function specific
3917 target_option_default_node
= target_option_current_node
3918 = build_target_option_node ();
3921 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3924 ix86_option_override (void)
3926 static struct register_pass_info insert_vzeroupper_info
3927 = { &pass_insert_vzeroupper
.pass
, "reload",
3928 1, PASS_POS_INSERT_AFTER
3931 ix86_option_override_internal (true);
3934 /* This needs to be done at start up. It's convenient to do it here. */
3935 register_pass (&insert_vzeroupper_info
);
3938 /* Update register usage after having seen the compiler flags. */
3941 ix86_conditional_register_usage (void)
3946 /* The PIC register, if it exists, is fixed. */
3947 j
= PIC_OFFSET_TABLE_REGNUM
;
3948 if (j
!= INVALID_REGNUM
)
3949 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3951 /* For 32-bit targets, squash the REX registers. */
3954 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3955 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3956 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3957 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3960 /* See the definition of CALL_USED_REGISTERS in i386.h. */
3961 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
3962 : TARGET_64BIT
? (1 << 2)
3965 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
3967 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3969 /* Set/reset conditionally defined registers from
3970 CALL_USED_REGISTERS initializer. */
3971 if (call_used_regs
[i
] > 1)
3972 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
3974 /* Calculate registers of CLOBBERED_REGS register set
3975 as call used registers from GENERAL_REGS register set. */
3976 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3977 && call_used_regs
[i
])
3978 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3981 /* If MMX is disabled, squash the registers. */
3983 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3984 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3985 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3987 /* If SSE is disabled, squash the registers. */
3989 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3990 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3991 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3993 /* If the FPU is disabled, squash the registers. */
3994 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
3995 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3996 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
3997 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4001 /* Save the current options */
4004 ix86_function_specific_save (struct cl_target_option
*ptr
)
4006 ptr
->arch
= ix86_arch
;
4007 ptr
->schedule
= ix86_schedule
;
4008 ptr
->tune
= ix86_tune
;
4009 ptr
->branch_cost
= ix86_branch_cost
;
4010 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4011 ptr
->arch_specified
= ix86_arch_specified
;
4012 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4013 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4014 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
4018 gcc_assert (ptr
->arch
== ix86_arch
);
4019 gcc_assert (ptr
->schedule
== ix86_schedule
);
4020 gcc_assert (ptr
->tune
== ix86_tune
);
4021 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4024 /* Restore the current options */
4027 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4029 enum processor_type old_tune
= ix86_tune
;
4030 enum processor_type old_arch
= ix86_arch
;
4031 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4034 ix86_arch
= (enum processor_type
) ptr
->arch
;
4035 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4036 ix86_tune
= (enum processor_type
) ptr
->tune
;
4037 ix86_branch_cost
= ptr
->branch_cost
;
4038 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4039 ix86_arch_specified
= ptr
->arch_specified
;
4040 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4041 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4042 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4044 /* Recreate the arch feature tests if the arch changed */
4045 if (old_arch
!= ix86_arch
)
4047 ix86_arch_mask
= 1u << ix86_arch
;
4048 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4049 ix86_arch_features
[i
]
4050 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4053 /* Recreate the tune optimization tests */
4054 if (old_tune
!= ix86_tune
)
4056 ix86_tune_mask
= 1u << ix86_tune
;
4057 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4058 ix86_tune_features
[i
]
4059 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4063 /* Print the current options */
4066 ix86_function_specific_print (FILE *file
, int indent
,
4067 struct cl_target_option
*ptr
)
4070 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4071 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4073 fprintf (file
, "%*sarch = %d (%s)\n",
4076 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4077 ? cpu_names
[ptr
->arch
]
4080 fprintf (file
, "%*stune = %d (%s)\n",
4083 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4084 ? cpu_names
[ptr
->tune
]
4087 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4091 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4092 free (target_string
);
4097 /* Inner function to process the attribute((target(...))), take an argument and
4098 set the current options from the argument. If we have a list, recursively go
4102 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4103 struct gcc_options
*enum_opts_set
)
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type
;
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4134 IX86_ATTR_ISA ("abm", OPT_mabm
),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4139 IX86_ATTR_ISA ("aes", OPT_maes
),
4140 IX86_ATTR_ISA ("avx", OPT_mavx
),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4145 IX86_ATTR_ISA ("sse", OPT_msse
),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4154 IX86_ATTR_ISA ("fma", OPT_mfma
),
4155 IX86_ATTR_ISA ("xop", OPT_mxop
),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4160 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4161 IX86_ATTR_ISA ("hle", OPT_mhle
),
4162 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4163 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4164 IX86_ATTR_ISA ("adx", OPT_madx
),
4165 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4166 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4167 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4170 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4172 /* string options */
4173 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4174 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4177 IX86_ATTR_YES ("cld",
4181 IX86_ATTR_NO ("fancy-math-387",
4182 OPT_mfancy_math_387
,
4183 MASK_NO_FANCY_MATH_387
),
4185 IX86_ATTR_YES ("ieee-fp",
4189 IX86_ATTR_YES ("inline-all-stringops",
4190 OPT_minline_all_stringops
,
4191 MASK_INLINE_ALL_STRINGOPS
),
4193 IX86_ATTR_YES ("inline-stringops-dynamically",
4194 OPT_minline_stringops_dynamically
,
4195 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4197 IX86_ATTR_NO ("align-stringops",
4198 OPT_mno_align_stringops
,
4199 MASK_NO_ALIGN_STRINGOPS
),
4201 IX86_ATTR_YES ("recip",
4207 /* If this is a list, recurse to get the options. */
4208 if (TREE_CODE (args
) == TREE_LIST
)
4212 for (; args
; args
= TREE_CHAIN (args
))
4213 if (TREE_VALUE (args
)
4214 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4215 p_strings
, enum_opts_set
))
4221 else if (TREE_CODE (args
) != STRING_CST
)
4224 /* Handle multiple arguments separated by commas. */
4225 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4227 while (next_optstr
&& *next_optstr
!= '\0')
4229 char *p
= next_optstr
;
4231 char *comma
= strchr (next_optstr
, ',');
4232 const char *opt_string
;
4233 size_t len
, opt_len
;
4238 enum ix86_opt_type type
= ix86_opt_unknown
;
4244 len
= comma
- next_optstr
;
4245 next_optstr
= comma
+ 1;
4253 /* Recognize no-xxx. */
4254 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4263 /* Find the option. */
4266 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4268 type
= attrs
[i
].type
;
4269 opt_len
= attrs
[i
].len
;
4270 if (ch
== attrs
[i
].string
[0]
4271 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4274 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4277 mask
= attrs
[i
].mask
;
4278 opt_string
= attrs
[i
].string
;
4283 /* Process the option. */
4286 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4290 else if (type
== ix86_opt_isa
)
4292 struct cl_decoded_option decoded
;
4294 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4295 ix86_handle_option (&global_options
, &global_options_set
,
4296 &decoded
, input_location
);
4299 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4301 if (type
== ix86_opt_no
)
4302 opt_set_p
= !opt_set_p
;
4305 target_flags
|= mask
;
4307 target_flags
&= ~mask
;
4310 else if (type
== ix86_opt_str
)
4314 error ("option(\"%s\") was already specified", opt_string
);
4318 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4321 else if (type
== ix86_opt_enum
)
4326 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4328 set_option (&global_options
, enum_opts_set
, opt
, value
,
4329 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4333 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4345 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4348 ix86_valid_target_attribute_tree (tree args
)
4350 const char *orig_arch_string
= ix86_arch_string
;
4351 const char *orig_tune_string
= ix86_tune_string
;
4352 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4353 int orig_tune_defaulted
= ix86_tune_defaulted
;
4354 int orig_arch_specified
= ix86_arch_specified
;
4355 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4358 struct cl_target_option
*def
4359 = TREE_TARGET_OPTION (target_option_default_node
);
4360 struct gcc_options enum_opts_set
;
4362 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4364 /* Process each of the options on the chain. */
4365 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4369 /* If the changed options are different from the default, rerun
4370 ix86_option_override_internal, and then save the options away.
4371 The string options are are attribute options, and will be undone
4372 when we copy the save structure. */
4373 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4374 || target_flags
!= def
->x_target_flags
4375 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4376 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4377 || enum_opts_set
.x_ix86_fpmath
)
4379 /* If we are using the default tune= or arch=, undo the string assigned,
4380 and use the default. */
4381 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4382 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4383 else if (!orig_arch_specified
)
4384 ix86_arch_string
= NULL
;
4386 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4387 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4388 else if (orig_tune_defaulted
)
4389 ix86_tune_string
= NULL
;
4391 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4392 if (enum_opts_set
.x_ix86_fpmath
)
4393 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4394 else if (!TARGET_64BIT
&& TARGET_SSE
)
4396 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4397 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4400 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4401 ix86_option_override_internal (false);
4403 /* Add any builtin functions with the new isa if any. */
4404 ix86_add_new_builtins (ix86_isa_flags
);
4406 /* Save the current options unless we are validating options for
4408 t
= build_target_option_node ();
4410 ix86_arch_string
= orig_arch_string
;
4411 ix86_tune_string
= orig_tune_string
;
4412 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4414 /* Free up memory allocated to hold the strings */
4415 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4416 free (option_strings
[i
]);
4422 /* Hook to validate attribute((target("string"))). */
4425 ix86_valid_target_attribute_p (tree fndecl
,
4426 tree
ARG_UNUSED (name
),
4428 int ARG_UNUSED (flags
))
4430 struct cl_target_option cur_target
;
4432 tree old_optimize
= build_optimization_node ();
4433 tree new_target
, new_optimize
;
4434 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4436 /* If the function changed the optimization levels as well as setting target
4437 options, start with the optimizations specified. */
4438 if (func_optimize
&& func_optimize
!= old_optimize
)
4439 cl_optimization_restore (&global_options
,
4440 TREE_OPTIMIZATION (func_optimize
));
4442 /* The target attributes may also change some optimization flags, so update
4443 the optimization options if necessary. */
4444 cl_target_option_save (&cur_target
, &global_options
);
4445 new_target
= ix86_valid_target_attribute_tree (args
);
4446 new_optimize
= build_optimization_node ();
4453 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4455 if (old_optimize
!= new_optimize
)
4456 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4459 cl_target_option_restore (&global_options
, &cur_target
);
4461 if (old_optimize
!= new_optimize
)
4462 cl_optimization_restore (&global_options
,
4463 TREE_OPTIMIZATION (old_optimize
));
4469 /* Hook to determine if one function can safely inline another. */
4472 ix86_can_inline_p (tree caller
, tree callee
)
4475 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4476 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4478 /* If callee has no option attributes, then it is ok to inline. */
4482 /* If caller has no option attributes, but callee does then it is not ok to
4484 else if (!caller_tree
)
4489 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4490 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4492 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4493 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4495 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4496 != callee_opts
->x_ix86_isa_flags
)
4499 /* See if we have the same non-isa options. */
4500 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4503 /* See if arch, tune, etc. are the same. */
4504 else if (caller_opts
->arch
!= callee_opts
->arch
)
4507 else if (caller_opts
->tune
!= callee_opts
->tune
)
4510 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4513 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4524 /* Remember the last target of ix86_set_current_function. */
4525 static GTY(()) tree ix86_previous_fndecl
;
4527 /* Establish appropriate back-end context for processing the function
4528 FNDECL. The argument might be NULL to indicate processing at top
4529 level, outside of any function scope. */
4531 ix86_set_current_function (tree fndecl
)
4533 /* Only change the context if the function changes. This hook is called
4534 several times in the course of compiling a function, and we don't want to
4535 slow things down too much or call target_reinit when it isn't safe. */
4536 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4538 tree old_tree
= (ix86_previous_fndecl
4539 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4542 tree new_tree
= (fndecl
4543 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4546 ix86_previous_fndecl
= fndecl
;
4547 if (old_tree
== new_tree
)
4552 cl_target_option_restore (&global_options
,
4553 TREE_TARGET_OPTION (new_tree
));
4559 struct cl_target_option
*def
4560 = TREE_TARGET_OPTION (target_option_current_node
);
4562 cl_target_option_restore (&global_options
, def
);
4569 /* Return true if this goes in large data/bss. */
4572 ix86_in_large_data_p (tree exp
)
4574 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4577 /* Functions are never large data. */
4578 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4581 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4583 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4584 if (strcmp (section
, ".ldata") == 0
4585 || strcmp (section
, ".lbss") == 0)
4591 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4593 /* If this is an incomplete type with size 0, then we can't put it
4594 in data because it might be too big when completed. */
4595 if (!size
|| size
> ix86_section_threshold
)
4602 /* Switch to the appropriate section for output of DECL.
4603 DECL is either a `VAR_DECL' node or a constant of some sort.
4604 RELOC indicates whether forming the initial value of DECL requires
4605 link-time relocations. */
4607 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4611 x86_64_elf_select_section (tree decl
, int reloc
,
4612 unsigned HOST_WIDE_INT align
)
4614 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4615 && ix86_in_large_data_p (decl
))
4617 const char *sname
= NULL
;
4618 unsigned int flags
= SECTION_WRITE
;
4619 switch (categorize_decl_for_section (decl
, reloc
))
4624 case SECCAT_DATA_REL
:
4625 sname
= ".ldata.rel";
4627 case SECCAT_DATA_REL_LOCAL
:
4628 sname
= ".ldata.rel.local";
4630 case SECCAT_DATA_REL_RO
:
4631 sname
= ".ldata.rel.ro";
4633 case SECCAT_DATA_REL_RO_LOCAL
:
4634 sname
= ".ldata.rel.ro.local";
4638 flags
|= SECTION_BSS
;
4641 case SECCAT_RODATA_MERGE_STR
:
4642 case SECCAT_RODATA_MERGE_STR_INIT
:
4643 case SECCAT_RODATA_MERGE_CONST
:
4647 case SECCAT_SRODATA
:
4654 /* We don't split these for medium model. Place them into
4655 default sections and hope for best. */
4660 /* We might get called with string constants, but get_named_section
4661 doesn't like them as they are not DECLs. Also, we need to set
4662 flags in that case. */
4664 return get_section (sname
, flags
, NULL
);
4665 return get_named_section (decl
, sname
, reloc
);
4668 return default_elf_select_section (decl
, reloc
, align
);
4671 /* Build up a unique section name, expressed as a
4672 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4673 RELOC indicates whether the initial value of EXP requires
4674 link-time relocations. */
4676 static void ATTRIBUTE_UNUSED
4677 x86_64_elf_unique_section (tree decl
, int reloc
)
4679 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4680 && ix86_in_large_data_p (decl
))
4682 const char *prefix
= NULL
;
4683 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4684 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4686 switch (categorize_decl_for_section (decl
, reloc
))
4689 case SECCAT_DATA_REL
:
4690 case SECCAT_DATA_REL_LOCAL
:
4691 case SECCAT_DATA_REL_RO
:
4692 case SECCAT_DATA_REL_RO_LOCAL
:
4693 prefix
= one_only
? ".ld" : ".ldata";
4696 prefix
= one_only
? ".lb" : ".lbss";
4699 case SECCAT_RODATA_MERGE_STR
:
4700 case SECCAT_RODATA_MERGE_STR_INIT
:
4701 case SECCAT_RODATA_MERGE_CONST
:
4702 prefix
= one_only
? ".lr" : ".lrodata";
4704 case SECCAT_SRODATA
:
4711 /* We don't split these for medium model. Place them into
4712 default sections and hope for best. */
4717 const char *name
, *linkonce
;
4720 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4721 name
= targetm
.strip_name_encoding (name
);
4723 /* If we're using one_only, then there needs to be a .gnu.linkonce
4724 prefix to the section name. */
4725 linkonce
= one_only
? ".gnu.linkonce" : "";
4727 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4729 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4733 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  /* Objects past the section threshold under the medium models go into
     the large-data .largecomm; everything else uses the normal .comm.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, align / BITS_PER_UNIT);
}
#endif
4758 /* Utility function for targets to use in implementing
4759 ASM_OUTPUT_ALIGNED_BSS. */
4762 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4763 const char *name
, unsigned HOST_WIDE_INT size
,
4766 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4767 && size
> (unsigned int)ix86_section_threshold
)
4768 switch_to_section (get_named_section (decl
, ".lbss", 0));
4770 switch_to_section (bss_section
);
4771 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4772 #ifdef ASM_DECLARE_OBJECT_NAME
4773 last_assemble_variable_decl
= decl
;
4774 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4776 /* Standard thing is just output label for the object. */
4777 ASM_OUTPUT_LABEL (file
, name
);
4778 #endif /* ASM_DECLARE_OBJECT_NAME */
4779 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4782 /* Decide whether we must probe the stack before any space allocation
4783 on this target. It's essentially TARGET_STACK_PROBE except when
4784 -fstack-check causes the stack to be already probed differently. */
4787 ix86_target_stack_probe (void)
4789 /* Do not probe the stack twice if static stack checking is enabled. */
4790 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4793 return TARGET_STACK_PROBE
;
4796 /* Decide whether we can make a sibling call to a function. DECL is the
4797 declaration of the function being targeted by the call and EXP is the
4798 CALL_EXPR representing the call. */
4801 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4803 tree type
, decl_or_type
;
4806 /* If we are generating position-independent code, we cannot sibcall
4807 optimize any indirect call, or a direct call to a global function,
4808 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4812 && (!decl
|| !targetm
.binds_local_p (decl
)))
4815 /* If we need to align the outgoing stack, then sibcalling would
4816 unalign the stack, which may break the called function. */
4817 if (ix86_minimum_incoming_stack_boundary (true)
4818 < PREFERRED_STACK_BOUNDARY
)
4823 decl_or_type
= decl
;
4824 type
= TREE_TYPE (decl
);
4828 /* We're looking at the CALL_EXPR, we need the type of the function. */
4829 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4830 type
= TREE_TYPE (type
); /* pointer type */
4831 type
= TREE_TYPE (type
); /* function type */
4832 decl_or_type
= type
;
4835 /* Check that the return value locations are the same. Like
4836 if we are returning floats on the 80387 register stack, we cannot
4837 make a sibcall from a function that doesn't return a float to a
4838 function that does or, conversely, from a function that does return
4839 a float to a function that doesn't; the necessary stack adjustment
4840 would not be executed. This is also the place we notice
4841 differences in the return value ABI. Note that it is ok for one
4842 of the functions to have void return type as long as the return
4843 value of the other is passed in a register. */
4844 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4845 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4847 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4849 if (!rtx_equal_p (a
, b
))
4852 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4854 else if (!rtx_equal_p (a
, b
))
4859 /* The SYSV ABI has more call-clobbered registers;
4860 disallow sibcalls from MS to SYSV. */
4861 if (cfun
->machine
->call_abi
== MS_ABI
4862 && ix86_function_type_abi (type
) == SYSV_ABI
)
4867 /* If this call is indirect, we'll need to be able to use a
4868 call-clobbered register for the address of the target function.
4869 Make sure that all such registers are not used for passing
4870 parameters. Note that DLLIMPORT functions are indirect. */
4872 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4874 if (ix86_function_regparm (type
, NULL
) >= 3)
4876 /* ??? Need to count the actual number of registers to be used,
4877 not the possible number of registers. Fix later. */
4883 /* Otherwise okay. That also includes certain types of indirect calls. */
4887 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4888 and "sseregparm" calling convention attributes;
4889 arguments as in struct attribute_spec.handler. */
4892 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4894 int flags ATTRIBUTE_UNUSED
,
4897 if (TREE_CODE (*node
) != FUNCTION_TYPE
4898 && TREE_CODE (*node
) != METHOD_TYPE
4899 && TREE_CODE (*node
) != FIELD_DECL
4900 && TREE_CODE (*node
) != TYPE_DECL
)
4902 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4904 *no_add_attrs
= true;
4908 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4909 if (is_attribute_p ("regparm", name
))
4913 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4915 error ("fastcall and regparm attributes are not compatible");
4918 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4920 error ("regparam and thiscall attributes are not compatible");
4923 cst
= TREE_VALUE (args
);
4924 if (TREE_CODE (cst
) != INTEGER_CST
)
4926 warning (OPT_Wattributes
,
4927 "%qE attribute requires an integer constant argument",
4929 *no_add_attrs
= true;
4931 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4933 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4935 *no_add_attrs
= true;
4943 /* Do not warn when emulating the MS ABI. */
4944 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4945 && TREE_CODE (*node
) != METHOD_TYPE
)
4946 || ix86_function_type_abi (*node
) != MS_ABI
)
4947 warning (OPT_Wattributes
, "%qE attribute ignored",
4949 *no_add_attrs
= true;
4953 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4954 if (is_attribute_p ("fastcall", name
))
4956 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4958 error ("fastcall and cdecl attributes are not compatible");
4960 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4962 error ("fastcall and stdcall attributes are not compatible");
4964 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4966 error ("fastcall and regparm attributes are not compatible");
4968 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4970 error ("fastcall and thiscall attributes are not compatible");
4974 /* Can combine stdcall with fastcall (redundant), regparm and
4976 else if (is_attribute_p ("stdcall", name
))
4978 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4980 error ("stdcall and cdecl attributes are not compatible");
4982 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4984 error ("stdcall and fastcall attributes are not compatible");
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4988 error ("stdcall and thiscall attributes are not compatible");
4992 /* Can combine cdecl with regparm and sseregparm. */
4993 else if (is_attribute_p ("cdecl", name
))
4995 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4997 error ("stdcall and cdecl attributes are not compatible");
4999 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5001 error ("fastcall and cdecl attributes are not compatible");
5003 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5005 error ("cdecl and thiscall attributes are not compatible");
5008 else if (is_attribute_p ("thiscall", name
))
5010 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5011 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5015 error ("stdcall and thiscall attributes are not compatible");
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5019 error ("fastcall and thiscall attributes are not compatible");
5021 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5023 error ("cdecl and thiscall attributes are not compatible");
5027 /* Can combine sseregparm with all attributes. */
5032 /* The transactional memory builtins are implicitly regparm or fastcall
5033 depending on the ABI. Override the generic do-nothing attribute that
5034 these builtins were declared with, and replace it with one of the two
5035 attributes that we expect elsewhere. */
5038 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5039 tree args ATTRIBUTE_UNUSED
,
5040 int flags ATTRIBUTE_UNUSED
,
5045 /* In no case do we want to add the placeholder attribute. */
5046 *no_add_attrs
= true;
5048 /* The 64-bit ABI is unchanged for transactional memory. */
5052 /* ??? Is there a better way to validate 32-bit windows? We have
5053 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5054 if (CHECK_STACK_LIMIT
> 0)
5055 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5058 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5059 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5061 decl_attributes (node
, alt
, flags
);
5066 /* This function determines from TYPE the calling-convention. */
5069 ix86_get_callcvt (const_tree type
)
5071 unsigned int ret
= 0;
5076 return IX86_CALLCVT_CDECL
;
5078 attrs
= TYPE_ATTRIBUTES (type
);
5079 if (attrs
!= NULL_TREE
)
5081 if (lookup_attribute ("cdecl", attrs
))
5082 ret
|= IX86_CALLCVT_CDECL
;
5083 else if (lookup_attribute ("stdcall", attrs
))
5084 ret
|= IX86_CALLCVT_STDCALL
;
5085 else if (lookup_attribute ("fastcall", attrs
))
5086 ret
|= IX86_CALLCVT_FASTCALL
;
5087 else if (lookup_attribute ("thiscall", attrs
))
5088 ret
|= IX86_CALLCVT_THISCALL
;
5090 /* Regparam isn't allowed for thiscall and fastcall. */
5091 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5093 if (lookup_attribute ("regparm", attrs
))
5094 ret
|= IX86_CALLCVT_REGPARM
;
5095 if (lookup_attribute ("sseregparm", attrs
))
5096 ret
|= IX86_CALLCVT_SSEREGPARM
;
5099 if (IX86_BASE_CALLCVT(ret
) != 0)
5103 is_stdarg
= stdarg_p (type
);
5104 if (TARGET_RTD
&& !is_stdarg
)
5105 return IX86_CALLCVT_STDCALL
| ret
;
5109 || TREE_CODE (type
) != METHOD_TYPE
5110 || ix86_function_type_abi (type
) != MS_ABI
)
5111 return IX86_CALLCVT_CDECL
| ret
;
5113 return IX86_CALLCVT_THISCALL
;
5116 /* Return 0 if the attributes for two types are incompatible, 1 if they
5117 are compatible, and 2 if they are nearly compatible (which causes a
5118 warning to be generated). */
5121 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5123 unsigned int ccvt1
, ccvt2
;
5125 if (TREE_CODE (type1
) != FUNCTION_TYPE
5126 && TREE_CODE (type1
) != METHOD_TYPE
)
5129 ccvt1
= ix86_get_callcvt (type1
);
5130 ccvt2
= ix86_get_callcvt (type2
);
5133 if (ix86_function_regparm (type1
, NULL
)
5134 != ix86_function_regparm (type2
, NULL
))
5140 /* Return the regparm value for a function with the indicated TYPE and DECL.
5141 DECL may be NULL when calling function indirectly
5142 or considering a libcall. */
5145 ix86_function_regparm (const_tree type
, const_tree decl
)
5152 return (ix86_function_type_abi (type
) == SYSV_ABI
5153 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5154 ccvt
= ix86_get_callcvt (type
);
5155 regparm
= ix86_regparm
;
5157 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5159 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5162 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5166 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5168 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5171 /* Use register calling convention for local functions when possible. */
5173 && TREE_CODE (decl
) == FUNCTION_DECL
5175 && !(profile_flag
&& !flag_fentry
))
5177 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5178 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5179 if (i
&& i
->local
&& i
->can_change_signature
)
5181 int local_regparm
, globals
= 0, regno
;
5183 /* Make sure no regparm register is taken by a
5184 fixed register variable. */
5185 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5186 if (fixed_regs
[local_regparm
])
5189 /* We don't want to use regparm(3) for nested functions as
5190 these use a static chain pointer in the third argument. */
5191 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5194 /* In 32-bit mode save a register for the split stack. */
5195 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5198 /* Each fixed register usage increases register pressure,
5199 so less registers should be used for argument passing.
5200 This functionality can be overriden by an explicit
5202 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5203 if (fixed_regs
[regno
])
5207 = globals
< local_regparm
? local_regparm
- globals
: 0;
5209 if (local_regparm
> regparm
)
5210 regparm
= local_regparm
;
5217 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5218 DFmode (2) arguments in SSE registers for a function with the
5219 indicated TYPE and DECL. DECL may be NULL when calling function
5220 indirectly or considering a libcall. Otherwise return 0. */
5223 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5225 gcc_assert (!TARGET_64BIT
);
5227 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5228 by the sseregparm attribute. */
5229 if (TARGET_SSEREGPARM
5230 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5237 error ("calling %qD with attribute sseregparm without "
5238 "SSE/SSE2 enabled", decl
);
5240 error ("calling %qT with attribute sseregparm without "
5241 "SSE/SSE2 enabled", type
);
5249 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5250 (and DFmode for SSE2) arguments in SSE registers. */
5251 if (decl
&& TARGET_SSE_MATH
&& optimize
5252 && !(profile_flag
&& !flag_fentry
))
5254 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5255 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5256 if (i
&& i
->local
&& i
->can_change_signature
)
5257 return TARGET_SSE2
? 2 : 1;
5263 /* Return true if EAX is live at the start of the function. Used by
5264 ix86_expand_prologue to determine if we need special help before
5265 calling allocate_stack_worker. */
5268 ix86_eax_live_at_start_p (void)
5270 /* Cheat. Don't bother working forward from ix86_function_regparm
5271 to the function type to whether an actual argument is located in
5272 eax. Instead just look at cfg info, which is still close enough
5273 to correct at this point. This gives false positives for broken
5274 functions that might use uninitialized data that happens to be
5275 allocated in eax, but who cares? */
5276 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5280 ix86_keep_aggregate_return_pointer (tree fntype
)
5286 attr
= lookup_attribute ("callee_pop_aggregate_return",
5287 TYPE_ATTRIBUTES (fntype
));
5289 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5291 /* For 32-bit MS-ABI the default is to keep aggregate
5293 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5296 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5299 /* Value is the number of bytes of arguments automatically
5300 popped when returning from a subroutine call.
5301 FUNDECL is the declaration node of the function (as a tree),
5302 FUNTYPE is the data type of the function (as a tree),
5303 or for a library call it is an identifier node for the subroutine name.
5304 SIZE is the number of bytes of arguments passed on the stack.
5306 On the 80386, the RTD insn may be used to pop them if the number
5307 of args is fixed, but if the number is variable then the caller
5308 must pop them all. RTD can't be used for library calls now
5309 because the library is compiled with the Unix compiler.
5310 Use of RTD is a selectable option, since it is incompatible with
5311 standard Unix calling sequences. If the option is not selected,
5312 the caller must always pop the args.
5314 The attribute stdcall is equivalent to RTD on a per module basis. */
5317 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5321 /* None of the 64-bit ABIs pop arguments. */
5325 ccvt
= ix86_get_callcvt (funtype
);
5327 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5328 | IX86_CALLCVT_THISCALL
)) != 0
5329 && ! stdarg_p (funtype
))
5332 /* Lose any fake structure return argument if it is passed on the stack. */
5333 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5334 && !ix86_keep_aggregate_return_pointer (funtype
))
5336 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5338 return GET_MODE_SIZE (Pmode
);
5344 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5347 ix86_legitimate_combined_insn (rtx insn
)
5349 /* Check operand constraints in case hard registers were propagated
5350 into insn pattern. This check prevents combine pass from
5351 generating insn patterns with invalid hard register operands.
5352 These invalid insns can eventually confuse reload to error out
5353 with a spill failure. See also PRs 46829 and 46843. */
5354 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5358 extract_insn (insn
);
5359 preprocess_constraints ();
5361 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5363 rtx op
= recog_data
.operand
[i
];
5364 enum machine_mode mode
= GET_MODE (op
);
5365 struct operand_alternative
*op_alt
;
5370 /* A unary operator may be accepted by the predicate, but it
5371 is irrelevant for matching constraints. */
5375 if (GET_CODE (op
) == SUBREG
)
5377 if (REG_P (SUBREG_REG (op
))
5378 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5379 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5380 GET_MODE (SUBREG_REG (op
)),
5383 op
= SUBREG_REG (op
);
5386 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5389 op_alt
= recog_op_alt
[i
];
5391 /* Operand has no constraints, anything is OK. */
5392 win
= !recog_data
.n_alternatives
;
5394 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5396 if (op_alt
[j
].anything_ok
5397 || (op_alt
[j
].matches
!= -1
5399 (recog_data
.operand
[i
],
5400 recog_data
.operand
[op_alt
[j
].matches
]))
5401 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5416 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5418 static unsigned HOST_WIDE_INT
5419 ix86_asan_shadow_offset (void)
5421 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_LP64
? 44 : 29);
5424 /* Argument support functions. */
5426 /* Return true when register may be used to pass function parameters. */
5428 ix86_function_arg_regno_p (int regno
)
5431 const int *parm_regs
;
5436 return (regno
< REGPARM_MAX
5437 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5439 return (regno
< REGPARM_MAX
5440 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5441 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5442 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5443 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5448 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5453 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5454 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5458 /* TODO: The function should depend on current function ABI but
5459 builtins.c would need updating then. Therefore we use the
5462 /* RAX is used as hidden argument to va_arg functions. */
5463 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5466 if (ix86_abi
== MS_ABI
)
5467 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5469 parm_regs
= x86_64_int_parameter_registers
;
5470 for (i
= 0; i
< (ix86_abi
== MS_ABI
5471 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5472 if (regno
== parm_regs
[i
])
5477 /* Return if we do not know how to pass TYPE solely in registers. */
5480 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5482 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5485 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5486 The layout_type routine is crafty and tries to trick us into passing
5487 currently unsupported vector types on the stack by using TImode. */
5488 return (!TARGET_64BIT
&& mode
== TImode
5489 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5492 /* It returns the size, in bytes, of the area reserved for arguments passed
5493 in registers for the function represented by fndecl dependent to the used
5496 ix86_reg_parm_stack_space (const_tree fndecl
)
5498 enum calling_abi call_abi
= SYSV_ABI
;
5499 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5500 call_abi
= ix86_function_abi (fndecl
);
5502 call_abi
= ix86_function_type_abi (fndecl
);
5503 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5508 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5511 ix86_function_type_abi (const_tree fntype
)
5513 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5515 enum calling_abi abi
= ix86_abi
;
5516 if (abi
== SYSV_ABI
)
5518 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5521 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5529 ix86_function_ms_hook_prologue (const_tree fn
)
5531 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5533 if (decl_function_context (fn
) != NULL_TREE
)
5534 error_at (DECL_SOURCE_LOCATION (fn
),
5535 "ms_hook_prologue is not compatible with nested function");
5542 static enum calling_abi
5543 ix86_function_abi (const_tree fndecl
)
5547 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5550 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5553 ix86_cfun_abi (void)
5557 return cfun
->machine
->call_abi
;
5560 /* Write the extra assembler code needed to declare a function properly. */
5563 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5566 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5570 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5571 unsigned int filler_cc
= 0xcccccccc;
5573 for (i
= 0; i
< filler_count
; i
+= 4)
5574 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5577 #ifdef SUBTARGET_ASM_UNWIND_INIT
5578 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5581 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5583 /* Output magic byte marker, if hot-patch attribute is set. */
5588 /* leaq [%rsp + 0], %rsp */
5589 asm_fprintf (asm_out_file
, ASM_BYTE
5590 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5594 /* movl.s %edi, %edi
5596 movl.s %esp, %ebp */
5597 asm_fprintf (asm_out_file
, ASM_BYTE
5598 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5604 extern void init_regs (void);
5606 /* Implementation of call abi switching target hook. Specific to FNDECL
5607 the specific call register sets are set. See also
5608 ix86_conditional_register_usage for more details. */
5610 ix86_call_abi_override (const_tree fndecl
)
5612 if (fndecl
== NULL_TREE
)
5613 cfun
->machine
->call_abi
= ix86_abi
;
5615 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5618 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5619 expensive re-initialization of init_regs each time we switch function context
5620 since this is needed only during RTL expansion. */
5622 ix86_maybe_switch_abi (void)
5625 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5629 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5630 for a call to a function whose data type is FNTYPE.
5631 For a library call, FNTYPE is 0. */
5634 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5635 tree fntype
, /* tree ptr for function decl */
5636 rtx libname
, /* SYMBOL_REF of library name or 0 */
5640 struct cgraph_local_info
*i
;
5642 memset (cum
, 0, sizeof (*cum
));
5646 i
= cgraph_local_info (fndecl
);
5647 cum
->call_abi
= ix86_function_abi (fndecl
);
5652 cum
->call_abi
= ix86_function_type_abi (fntype
);
5655 cum
->caller
= caller
;
5657 /* Set up the number of registers to use for passing arguments. */
5659 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5660 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5661 "or subtarget optimization implying it");
5662 cum
->nregs
= ix86_regparm
;
5665 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5666 ? X86_64_REGPARM_MAX
5667 : X86_64_MS_REGPARM_MAX
);
5671 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5674 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5675 ? X86_64_SSE_REGPARM_MAX
5676 : X86_64_MS_SSE_REGPARM_MAX
);
5680 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5681 cum
->warn_avx
= true;
5682 cum
->warn_sse
= true;
5683 cum
->warn_mmx
= true;
5685 /* Because type might mismatch in between caller and callee, we need to
5686 use actual type of function for local calls.
5687 FIXME: cgraph_analyze can be told to actually record if function uses
5688 va_start so for local functions maybe_vaarg can be made aggressive
5690 FIXME: once typesytem is fixed, we won't need this code anymore. */
5691 if (i
&& i
->local
&& i
->can_change_signature
)
5692 fntype
= TREE_TYPE (fndecl
);
5693 cum
->maybe_vaarg
= (fntype
5694 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5699 /* If there are variable arguments, then we won't pass anything
5700 in registers in 32-bit mode. */
5701 if (stdarg_p (fntype
))
5712 /* Use ecx and edx registers if function has fastcall attribute,
5713 else look for regparm information. */
5716 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5717 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5720 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5722 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5728 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5731 /* Set up the number of SSE registers used for passing SFmode
5732 and DFmode arguments. Warn for mismatching ABI. */
5733 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5737 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5738 But in the case of vector types, it is some vector mode.
5740 When we have only some of our vector isa extensions enabled, then there
5741 are some modes for which vector_mode_supported_p is false. For these
5742 modes, the generic vector support in gcc will choose some non-vector mode
5743 in order to implement the type. By computing the natural mode, we'll
5744 select the proper ABI location for the operand and not depend on whatever
5745 the middle-end decides to do with these vector types.
5747 The midde-end can't deal with the vector types > 16 bytes. In this
5748 case, we return the original mode and warn ABI change if CUM isn't
5751 static enum machine_mode
5752 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5754 enum machine_mode mode
= TYPE_MODE (type
);
5756 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5758 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5759 if ((size
== 8 || size
== 16 || size
== 32)
5760 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5761 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5763 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5765 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5766 mode
= MIN_MODE_VECTOR_FLOAT
;
5768 mode
= MIN_MODE_VECTOR_INT
;
5770 /* Get the mode which has this inner mode and number of units. */
5771 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5772 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5773 && GET_MODE_INNER (mode
) == innermode
)
5775 if (size
== 32 && !TARGET_AVX
)
5777 static bool warnedavx
;
5784 warning (0, "AVX vector argument without AVX "
5785 "enabled changes the ABI");
5787 return TYPE_MODE (type
);
5789 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5791 static bool warnedsse
;
5798 warning (0, "SSE vector argument without SSE "
5799 "enabled changes the ABI");
5814 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5815 this may not agree with the mode that the type system has chosen for the
5816 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5817 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5820 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5825 if (orig_mode
!= BLKmode
)
5826 tmp
= gen_rtx_REG (orig_mode
, regno
);
5829 tmp
= gen_rtx_REG (mode
, regno
);
5830 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5831 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5837 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5838 of this code is to classify each 8bytes of incoming argument by the register
5839 class and assign registers accordingly. */
5841 /* Return the union class of CLASS1 and CLASS2.
5842 See the x86-64 PS ABI for details. */
5844 static enum x86_64_reg_class
5845 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5847 /* Rule #1: If both classes are equal, this is the resulting class. */
5848 if (class1
== class2
)
5851 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5853 if (class1
== X86_64_NO_CLASS
)
5855 if (class2
== X86_64_NO_CLASS
)
5858 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5859 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5860 return X86_64_MEMORY_CLASS
;
5862 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5863 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5864 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5865 return X86_64_INTEGERSI_CLASS
;
5866 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5867 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5868 return X86_64_INTEGER_CLASS
;
5870 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5872 if (class1
== X86_64_X87_CLASS
5873 || class1
== X86_64_X87UP_CLASS
5874 || class1
== X86_64_COMPLEX_X87_CLASS
5875 || class2
== X86_64_X87_CLASS
5876 || class2
== X86_64_X87UP_CLASS
5877 || class2
== X86_64_COMPLEX_X87_CLASS
)
5878 return X86_64_MEMORY_CLASS
;
5880 /* Rule #6: Otherwise class SSE is used. */
5881 return X86_64_SSE_CLASS
;
5884 /* Classify the argument of type TYPE and mode MODE.
5885 CLASSES will be filled by the register class used to pass each word
5886 of the operand. The number of words is returned. In case the parameter
5887 should be passed in memory, 0 is returned. As a special case for zero
5888 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5890 BIT_OFFSET is used internally for handling records and specifies offset
5891 of the offset in bits modulo 256 to avoid overflow cases.
5893 See the x86-64 PS ABI for details.
5897 classify_argument (enum machine_mode mode
, const_tree type
,
5898 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5900 HOST_WIDE_INT bytes
=
5901 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5903 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5905 /* Variable sized entities are always passed/returned in memory. */
5909 if (mode
!= VOIDmode
5910 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5913 if (type
&& AGGREGATE_TYPE_P (type
))
5917 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5919 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5923 for (i
= 0; i
< words
; i
++)
5924 classes
[i
] = X86_64_NO_CLASS
;
5926 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5927 signalize memory class, so handle it as special case. */
5930 classes
[0] = X86_64_NO_CLASS
;
5934 /* Classify each field of record and merge classes. */
5935 switch (TREE_CODE (type
))
5938 /* And now merge the fields of structure. */
5939 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5941 if (TREE_CODE (field
) == FIELD_DECL
)
5945 if (TREE_TYPE (field
) == error_mark_node
)
5948 /* Bitfields are always classified as integer. Handle them
5949 early, since later code would consider them to be
5950 misaligned integers. */
5951 if (DECL_BIT_FIELD (field
))
5953 for (i
= (int_bit_position (field
)
5954 + (bit_offset
% 64)) / 8 / 8;
5955 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5956 + tree_low_cst (DECL_SIZE (field
), 0)
5959 merge_classes (X86_64_INTEGER_CLASS
,
5966 type
= TREE_TYPE (field
);
5968 /* Flexible array member is ignored. */
5969 if (TYPE_MODE (type
) == BLKmode
5970 && TREE_CODE (type
) == ARRAY_TYPE
5971 && TYPE_SIZE (type
) == NULL_TREE
5972 && TYPE_DOMAIN (type
) != NULL_TREE
5973 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5978 if (!warned
&& warn_psabi
)
5981 inform (input_location
,
5982 "the ABI of passing struct with"
5983 " a flexible array member has"
5984 " changed in GCC 4.4");
5988 num
= classify_argument (TYPE_MODE (type
), type
,
5990 (int_bit_position (field
)
5991 + bit_offset
) % 256);
5994 pos
= (int_bit_position (field
)
5995 + (bit_offset
% 64)) / 8 / 8;
5996 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5998 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6005 /* Arrays are handled as small records. */
6008 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6009 TREE_TYPE (type
), subclasses
, bit_offset
);
6013 /* The partial classes are now full classes. */
6014 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6015 subclasses
[0] = X86_64_SSE_CLASS
;
6016 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6017 && !((bit_offset
% 64) == 0 && bytes
== 4))
6018 subclasses
[0] = X86_64_INTEGER_CLASS
;
6020 for (i
= 0; i
< words
; i
++)
6021 classes
[i
] = subclasses
[i
% num
];
6026 case QUAL_UNION_TYPE
:
6027 /* Unions are similar to RECORD_TYPE but offset is always 0.
6029 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6031 if (TREE_CODE (field
) == FIELD_DECL
)
6035 if (TREE_TYPE (field
) == error_mark_node
)
6038 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6039 TREE_TYPE (field
), subclasses
,
6043 for (i
= 0; i
< num
; i
++)
6044 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6055 /* When size > 16 bytes, if the first one isn't
6056 X86_64_SSE_CLASS or any other ones aren't
6057 X86_64_SSEUP_CLASS, everything should be passed in
6059 if (classes
[0] != X86_64_SSE_CLASS
)
6062 for (i
= 1; i
< words
; i
++)
6063 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6067 /* Final merger cleanup. */
6068 for (i
= 0; i
< words
; i
++)
6070 /* If one class is MEMORY, everything should be passed in
6072 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6075 /* The X86_64_SSEUP_CLASS should be always preceded by
6076 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6077 if (classes
[i
] == X86_64_SSEUP_CLASS
6078 && classes
[i
- 1] != X86_64_SSE_CLASS
6079 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6081 /* The first one should never be X86_64_SSEUP_CLASS. */
6082 gcc_assert (i
!= 0);
6083 classes
[i
] = X86_64_SSE_CLASS
;
6086 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6087 everything should be passed in memory. */
6088 if (classes
[i
] == X86_64_X87UP_CLASS
6089 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6093 /* The first one should never be X86_64_X87UP_CLASS. */
6094 gcc_assert (i
!= 0);
6095 if (!warned
&& warn_psabi
)
6098 inform (input_location
,
6099 "the ABI of passing union with long double"
6100 " has changed in GCC 4.4");
6108 /* Compute alignment needed. We align all types to natural boundaries with
6109 exception of XFmode that is aligned to 64bits. */
6110 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6112 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6115 mode_alignment
= 128;
6116 else if (mode
== XCmode
)
6117 mode_alignment
= 256;
6118 if (COMPLEX_MODE_P (mode
))
6119 mode_alignment
/= 2;
6120 /* Misaligned fields are always returned in memory. */
6121 if (bit_offset
% mode_alignment
)
6125 /* for V1xx modes, just use the base mode */
6126 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6127 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6128 mode
= GET_MODE_INNER (mode
);
6130 /* Classification of atomic types. */
6135 classes
[0] = X86_64_SSE_CLASS
;
6138 classes
[0] = X86_64_SSE_CLASS
;
6139 classes
[1] = X86_64_SSEUP_CLASS
;
6149 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6153 classes
[0] = X86_64_INTEGERSI_CLASS
;
6156 else if (size
<= 64)
6158 classes
[0] = X86_64_INTEGER_CLASS
;
6161 else if (size
<= 64+32)
6163 classes
[0] = X86_64_INTEGER_CLASS
;
6164 classes
[1] = X86_64_INTEGERSI_CLASS
;
6167 else if (size
<= 64+64)
6169 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6177 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6181 /* OImode shouldn't be used directly. */
6186 if (!(bit_offset
% 64))
6187 classes
[0] = X86_64_SSESF_CLASS
;
6189 classes
[0] = X86_64_SSE_CLASS
;
6192 classes
[0] = X86_64_SSEDF_CLASS
;
6195 classes
[0] = X86_64_X87_CLASS
;
6196 classes
[1] = X86_64_X87UP_CLASS
;
6199 classes
[0] = X86_64_SSE_CLASS
;
6200 classes
[1] = X86_64_SSEUP_CLASS
;
6203 classes
[0] = X86_64_SSE_CLASS
;
6204 if (!(bit_offset
% 64))
6210 if (!warned
&& warn_psabi
)
6213 inform (input_location
,
6214 "the ABI of passing structure with complex float"
6215 " member has changed in GCC 4.4");
6217 classes
[1] = X86_64_SSESF_CLASS
;
6221 classes
[0] = X86_64_SSEDF_CLASS
;
6222 classes
[1] = X86_64_SSEDF_CLASS
;
6225 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6228 /* This modes is larger than 16 bytes. */
6236 classes
[0] = X86_64_SSE_CLASS
;
6237 classes
[1] = X86_64_SSEUP_CLASS
;
6238 classes
[2] = X86_64_SSEUP_CLASS
;
6239 classes
[3] = X86_64_SSEUP_CLASS
;
6247 classes
[0] = X86_64_SSE_CLASS
;
6248 classes
[1] = X86_64_SSEUP_CLASS
;
6256 classes
[0] = X86_64_SSE_CLASS
;
6262 gcc_assert (VECTOR_MODE_P (mode
));
6267 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6269 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6270 classes
[0] = X86_64_INTEGERSI_CLASS
;
6272 classes
[0] = X86_64_INTEGER_CLASS
;
6273 classes
[1] = X86_64_INTEGER_CLASS
;
6274 return 1 + (bytes
> 8);
6278 /* Examine the argument and return set number of register required in each
6279 class. Return 0 iff parameter should be passed in memory. */
6281 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6282 int *int_nregs
, int *sse_nregs
)
6284 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6285 int n
= classify_argument (mode
, type
, regclass
, 0);
6291 for (n
--; n
>= 0; n
--)
6292 switch (regclass
[n
])
6294 case X86_64_INTEGER_CLASS
:
6295 case X86_64_INTEGERSI_CLASS
:
6298 case X86_64_SSE_CLASS
:
6299 case X86_64_SSESF_CLASS
:
6300 case X86_64_SSEDF_CLASS
:
6303 case X86_64_NO_CLASS
:
6304 case X86_64_SSEUP_CLASS
:
6306 case X86_64_X87_CLASS
:
6307 case X86_64_X87UP_CLASS
:
6311 case X86_64_COMPLEX_X87_CLASS
:
6312 return in_return
? 2 : 0;
6313 case X86_64_MEMORY_CLASS
:
6319 /* Construct container for the argument used by GCC interface. See
6320 FUNCTION_ARG for the detailed description. */
6323 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6324 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6325 const int *intreg
, int sse_regno
)
6327 /* The following variables hold the static issued_error state. */
6328 static bool issued_sse_arg_error
;
6329 static bool issued_sse_ret_error
;
6330 static bool issued_x87_ret_error
;
6332 enum machine_mode tmpmode
;
6334 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6335 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6339 int needed_sseregs
, needed_intregs
;
6340 rtx exp
[MAX_CLASSES
];
6343 n
= classify_argument (mode
, type
, regclass
, 0);
6346 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6349 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6352 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6353 some less clueful developer tries to use floating-point anyway. */
6354 if (needed_sseregs
&& !TARGET_SSE
)
6358 if (!issued_sse_ret_error
)
6360 error ("SSE register return with SSE disabled");
6361 issued_sse_ret_error
= true;
6364 else if (!issued_sse_arg_error
)
6366 error ("SSE register argument with SSE disabled");
6367 issued_sse_arg_error
= true;
6372 /* Likewise, error if the ABI requires us to return values in the
6373 x87 registers and the user specified -mno-80387. */
6374 if (!TARGET_80387
&& in_return
)
6375 for (i
= 0; i
< n
; i
++)
6376 if (regclass
[i
] == X86_64_X87_CLASS
6377 || regclass
[i
] == X86_64_X87UP_CLASS
6378 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6380 if (!issued_x87_ret_error
)
6382 error ("x87 register return with x87 disabled");
6383 issued_x87_ret_error
= true;
6388 /* First construct simple cases. Avoid SCmode, since we want to use
6389 single register to pass this type. */
6390 if (n
== 1 && mode
!= SCmode
)
6391 switch (regclass
[0])
6393 case X86_64_INTEGER_CLASS
:
6394 case X86_64_INTEGERSI_CLASS
:
6395 return gen_rtx_REG (mode
, intreg
[0]);
6396 case X86_64_SSE_CLASS
:
6397 case X86_64_SSESF_CLASS
:
6398 case X86_64_SSEDF_CLASS
:
6399 if (mode
!= BLKmode
)
6400 return gen_reg_or_parallel (mode
, orig_mode
,
6401 SSE_REGNO (sse_regno
));
6403 case X86_64_X87_CLASS
:
6404 case X86_64_COMPLEX_X87_CLASS
:
6405 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6406 case X86_64_NO_CLASS
:
6407 /* Zero sized array, struct or class. */
6413 && regclass
[0] == X86_64_SSE_CLASS
6414 && regclass
[1] == X86_64_SSEUP_CLASS
6416 return gen_reg_or_parallel (mode
, orig_mode
,
6417 SSE_REGNO (sse_regno
));
6419 && regclass
[0] == X86_64_SSE_CLASS
6420 && regclass
[1] == X86_64_SSEUP_CLASS
6421 && regclass
[2] == X86_64_SSEUP_CLASS
6422 && regclass
[3] == X86_64_SSEUP_CLASS
6424 return gen_reg_or_parallel (mode
, orig_mode
,
6425 SSE_REGNO (sse_regno
));
6427 && regclass
[0] == X86_64_X87_CLASS
6428 && regclass
[1] == X86_64_X87UP_CLASS
)
6429 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6432 && regclass
[0] == X86_64_INTEGER_CLASS
6433 && regclass
[1] == X86_64_INTEGER_CLASS
6434 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6435 && intreg
[0] + 1 == intreg
[1])
6436 return gen_rtx_REG (mode
, intreg
[0]);
6438 /* Otherwise figure out the entries of the PARALLEL. */
6439 for (i
= 0; i
< n
; i
++)
6443 switch (regclass
[i
])
6445 case X86_64_NO_CLASS
:
6447 case X86_64_INTEGER_CLASS
:
6448 case X86_64_INTEGERSI_CLASS
:
6449 /* Merge TImodes on aligned occasions here too. */
6450 if (i
* 8 + 8 > bytes
)
6452 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6453 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6457 /* We've requested 24 bytes we
6458 don't have mode for. Use DImode. */
6459 if (tmpmode
== BLKmode
)
6462 = gen_rtx_EXPR_LIST (VOIDmode
,
6463 gen_rtx_REG (tmpmode
, *intreg
),
6467 case X86_64_SSESF_CLASS
:
6469 = gen_rtx_EXPR_LIST (VOIDmode
,
6470 gen_rtx_REG (SFmode
,
6471 SSE_REGNO (sse_regno
)),
6475 case X86_64_SSEDF_CLASS
:
6477 = gen_rtx_EXPR_LIST (VOIDmode
,
6478 gen_rtx_REG (DFmode
,
6479 SSE_REGNO (sse_regno
)),
6483 case X86_64_SSE_CLASS
:
6491 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6501 && regclass
[1] == X86_64_SSEUP_CLASS
6502 && regclass
[2] == X86_64_SSEUP_CLASS
6503 && regclass
[3] == X86_64_SSEUP_CLASS
);
6511 = gen_rtx_EXPR_LIST (VOIDmode
,
6512 gen_rtx_REG (tmpmode
,
6513 SSE_REGNO (sse_regno
)),
6522 /* Empty aligned struct, union or class. */
6526 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6527 for (i
= 0; i
< nexps
; i
++)
6528 XVECEXP (ret
, 0, i
) = exp
[i
];
6532 /* Update the data in CUM to advance over an argument of mode MODE
6533 and data type TYPE. (TYPE is null for libcalls where that information
6534 may not be available.) */
6537 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6538 const_tree type
, HOST_WIDE_INT bytes
,
6539 HOST_WIDE_INT words
)
6555 cum
->words
+= words
;
6556 cum
->nregs
-= words
;
6557 cum
->regno
+= words
;
6559 if (cum
->nregs
<= 0)
6567 /* OImode shouldn't be used directly. */
6571 if (cum
->float_in_sse
< 2)
6574 if (cum
->float_in_sse
< 1)
6591 if (!type
|| !AGGREGATE_TYPE_P (type
))
6593 cum
->sse_words
+= words
;
6594 cum
->sse_nregs
-= 1;
6595 cum
->sse_regno
+= 1;
6596 if (cum
->sse_nregs
<= 0)
6610 if (!type
|| !AGGREGATE_TYPE_P (type
))
6612 cum
->mmx_words
+= words
;
6613 cum
->mmx_nregs
-= 1;
6614 cum
->mmx_regno
+= 1;
6615 if (cum
->mmx_nregs
<= 0)
6626 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6627 const_tree type
, HOST_WIDE_INT words
, bool named
)
6629 int int_nregs
, sse_nregs
;
6631 /* Unnamed 256bit vector mode parameters are passed on stack. */
6632 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6635 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6636 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6638 cum
->nregs
-= int_nregs
;
6639 cum
->sse_nregs
-= sse_nregs
;
6640 cum
->regno
+= int_nregs
;
6641 cum
->sse_regno
+= sse_nregs
;
6645 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6646 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6647 cum
->words
+= words
;
6652 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6653 HOST_WIDE_INT words
)
6655 /* Otherwise, this should be passed indirect. */
6656 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6658 cum
->words
+= words
;
6666 /* Update the data in CUM to advance over an argument of mode MODE and
6667 data type TYPE. (TYPE is null for libcalls where that information
6668 may not be available.) */
6671 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6672 const_tree type
, bool named
)
6674 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6675 HOST_WIDE_INT bytes
, words
;
6677 if (mode
== BLKmode
)
6678 bytes
= int_size_in_bytes (type
);
6680 bytes
= GET_MODE_SIZE (mode
);
6681 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6684 mode
= type_natural_mode (type
, NULL
);
6686 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6687 function_arg_advance_ms_64 (cum
, bytes
, words
);
6688 else if (TARGET_64BIT
)
6689 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6691 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6694 /* Define where to put the arguments to a function.
6695 Value is zero to push the argument on the stack,
6696 or a hard register in which to store the argument.
6698 MODE is the argument's machine mode.
6699 TYPE is the data type of the argument (as a tree).
6700 This is null for libcalls where that information may
6702 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6703 the preceding args and about the function being called.
6704 NAMED is nonzero if this argument is a named parameter
6705 (otherwise it is an extra parameter matching an ellipsis). */
6708 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6709 enum machine_mode orig_mode
, const_tree type
,
6710 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6712 static bool warnedsse
, warnedmmx
;
6714 /* Avoid the AL settings for the Unix64 ABI. */
6715 if (mode
== VOIDmode
)
6731 if (words
<= cum
->nregs
)
6733 int regno
= cum
->regno
;
6735 /* Fastcall allocates the first two DWORD (SImode) or
6736 smaller arguments to ECX and EDX if it isn't an
6742 || (type
&& AGGREGATE_TYPE_P (type
)))
6745 /* ECX not EAX is the first allocated register. */
6746 if (regno
== AX_REG
)
6749 return gen_rtx_REG (mode
, regno
);
6754 if (cum
->float_in_sse
< 2)
6757 if (cum
->float_in_sse
< 1)
6761 /* In 32bit, we pass TImode in xmm registers. */
6768 if (!type
|| !AGGREGATE_TYPE_P (type
))
6770 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6773 warning (0, "SSE vector argument without SSE enabled "
6777 return gen_reg_or_parallel (mode
, orig_mode
,
6778 cum
->sse_regno
+ FIRST_SSE_REG
);
6783 /* OImode shouldn't be used directly. */
6792 if (!type
|| !AGGREGATE_TYPE_P (type
))
6795 return gen_reg_or_parallel (mode
, orig_mode
,
6796 cum
->sse_regno
+ FIRST_SSE_REG
);
6806 if (!type
|| !AGGREGATE_TYPE_P (type
))
6808 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6811 warning (0, "MMX vector argument without MMX enabled "
6815 return gen_reg_or_parallel (mode
, orig_mode
,
6816 cum
->mmx_regno
+ FIRST_MMX_REG
);
6825 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6826 enum machine_mode orig_mode
, const_tree type
, bool named
)
6828 /* Handle a hidden AL argument containing number of registers
6829 for varargs x86-64 functions. */
6830 if (mode
== VOIDmode
)
6831 return GEN_INT (cum
->maybe_vaarg
6832 ? (cum
->sse_nregs
< 0
6833 ? X86_64_SSE_REGPARM_MAX
6848 /* Unnamed 256bit vector mode parameters are passed on stack. */
6854 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6856 &x86_64_int_parameter_registers
[cum
->regno
],
6861 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6862 enum machine_mode orig_mode
, bool named
,
6863 HOST_WIDE_INT bytes
)
6867 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6868 We use value of -2 to specify that current function call is MSABI. */
6869 if (mode
== VOIDmode
)
6870 return GEN_INT (-2);
6872 /* If we've run out of registers, it goes on the stack. */
6873 if (cum
->nregs
== 0)
6876 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6878 /* Only floating point modes are passed in anything but integer regs. */
6879 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6882 regno
= cum
->regno
+ FIRST_SSE_REG
;
6887 /* Unnamed floating parameters are passed in both the
6888 SSE and integer registers. */
6889 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6890 t2
= gen_rtx_REG (mode
, regno
);
6891 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6892 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6893 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6896 /* Handle aggregated types passed in register. */
6897 if (orig_mode
== BLKmode
)
6899 if (bytes
> 0 && bytes
<= 8)
6900 mode
= (bytes
> 4 ? DImode
: SImode
);
6901 if (mode
== BLKmode
)
6905 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6908 /* Return where to put the arguments to a function.
6909 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6911 MODE is the argument's machine mode. TYPE is the data type of the
6912 argument. It is null for libcalls where that information may not be
6913 available. CUM gives information about the preceding args and about
6914 the function being called. NAMED is nonzero if this argument is a
6915 named parameter (otherwise it is an extra parameter matching an
6919 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6920 const_tree type
, bool named
)
6922 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6923 enum machine_mode mode
= omode
;
6924 HOST_WIDE_INT bytes
, words
;
6927 if (mode
== BLKmode
)
6928 bytes
= int_size_in_bytes (type
);
6930 bytes
= GET_MODE_SIZE (mode
);
6931 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6933 /* To simplify the code below, represent vector types with a vector mode
6934 even if MMX/SSE are not active. */
6935 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6936 mode
= type_natural_mode (type
, cum
);
6938 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6939 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6940 else if (TARGET_64BIT
)
6941 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6943 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6948 /* A C expression that indicates when an argument must be passed by
6949 reference. If nonzero for an argument, a copy of that argument is
6950 made in memory and a pointer to the argument is passed instead of
6951 the argument itself. The pointer is passed in whatever way is
6952 appropriate for passing a pointer to that type. */
6955 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6956 enum machine_mode mode ATTRIBUTE_UNUSED
,
6957 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6959 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6961 /* See Windows x64 Software Convention. */
6962 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6964 int msize
= (int) GET_MODE_SIZE (mode
);
6967 /* Arrays are passed by reference. */
6968 if (TREE_CODE (type
) == ARRAY_TYPE
)
6971 if (AGGREGATE_TYPE_P (type
))
6973 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6974 are passed by reference. */
6975 msize
= int_size_in_bytes (type
);
6979 /* __m128 is passed by reference. */
6981 case 1: case 2: case 4: case 8:
6987 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
6993 /* Return true when TYPE should be 128bit aligned for 32bit argument
6994 passing ABI. XXX: This function is obsolete and is only used for
6995 checking psABI compatibility with previous versions of GCC. */
6998 ix86_compat_aligned_value_p (const_tree type
)
7000 enum machine_mode mode
= TYPE_MODE (type
);
7001 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7005 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7007 if (TYPE_ALIGN (type
) < 128)
7010 if (AGGREGATE_TYPE_P (type
))
7012 /* Walk the aggregates recursively. */
7013 switch (TREE_CODE (type
))
7017 case QUAL_UNION_TYPE
:
7021 /* Walk all the structure fields. */
7022 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7024 if (TREE_CODE (field
) == FIELD_DECL
7025 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7032 /* Just for use if some languages passes arrays by value. */
7033 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7044 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7045 XXX: This function is obsolete and is only used for checking psABI
7046 compatibility with previous versions of GCC. */
7049 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7050 const_tree type
, unsigned int align
)
7052 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7053 natural boundaries. */
7054 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7056 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7057 make an exception for SSE modes since these require 128bit
7060 The handling here differs from field_alignment. ICC aligns MMX
7061 arguments to 4 byte boundaries, while structure fields are aligned
7062 to 8 byte boundaries. */
7065 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7066 align
= PARM_BOUNDARY
;
7070 if (!ix86_compat_aligned_value_p (type
))
7071 align
= PARM_BOUNDARY
;
7074 if (align
> BIGGEST_ALIGNMENT
)
7075 align
= BIGGEST_ALIGNMENT
;
7079 /* Return true when TYPE should be 128bit aligned for 32bit argument
7083 ix86_contains_aligned_value_p (const_tree type
)
7085 enum machine_mode mode
= TYPE_MODE (type
);
7087 if (mode
== XFmode
|| mode
== XCmode
)
7090 if (TYPE_ALIGN (type
) < 128)
7093 if (AGGREGATE_TYPE_P (type
))
7095 /* Walk the aggregates recursively. */
7096 switch (TREE_CODE (type
))
7100 case QUAL_UNION_TYPE
:
7104 /* Walk all the structure fields. */
7105 for (field
= TYPE_FIELDS (type
);
7107 field
= DECL_CHAIN (field
))
7109 if (TREE_CODE (field
) == FIELD_DECL
7110 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7117 /* Just for use if some languages passes arrays by value. */
7118 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7127 return TYPE_ALIGN (type
) >= 128;
7132 /* Gives the alignment boundary, in bits, of an argument with the
7133 specified mode and type. */
7136 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7141 /* Since the main variant type is used for call, we convert it to
7142 the main variant type. */
7143 type
= TYPE_MAIN_VARIANT (type
);
7144 align
= TYPE_ALIGN (type
);
7147 align
= GET_MODE_ALIGNMENT (mode
);
7148 if (align
< PARM_BOUNDARY
)
7149 align
= PARM_BOUNDARY
;
7153 unsigned int saved_align
= align
;
7157 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7160 if (mode
== XFmode
|| mode
== XCmode
)
7161 align
= PARM_BOUNDARY
;
7163 else if (!ix86_contains_aligned_value_p (type
))
7164 align
= PARM_BOUNDARY
;
7167 align
= PARM_BOUNDARY
;
7172 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7176 inform (input_location
,
7177 "The ABI for passing parameters with %d-byte"
7178 " alignment has changed in GCC 4.6",
7179 align
/ BITS_PER_UNIT
);
7186 /* Return true if N is a possible register number of function value. */
7189 ix86_function_value_regno_p (const unsigned int regno
)
7196 case FIRST_FLOAT_REG
:
7197 /* TODO: The function should depend on current function ABI but
7198 builtins.c would need updating then. Therefore we use the
7200 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7202 return TARGET_FLOAT_RETURNS_IN_80387
;
7208 if (TARGET_MACHO
|| TARGET_64BIT
)
7216 /* Define how to find the value returned by a function.
7217 VALTYPE is the data type of the value (as a tree).
7218 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7219 otherwise, FUNC is 0. */
7222 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7223 const_tree fntype
, const_tree fn
)
7227 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7228 we normally prevent this case when mmx is not available. However
7229 some ABIs may require the result to be returned like DImode. */
7230 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7231 regno
= FIRST_MMX_REG
;
7233 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7234 we prevent this case when sse is not available. However some ABIs
7235 may require the result to be returned like integer TImode. */
7236 else if (mode
== TImode
7237 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7238 regno
= FIRST_SSE_REG
;
7240 /* 32-byte vector modes in %ymm0. */
7241 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7242 regno
= FIRST_SSE_REG
;
7244 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7245 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7246 regno
= FIRST_FLOAT_REG
;
7248 /* Most things go in %eax. */
7251 /* Override FP return register with %xmm0 for local functions when
7252 SSE math is enabled or for functions with sseregparm attribute. */
7253 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7255 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7256 if ((sse_level
>= 1 && mode
== SFmode
)
7257 || (sse_level
== 2 && mode
== DFmode
))
7258 regno
= FIRST_SSE_REG
;
7261 /* OImode shouldn't be used directly. */
7262 gcc_assert (mode
!= OImode
);
7264 return gen_rtx_REG (orig_mode
, regno
);
7268 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7273 /* Handle libcalls, which don't provide a type node. */
7274 if (valtype
== NULL
)
7288 regno
= FIRST_SSE_REG
;
7292 regno
= FIRST_FLOAT_REG
;
7300 return gen_rtx_REG (mode
, regno
);
7302 else if (POINTER_TYPE_P (valtype
))
7304 /* Pointers are always returned in word_mode. */
7308 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7309 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7310 x86_64_int_return_registers
, 0);
7312 /* For zero sized structures, construct_container returns NULL, but we
7313 need to keep rest of compiler happy by returning meaningful value. */
7315 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7321 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7323 unsigned int regno
= AX_REG
;
7327 switch (GET_MODE_SIZE (mode
))
7330 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7331 && !COMPLEX_MODE_P (mode
))
7332 regno
= FIRST_SSE_REG
;
7336 if (mode
== SFmode
|| mode
== DFmode
)
7337 regno
= FIRST_SSE_REG
;
7343 return gen_rtx_REG (orig_mode
, regno
);
7347 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7348 enum machine_mode orig_mode
, enum machine_mode mode
)
7350 const_tree fn
, fntype
;
7353 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7354 fn
= fntype_or_decl
;
7355 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7357 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7358 return function_value_ms_64 (orig_mode
, mode
);
7359 else if (TARGET_64BIT
)
7360 return function_value_64 (orig_mode
, mode
, valtype
);
7362 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7366 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7367 bool outgoing ATTRIBUTE_UNUSED
)
7369 enum machine_mode mode
, orig_mode
;
7371 orig_mode
= TYPE_MODE (valtype
);
7372 mode
= type_natural_mode (valtype
, NULL
);
7373 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7376 /* Pointer function arguments and return values are promoted to
7379 static enum machine_mode
7380 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7381 int *punsignedp
, const_tree fntype
,
7384 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7386 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7389 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7393 /* Return true if a structure, union or array with MODE containing FIELD
7394 should be accessed using BLKmode. */
7397 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7399 /* Union with XFmode must be in BLKmode. */
7400 return (mode
== XFmode
7401 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7402 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7406 ix86_libcall_value (enum machine_mode mode
)
7408 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7411 /* Return true iff type is returned in memory. */
7413 static bool ATTRIBUTE_UNUSED
7414 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7418 if (mode
== BLKmode
)
7421 size
= int_size_in_bytes (type
);
7423 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7426 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7428 /* User-created vectors small enough to fit in EAX. */
7432 /* MMX/3dNow values are returned in MM0,
7433 except when it doesn't exits or the ABI prescribes otherwise. */
7435 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7437 /* SSE values are returned in XMM0, except when it doesn't exist. */
7441 /* AVX values are returned in YMM0, except when it doesn't exist. */
7452 /* OImode shouldn't be used directly. */
7453 gcc_assert (mode
!= OImode
);
7458 static bool ATTRIBUTE_UNUSED
7459 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7461 int needed_intregs
, needed_sseregs
;
7462 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7465 static bool ATTRIBUTE_UNUSED
7466 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7468 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7470 /* __m128 is returned in xmm0. */
7471 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7472 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7475 /* Otherwise, the size must be exactly in [1248]. */
7476 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7480 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7482 #ifdef SUBTARGET_RETURN_IN_MEMORY
7483 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7485 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7489 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7490 return return_in_memory_ms_64 (type
, mode
);
7492 return return_in_memory_64 (type
, mode
);
7495 return return_in_memory_32 (type
, mode
);
7499 /* When returning SSE vector types, we have a choice of either
7500 (1) being abi incompatible with a -march switch, or
7501 (2) generating an error.
7502 Given no good solution, I think the safest thing is one warning.
7503 The user won't be able to use -Werror, but....
7505 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7506 called in response to actually generating a caller or callee that
7507 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7508 via aggregate_value_p for general type probing from tree-ssa. */
7511 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7513 static bool warnedsse
, warnedmmx
;
7515 if (!TARGET_64BIT
&& type
)
7517 /* Look at the return type of the function, not the function type. */
7518 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7520 if (!TARGET_SSE
&& !warnedsse
)
7523 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7526 warning (0, "SSE vector return without SSE enabled "
7531 if (!TARGET_MMX
&& !warnedmmx
)
7533 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7536 warning (0, "MMX vector return without MMX enabled "
7546 /* Create the va_list data type. */
7548 /* Returns the calling convention specific va_list data type.
7549 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7552 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7554 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7556 /* For i386 we use plain pointer to argument area. */
7557 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7558 return build_pointer_type (char_type_node
);
7560 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7561 type_decl
= build_decl (BUILTINS_LOCATION
,
7562 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7564 f_gpr
= build_decl (BUILTINS_LOCATION
,
7565 FIELD_DECL
, get_identifier ("gp_offset"),
7566 unsigned_type_node
);
7567 f_fpr
= build_decl (BUILTINS_LOCATION
,
7568 FIELD_DECL
, get_identifier ("fp_offset"),
7569 unsigned_type_node
);
7570 f_ovf
= build_decl (BUILTINS_LOCATION
,
7571 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7573 f_sav
= build_decl (BUILTINS_LOCATION
,
7574 FIELD_DECL
, get_identifier ("reg_save_area"),
7577 va_list_gpr_counter_field
= f_gpr
;
7578 va_list_fpr_counter_field
= f_fpr
;
7580 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7581 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7582 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7583 DECL_FIELD_CONTEXT (f_sav
) = record
;
7585 TYPE_STUB_DECL (record
) = type_decl
;
7586 TYPE_NAME (record
) = type_decl
;
7587 TYPE_FIELDS (record
) = f_gpr
;
7588 DECL_CHAIN (f_gpr
) = f_fpr
;
7589 DECL_CHAIN (f_fpr
) = f_ovf
;
7590 DECL_CHAIN (f_ovf
) = f_sav
;
7592 layout_type (record
);
7594 /* The correct type is an array type of one element. */
7595 return build_array_type (record
, build_index_type (size_zero_node
));
7598 /* Setup the builtin va_list data type and for 64-bit the additional
7599 calling convention specific va_list data types. */
7602 ix86_build_builtin_va_list (void)
7604 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7606 /* Initialize abi specific va_list builtin types. */
7610 if (ix86_abi
== MS_ABI
)
7612 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7613 if (TREE_CODE (t
) != RECORD_TYPE
)
7614 t
= build_variant_type_copy (t
);
7615 sysv_va_list_type_node
= t
;
7620 if (TREE_CODE (t
) != RECORD_TYPE
)
7621 t
= build_variant_type_copy (t
);
7622 sysv_va_list_type_node
= t
;
7624 if (ix86_abi
!= MS_ABI
)
7626 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7627 if (TREE_CODE (t
) != RECORD_TYPE
)
7628 t
= build_variant_type_copy (t
);
7629 ms_va_list_type_node
= t
;
7634 if (TREE_CODE (t
) != RECORD_TYPE
)
7635 t
= build_variant_type_copy (t
);
7636 ms_va_list_type_node
= t
;
7643 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7646 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7652 /* GPR size of varargs save area. */
7653 if (cfun
->va_list_gpr_size
)
7654 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7656 ix86_varargs_gpr_size
= 0;
7658 /* FPR size of varargs save area. We don't need it if we don't pass
7659 anything in SSE registers. */
7660 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7661 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7663 ix86_varargs_fpr_size
= 0;
7665 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7668 save_area
= frame_pointer_rtx
;
7669 set
= get_varargs_alias_set ();
7671 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7672 if (max
> X86_64_REGPARM_MAX
)
7673 max
= X86_64_REGPARM_MAX
;
7675 for (i
= cum
->regno
; i
< max
; i
++)
7677 mem
= gen_rtx_MEM (word_mode
,
7678 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7679 MEM_NOTRAP_P (mem
) = 1;
7680 set_mem_alias_set (mem
, set
);
7681 emit_move_insn (mem
,
7682 gen_rtx_REG (word_mode
,
7683 x86_64_int_parameter_registers
[i
]));
7686 if (ix86_varargs_fpr_size
)
7688 enum machine_mode smode
;
7691 /* Now emit code to save SSE registers. The AX parameter contains number
7692 of SSE parameter registers used to call this function, though all we
7693 actually check here is the zero/non-zero status. */
7695 label
= gen_label_rtx ();
7696 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7697 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7700 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7701 we used movdqa (i.e. TImode) instead? Perhaps even better would
7702 be if we could determine the real mode of the data, via a hook
7703 into pass_stdarg. Ignore all that for now. */
7705 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7706 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7708 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7709 if (max
> X86_64_SSE_REGPARM_MAX
)
7710 max
= X86_64_SSE_REGPARM_MAX
;
7712 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7714 mem
= plus_constant (Pmode
, save_area
,
7715 i
* 16 + ix86_varargs_gpr_size
);
7716 mem
= gen_rtx_MEM (smode
, mem
);
7717 MEM_NOTRAP_P (mem
) = 1;
7718 set_mem_alias_set (mem
, set
);
7719 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7721 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7729 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7731 alias_set_type set
= get_varargs_alias_set ();
7734 /* Reset to zero, as there might be a sysv vaarg used
7736 ix86_varargs_gpr_size
= 0;
7737 ix86_varargs_fpr_size
= 0;
7739 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7743 mem
= gen_rtx_MEM (Pmode
,
7744 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7745 i
* UNITS_PER_WORD
));
7746 MEM_NOTRAP_P (mem
) = 1;
7747 set_mem_alias_set (mem
, set
);
7749 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7750 emit_move_insn (mem
, reg
);
7755 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7756 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7759 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7760 CUMULATIVE_ARGS next_cum
;
7763 /* This argument doesn't appear to be used anymore. Which is good,
7764 because the old code here didn't suppress rtl generation. */
7765 gcc_assert (!no_rtl
);
7770 fntype
= TREE_TYPE (current_function_decl
);
7772 /* For varargs, we do not want to skip the dummy va_dcl argument.
7773 For stdargs, we do want to skip the last named argument. */
7775 if (stdarg_p (fntype
))
7776 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7779 if (cum
->call_abi
== MS_ABI
)
7780 setup_incoming_varargs_ms_64 (&next_cum
);
7782 setup_incoming_varargs_64 (&next_cum
);
7785 /* Checks if TYPE is of kind va_list char *. */
7788 is_va_list_char_pointer (tree type
)
7792 /* For 32-bit it is always true. */
7795 canonic
= ix86_canonical_va_list_type (type
);
7796 return (canonic
== ms_va_list_type_node
7797 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7800 /* Implement va_start. */
7803 ix86_va_start (tree valist
, rtx nextarg
)
7805 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7806 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7807 tree gpr
, fpr
, ovf
, sav
, t
;
7811 if (flag_split_stack
7812 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7814 unsigned int scratch_regno
;
7816 /* When we are splitting the stack, we can't refer to the stack
7817 arguments using internal_arg_pointer, because they may be on
7818 the old stack. The split stack prologue will arrange to
7819 leave a pointer to the old stack arguments in a scratch
7820 register, which we here copy to a pseudo-register. The split
7821 stack prologue can't set the pseudo-register directly because
7822 it (the prologue) runs before any registers have been saved. */
7824 scratch_regno
= split_stack_prologue_scratch_regno ();
7825 if (scratch_regno
!= INVALID_REGNUM
)
7829 reg
= gen_reg_rtx (Pmode
);
7830 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7833 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7837 push_topmost_sequence ();
7838 emit_insn_after (seq
, entry_of_function ());
7839 pop_topmost_sequence ();
7843 /* Only 64bit target needs something special. */
7844 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7846 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7847 std_expand_builtin_va_start (valist
, nextarg
);
7852 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7853 next
= expand_binop (ptr_mode
, add_optab
,
7854 cfun
->machine
->split_stack_varargs_pointer
,
7855 crtl
->args
.arg_offset_rtx
,
7856 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7857 convert_move (va_r
, next
, 0);
7862 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7863 f_fpr
= DECL_CHAIN (f_gpr
);
7864 f_ovf
= DECL_CHAIN (f_fpr
);
7865 f_sav
= DECL_CHAIN (f_ovf
);
7867 valist
= build_simple_mem_ref (valist
);
7868 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7869 /* The following should be folded into the MEM_REF offset. */
7870 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7872 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7874 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7876 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7879 /* Count number of gp and fp argument registers used. */
7880 words
= crtl
->args
.info
.words
;
7881 n_gpr
= crtl
->args
.info
.regno
;
7882 n_fpr
= crtl
->args
.info
.sse_regno
;
7884 if (cfun
->va_list_gpr_size
)
7886 type
= TREE_TYPE (gpr
);
7887 t
= build2 (MODIFY_EXPR
, type
,
7888 gpr
, build_int_cst (type
, n_gpr
* 8));
7889 TREE_SIDE_EFFECTS (t
) = 1;
7890 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7893 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7895 type
= TREE_TYPE (fpr
);
7896 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7897 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7898 TREE_SIDE_EFFECTS (t
) = 1;
7899 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7902 /* Find the overflow area. */
7903 type
= TREE_TYPE (ovf
);
7904 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7905 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7907 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7908 t
= make_tree (type
, ovf_rtx
);
7910 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7911 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7912 TREE_SIDE_EFFECTS (t
) = 1;
7913 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7915 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7917 /* Find the register save area.
7918 Prologue of the function save it right above stack frame. */
7919 type
= TREE_TYPE (sav
);
7920 t
= make_tree (type
, frame_pointer_rtx
);
7921 if (!ix86_varargs_gpr_size
)
7922 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7923 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7924 TREE_SIDE_EFFECTS (t
) = 1;
7925 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7929 /* Implement va_arg. */
7932 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7935 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7936 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7937 tree gpr
, fpr
, ovf
, sav
, t
;
7939 tree lab_false
, lab_over
= NULL_TREE
;
7944 enum machine_mode nat_mode
;
7945 unsigned int arg_boundary
;
7947 /* Only 64bit target needs something special. */
7948 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7949 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7951 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7952 f_fpr
= DECL_CHAIN (f_gpr
);
7953 f_ovf
= DECL_CHAIN (f_fpr
);
7954 f_sav
= DECL_CHAIN (f_ovf
);
7956 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7957 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7958 valist
= build_va_arg_indirect_ref (valist
);
7959 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7960 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7961 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7963 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7965 type
= build_pointer_type (type
);
7966 size
= int_size_in_bytes (type
);
7967 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7969 nat_mode
= type_natural_mode (type
, NULL
);
7978 /* Unnamed 256bit vector mode parameters are passed on stack. */
7979 if (!TARGET_64BIT_MS_ABI
)
7986 container
= construct_container (nat_mode
, TYPE_MODE (type
),
7987 type
, 0, X86_64_REGPARM_MAX
,
7988 X86_64_SSE_REGPARM_MAX
, intreg
,
7993 /* Pull the value out of the saved registers. */
7995 addr
= create_tmp_var (ptr_type_node
, "addr");
7999 int needed_intregs
, needed_sseregs
;
8001 tree int_addr
, sse_addr
;
8003 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8004 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8006 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8008 need_temp
= (!REG_P (container
)
8009 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8010 || TYPE_ALIGN (type
) > 128));
8012 /* In case we are passing structure, verify that it is consecutive block
8013 on the register save area. If not we need to do moves. */
8014 if (!need_temp
&& !REG_P (container
))
8016 /* Verify that all registers are strictly consecutive */
8017 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8021 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8023 rtx slot
= XVECEXP (container
, 0, i
);
8024 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8025 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8033 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8035 rtx slot
= XVECEXP (container
, 0, i
);
8036 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8037 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8049 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8050 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8053 /* First ensure that we fit completely in registers. */
8056 t
= build_int_cst (TREE_TYPE (gpr
),
8057 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8058 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8059 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8060 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8061 gimplify_and_add (t
, pre_p
);
8065 t
= build_int_cst (TREE_TYPE (fpr
),
8066 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8067 + X86_64_REGPARM_MAX
* 8);
8068 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8069 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8070 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8071 gimplify_and_add (t
, pre_p
);
8074 /* Compute index to start of area used for integer regs. */
8077 /* int_addr = gpr + sav; */
8078 t
= fold_build_pointer_plus (sav
, gpr
);
8079 gimplify_assign (int_addr
, t
, pre_p
);
8083 /* sse_addr = fpr + sav; */
8084 t
= fold_build_pointer_plus (sav
, fpr
);
8085 gimplify_assign (sse_addr
, t
, pre_p
);
8089 int i
, prev_size
= 0;
8090 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8093 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8094 gimplify_assign (addr
, t
, pre_p
);
8096 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8098 rtx slot
= XVECEXP (container
, 0, i
);
8099 rtx reg
= XEXP (slot
, 0);
8100 enum machine_mode mode
= GET_MODE (reg
);
8106 tree dest_addr
, dest
;
8107 int cur_size
= GET_MODE_SIZE (mode
);
8109 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8110 prev_size
= INTVAL (XEXP (slot
, 1));
8111 if (prev_size
+ cur_size
> size
)
8113 cur_size
= size
- prev_size
;
8114 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8115 if (mode
== BLKmode
)
8118 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8119 if (mode
== GET_MODE (reg
))
8120 addr_type
= build_pointer_type (piece_type
);
8122 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8124 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8127 if (SSE_REGNO_P (REGNO (reg
)))
8129 src_addr
= sse_addr
;
8130 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8134 src_addr
= int_addr
;
8135 src_offset
= REGNO (reg
) * 8;
8137 src_addr
= fold_convert (addr_type
, src_addr
);
8138 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8140 dest_addr
= fold_convert (daddr_type
, addr
);
8141 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8142 if (cur_size
== GET_MODE_SIZE (mode
))
8144 src
= build_va_arg_indirect_ref (src_addr
);
8145 dest
= build_va_arg_indirect_ref (dest_addr
);
8147 gimplify_assign (dest
, src
, pre_p
);
8152 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8153 3, dest_addr
, src_addr
,
8154 size_int (cur_size
));
8155 gimplify_and_add (copy
, pre_p
);
8157 prev_size
+= cur_size
;
8163 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8164 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8165 gimplify_assign (gpr
, t
, pre_p
);
8170 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8171 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8172 gimplify_assign (fpr
, t
, pre_p
);
8175 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8177 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8180 /* ... otherwise out of the overflow area. */
8182 /* When we align parameter on stack for caller, if the parameter
8183 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8184 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8185 here with caller. */
8186 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8187 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8188 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8190 /* Care for on-stack alignment if needed. */
8191 if (arg_boundary
<= 64 || size
== 0)
8195 HOST_WIDE_INT align
= arg_boundary
/ 8;
8196 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8197 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8198 build_int_cst (TREE_TYPE (t
), -align
));
8201 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8202 gimplify_assign (addr
, t
, pre_p
);
8204 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8205 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8208 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8210 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8211 addr
= fold_convert (ptrtype
, addr
);
8214 addr
= build_va_arg_indirect_ref (addr
);
8215 return build_va_arg_indirect_ref (addr
);
8218 /* Return true if OPNUM's MEM should be matched
8219 in movabs* patterns. */
8222 ix86_check_movabs (rtx insn
, int opnum
)
8226 set
= PATTERN (insn
);
8227 if (GET_CODE (set
) == PARALLEL
)
8228 set
= XVECEXP (set
, 0, 0);
8229 gcc_assert (GET_CODE (set
) == SET
);
8230 mem
= XEXP (set
, opnum
);
8231 while (GET_CODE (mem
) == SUBREG
)
8232 mem
= SUBREG_REG (mem
);
8233 gcc_assert (MEM_P (mem
));
8234 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8237 /* Initialize the table of extra 80387 mathematical constants. */
8240 init_ext_80387_constants (void)
8242 static const char * cst
[5] =
8244 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8245 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8246 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8247 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8248 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8252 for (i
= 0; i
< 5; i
++)
8254 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8255 /* Ensure each constant is rounded to XFmode precision. */
8256 real_convert (&ext_80387_constants_table
[i
],
8257 XFmode
, &ext_80387_constants_table
[i
]);
8260 ext_80387_constants_init
= 1;
8263 /* Return non-zero if the constant is something that
8264 can be loaded with a special instruction. */
8267 standard_80387_constant_p (rtx x
)
8269 enum machine_mode mode
= GET_MODE (x
);
8273 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8276 if (x
== CONST0_RTX (mode
))
8278 if (x
== CONST1_RTX (mode
))
8281 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8283 /* For XFmode constants, try to find a special 80387 instruction when
8284 optimizing for size or on those CPUs that benefit from them. */
8286 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8290 if (! ext_80387_constants_init
)
8291 init_ext_80387_constants ();
8293 for (i
= 0; i
< 5; i
++)
8294 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8298 /* Load of the constant -0.0 or -1.0 will be split as
8299 fldz;fchs or fld1;fchs sequence. */
8300 if (real_isnegzero (&r
))
8302 if (real_identical (&r
, &dconstm1
))
8308 /* Return the opcode of the special instruction to be used to load
8312 standard_80387_constant_opcode (rtx x
)
8314 switch (standard_80387_constant_p (x
))
8338 /* Return the CONST_DOUBLE representing the 80387 constant that is
8339 loaded by the specified special instruction. The argument IDX
8340 matches the return value from standard_80387_constant_p. */
8343 standard_80387_constant_rtx (int idx
)
8347 if (! ext_80387_constants_init
)
8348 init_ext_80387_constants ();
8364 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8368 /* Return 1 if X is all 0s and 2 if x is all 1s
8369 in supported SSE/AVX vector mode. */
8372 standard_sse_constant_p (rtx x
)
8374 enum machine_mode mode
= GET_MODE (x
);
8376 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8378 if (vector_all_ones_operand (x
, mode
))
8400 /* Return the opcode of the special instruction to be used to load
8404 standard_sse_constant_opcode (rtx insn
, rtx x
)
8406 switch (standard_sse_constant_p (x
))
8409 switch (get_attr_mode (insn
))
8412 return "%vpxor\t%0, %d0";
8414 return "%vxorpd\t%0, %d0";
8416 return "%vxorps\t%0, %d0";
8419 return "vpxor\t%x0, %x0, %x0";
8421 return "vxorpd\t%x0, %x0, %x0";
8423 return "vxorps\t%x0, %x0, %x0";
8431 return "vpcmpeqd\t%0, %0, %0";
8433 return "pcmpeqd\t%0, %0";
8441 /* Returns true if OP contains a symbol reference */
8444 symbolic_reference_mentioned_p (rtx op
)
8449 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8452 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8453 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8459 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8460 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8464 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8471 /* Return true if it is appropriate to emit `ret' instructions in the
8472 body of a function. Do this only if the epilogue is simple, needing a
8473 couple of insns. Prior to reloading, we can't tell how many registers
8474 must be saved, so return false then. Return false if there is no frame
8475 marker to de-allocate. */
8478 ix86_can_use_return_insn_p (void)
8480 struct ix86_frame frame
;
8482 if (! reload_completed
|| frame_pointer_needed
)
8485 /* Don't allow more than 32k pop, since that's all we can do
8486 with one instruction. */
8487 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8490 ix86_compute_frame_layout (&frame
);
8491 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8492 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8495 /* Value should be nonzero if functions must have frame pointers.
8496 Zero means the frame pointer need not be set up (and parms may
8497 be accessed via the stack pointer) in functions that seem suitable. */
8500 ix86_frame_pointer_required (void)
8502 /* If we accessed previous frames, then the generated code expects
8503 to be able to access the saved ebp value in our frame. */
8504 if (cfun
->machine
->accesses_prev_frame
)
8507 /* Several x86 os'es need a frame pointer for other reasons,
8508 usually pertaining to setjmp. */
8509 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8512 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8513 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8516 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8517 allocation is 4GB. */
8518 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8521 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8522 turns off the frame pointer by default. Turn it back on now if
8523 we've not got a leaf function. */
8524 if (TARGET_OMIT_LEAF_FRAME_POINTER
8526 || ix86_current_function_calls_tls_descriptor
))
8529 if (crtl
->profile
&& !flag_fentry
)
8535 /* Record that the current function accesses previous call frames. */
8538 ix86_setup_frame_addresses (void)
8540 cfun
->machine
->accesses_prev_frame
= 1;
8543 #ifndef USE_HIDDEN_LINKONCE
8544 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8545 # define USE_HIDDEN_LINKONCE 1
8547 # define USE_HIDDEN_LINKONCE 0
8551 static int pic_labels_used
;
8553 /* Fills in the label name that should be used for a pc thunk for
8554 the given register. */
8557 get_pc_thunk_name (char name
[32], unsigned int regno
)
8559 gcc_assert (!TARGET_64BIT
);
8561 if (USE_HIDDEN_LINKONCE
)
8562 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8564 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8568 /* This function generates code for -fpic that loads %ebx with
8569 the return address of the caller and then returns. */
8572 ix86_code_end (void)
8577 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8582 if (!(pic_labels_used
& (1 << regno
)))
8585 get_pc_thunk_name (name
, regno
);
8587 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8588 get_identifier (name
),
8589 build_function_type_list (void_type_node
, NULL_TREE
));
8590 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8591 NULL_TREE
, void_type_node
);
8592 TREE_PUBLIC (decl
) = 1;
8593 TREE_STATIC (decl
) = 1;
8594 DECL_IGNORED_P (decl
) = 1;
8599 switch_to_section (darwin_sections
[text_coal_section
]);
8600 fputs ("\t.weak_definition\t", asm_out_file
);
8601 assemble_name (asm_out_file
, name
);
8602 fputs ("\n\t.private_extern\t", asm_out_file
);
8603 assemble_name (asm_out_file
, name
);
8604 putc ('\n', asm_out_file
);
8605 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8606 DECL_WEAK (decl
) = 1;
8610 if (USE_HIDDEN_LINKONCE
)
8612 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8614 targetm
.asm_out
.unique_section (decl
, 0);
8615 switch_to_section (get_named_section (decl
, NULL
, 0));
8617 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8618 fputs ("\t.hidden\t", asm_out_file
);
8619 assemble_name (asm_out_file
, name
);
8620 putc ('\n', asm_out_file
);
8621 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8625 switch_to_section (text_section
);
8626 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8629 DECL_INITIAL (decl
) = make_node (BLOCK
);
8630 current_function_decl
= decl
;
8631 init_function_start (decl
);
8632 first_function_block_is_cold
= false;
8633 /* Make sure unwind info is emitted for the thunk if needed. */
8634 final_start_function (emit_barrier (), asm_out_file
, 1);
8636 /* Pad stack IP move with 4 instructions (two NOPs count
8637 as one instruction). */
8638 if (TARGET_PAD_SHORT_FUNCTION
)
8643 fputs ("\tnop\n", asm_out_file
);
8646 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8647 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8648 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8649 fputs ("\tret\n", asm_out_file
);
8650 final_end_function ();
8651 init_insn_lengths ();
8652 free_after_compilation (cfun
);
8654 current_function_decl
= NULL
;
8657 if (flag_split_stack
)
8658 file_end_indicate_split_stack ();
8661 /* Emit code for the SET_GOT patterns. */
8664 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8670 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8672 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8673 xops
[2] = gen_rtx_MEM (Pmode
,
8674 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8675 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8677 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8678 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8679 an unadorned address. */
8680 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8681 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8682 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8686 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8690 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8692 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8695 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8696 is what will be referenced by the Mach-O PIC subsystem. */
8698 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8701 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8702 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8707 get_pc_thunk_name (name
, REGNO (dest
));
8708 pic_labels_used
|= 1 << REGNO (dest
);
8710 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8711 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8712 output_asm_insn ("call\t%X2", xops
);
8713 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8714 is what will be referenced by the Mach-O PIC subsystem. */
8717 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8719 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8720 CODE_LABEL_NUMBER (label
));
8725 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8730 /* Generate an "push" pattern for input ARG. */
8735 struct machine_function
*m
= cfun
->machine
;
8737 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8738 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8739 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8741 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8742 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8744 return gen_rtx_SET (VOIDmode
,
8745 gen_rtx_MEM (word_mode
,
8746 gen_rtx_PRE_DEC (Pmode
,
8747 stack_pointer_rtx
)),
8751 /* Generate an "pop" pattern for input ARG. */
8756 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8757 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8759 return gen_rtx_SET (VOIDmode
,
8761 gen_rtx_MEM (word_mode
,
8762 gen_rtx_POST_INC (Pmode
,
8763 stack_pointer_rtx
)));
8766 /* Return >= 0 if there is an unused call-clobbered register available
8767 for the entire function. */
8770 ix86_select_alt_pic_regnum (void)
8774 && !ix86_current_function_calls_tls_descriptor
)
8777 /* Can't use the same register for both PIC and DRAP. */
8779 drap
= REGNO (crtl
->drap_reg
);
8782 for (i
= 2; i
>= 0; --i
)
8783 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8787 return INVALID_REGNUM
;
8790 /* Return TRUE if we need to save REGNO. */
8793 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8795 if (pic_offset_table_rtx
8796 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8797 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8799 || crtl
->calls_eh_return
8800 || crtl
->uses_const_pool
))
8801 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8803 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8808 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8809 if (test
== INVALID_REGNUM
)
8816 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8819 return (df_regs_ever_live_p (regno
)
8820 && !call_used_regs
[regno
]
8821 && !fixed_regs
[regno
]
8822 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8825 /* Return number of saved general purpose registers. */
8828 ix86_nsaved_regs (void)
8833 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8834 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8839 /* Return number of saved SSE registers. */
8842 ix86_nsaved_sseregs (void)
8847 if (!TARGET_64BIT_MS_ABI
)
8849 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8850 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8855 /* Given FROM and TO register numbers, say whether this elimination is
8856 allowed. If stack alignment is needed, we can only replace argument
8857 pointer with hard frame pointer, or replace frame pointer with stack
8858 pointer. Otherwise, frame pointer elimination is automatically
8859 handled and all other eliminations are valid. */
8862 ix86_can_eliminate (const int from
, const int to
)
8864 if (stack_realign_fp
)
8865 return ((from
== ARG_POINTER_REGNUM
8866 && to
== HARD_FRAME_POINTER_REGNUM
)
8867 || (from
== FRAME_POINTER_REGNUM
8868 && to
== STACK_POINTER_REGNUM
));
8870 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8873 /* Return the offset between two registers, one to be eliminated, and the other
8874 its replacement, at the start of a routine. */
8877 ix86_initial_elimination_offset (int from
, int to
)
8879 struct ix86_frame frame
;
8880 ix86_compute_frame_layout (&frame
);
8882 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8883 return frame
.hard_frame_pointer_offset
;
8884 else if (from
== FRAME_POINTER_REGNUM
8885 && to
== HARD_FRAME_POINTER_REGNUM
)
8886 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8889 gcc_assert (to
== STACK_POINTER_REGNUM
);
8891 if (from
== ARG_POINTER_REGNUM
)
8892 return frame
.stack_pointer_offset
;
8894 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8895 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8899 /* In a dynamically-aligned function, we can't know the offset from
8900 stack pointer to frame pointer, so we must ensure that setjmp
8901 eliminates fp against the hard fp (%ebp) rather than trying to
8902 index from %esp up to the top of the frame across a gap that is
8903 of unknown (at compile-time) size. */
8905 ix86_builtin_setjmp_frame_value (void)
8907 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8910 /* When using -fsplit-stack, the allocation routines set a field in
8911 the TCB to the bottom of the stack plus this much space, measured
8914 #define SPLIT_STACK_AVAILABLE 256
8916 /* Fill structure ix86_frame about frame of currently computed function. */
8919 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8921 unsigned HOST_WIDE_INT stack_alignment_needed
;
8922 HOST_WIDE_INT offset
;
8923 unsigned HOST_WIDE_INT preferred_alignment
;
8924 HOST_WIDE_INT size
= get_frame_size ();
8925 HOST_WIDE_INT to_allocate
;
8927 frame
->nregs
= ix86_nsaved_regs ();
8928 frame
->nsseregs
= ix86_nsaved_sseregs ();
8930 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8931 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8933 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8934 function prologues and leaf. */
8935 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8936 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
8937 || ix86_current_function_calls_tls_descriptor
))
8939 preferred_alignment
= 16;
8940 stack_alignment_needed
= 16;
8941 crtl
->preferred_stack_boundary
= 128;
8942 crtl
->stack_alignment_needed
= 128;
8945 gcc_assert (!size
|| stack_alignment_needed
);
8946 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8947 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8949 /* For SEH we have to limit the amount of code movement into the prologue.
8950 At present we do this via a BLOCKAGE, at which point there's very little
8951 scheduling that can be done, which means that there's very little point
8952 in doing anything except PUSHs. */
8954 cfun
->machine
->use_fast_prologue_epilogue
= false;
8956 /* During reload iteration the amount of registers saved can change.
8957 Recompute the value as needed. Do not recompute when amount of registers
8958 didn't change as reload does multiple calls to the function and does not
8959 expect the decision to change within single iteration. */
8960 else if (!optimize_function_for_size_p (cfun
)
8961 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8963 int count
= frame
->nregs
;
8964 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8966 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
8968 /* The fast prologue uses move instead of push to save registers. This
8969 is significantly longer, but also executes faster as modern hardware
8970 can execute the moves in parallel, but can't do that for push/pop.
8972 Be careful about choosing what prologue to emit: When function takes
8973 many instructions to execute we may use slow version as well as in
8974 case function is known to be outside hot spot (this is known with
8975 feedback only). Weight the size of function by number of registers
8976 to save as it is cheap to use one or two push instructions but very
8977 slow to use many of them. */
8979 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
8980 if (node
->frequency
< NODE_FREQUENCY_NORMAL
8981 || (flag_branch_probabilities
8982 && node
->frequency
< NODE_FREQUENCY_HOT
))
8983 cfun
->machine
->use_fast_prologue_epilogue
= false;
8985 cfun
->machine
->use_fast_prologue_epilogue
8986 = !expensive_function_p (count
);
8989 frame
->save_regs_using_mov
8990 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
8991 /* If static stack checking is enabled and done with probes,
8992 the registers need to be saved before allocating the frame. */
8993 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
8995 /* Skip return address. */
8996 offset
= UNITS_PER_WORD
;
8998 /* Skip pushed static chain. */
8999 if (ix86_static_chain_on_stack
)
9000 offset
+= UNITS_PER_WORD
;
9002 /* Skip saved base pointer. */
9003 if (frame_pointer_needed
)
9004 offset
+= UNITS_PER_WORD
;
9005 frame
->hfp_save_offset
= offset
;
9007 /* The traditional frame pointer location is at the top of the frame. */
9008 frame
->hard_frame_pointer_offset
= offset
;
9010 /* Register save area */
9011 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9012 frame
->reg_save_offset
= offset
;
9014 /* On SEH target, registers are pushed just before the frame pointer
9017 frame
->hard_frame_pointer_offset
= offset
;
9019 /* Align and set SSE register save area. */
9020 if (frame
->nsseregs
)
9022 /* The only ABI that has saved SSE registers (Win64) also has a
9023 16-byte aligned default stack, and thus we don't need to be
9024 within the re-aligned local stack frame to save them. */
9025 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9026 offset
= (offset
+ 16 - 1) & -16;
9027 offset
+= frame
->nsseregs
* 16;
9029 frame
->sse_reg_save_offset
= offset
;
9031 /* The re-aligned stack starts here. Values before this point are not
9032 directly comparable with values below this point. In order to make
9033 sure that no value happens to be the same before and after, force
9034 the alignment computation below to add a non-zero value. */
9035 if (stack_realign_fp
)
9036 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9039 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9040 offset
+= frame
->va_arg_size
;
9042 /* Align start of frame for local function. */
9043 if (stack_realign_fp
9044 || offset
!= frame
->sse_reg_save_offset
9047 || cfun
->calls_alloca
9048 || ix86_current_function_calls_tls_descriptor
)
9049 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9051 /* Frame pointer points here. */
9052 frame
->frame_pointer_offset
= offset
;
9056 /* Add outgoing arguments area. Can be skipped if we eliminated
9057 all the function calls as dead code.
9058 Skipping is however impossible when function calls alloca. Alloca
9059 expander assumes that last crtl->outgoing_args_size
9060 of stack frame are unused. */
9061 if (ACCUMULATE_OUTGOING_ARGS
9062 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9063 || ix86_current_function_calls_tls_descriptor
))
9065 offset
+= crtl
->outgoing_args_size
;
9066 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9069 frame
->outgoing_arguments_size
= 0;
9071 /* Align stack boundary. Only needed if we're calling another function
9073 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9074 || ix86_current_function_calls_tls_descriptor
)
9075 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9077 /* We've reached end of stack frame. */
9078 frame
->stack_pointer_offset
= offset
;
9080 /* Size prologue needs to allocate. */
9081 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9083 if ((!to_allocate
&& frame
->nregs
<= 1)
9084 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9085 frame
->save_regs_using_mov
= false;
9087 if (ix86_using_red_zone ()
9088 && crtl
->sp_is_unchanging
9090 && !ix86_current_function_calls_tls_descriptor
)
9092 frame
->red_zone_size
= to_allocate
;
9093 if (frame
->save_regs_using_mov
)
9094 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9095 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9096 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9099 frame
->red_zone_size
= 0;
9100 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9102 /* The SEH frame pointer location is near the bottom of the frame.
9103 This is enforced by the fact that the difference between the
9104 stack pointer and the frame pointer is limited to 240 bytes in
9105 the unwind data structure. */
9110 /* If we can leave the frame pointer where it is, do so. Also, returns
9111 the establisher frame for __builtin_frame_address (0). */
9112 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9113 if (diff
<= SEH_MAX_FRAME_SIZE
9114 && (diff
> 240 || (diff
& 15) != 0)
9115 && !crtl
->accesses_prior_frames
)
9117 /* Ideally we'd determine what portion of the local stack frame
9118 (within the constraint of the lowest 240) is most heavily used.
9119 But without that complication, simply bias the frame pointer
9120 by 128 bytes so as to maximize the amount of the local stack
9121 frame that is addressable with 8-bit offsets. */
9122 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9127 /* This is semi-inlined memory_address_length, but simplified
9128 since we know that we're always dealing with reg+offset, and
9129 to avoid having to create and discard all that rtl. */
9132 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9138 /* EBP and R13 cannot be encoded without an offset. */
9139 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9141 else if (IN_RANGE (offset
, -128, 127))
9144 /* ESP and R12 must be encoded with a SIB byte. */
9145 if (regno
== SP_REG
|| regno
== R12_REG
)
9151 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9152 The valid base registers are taken from CFUN->MACHINE->FS. */
9155 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9157 const struct machine_function
*m
= cfun
->machine
;
9158 rtx base_reg
= NULL
;
9159 HOST_WIDE_INT base_offset
= 0;
9161 if (m
->use_fast_prologue_epilogue
)
9163 /* Choose the base register most likely to allow the most scheduling
9164 opportunities. Generally FP is valid throughout the function,
9165 while DRAP must be reloaded within the epilogue. But choose either
9166 over the SP due to increased encoding size. */
9170 base_reg
= hard_frame_pointer_rtx
;
9171 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9173 else if (m
->fs
.drap_valid
)
9175 base_reg
= crtl
->drap_reg
;
9176 base_offset
= 0 - cfa_offset
;
9178 else if (m
->fs
.sp_valid
)
9180 base_reg
= stack_pointer_rtx
;
9181 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9186 HOST_WIDE_INT toffset
;
9189 /* Choose the base register with the smallest address encoding.
9190 With a tie, choose FP > DRAP > SP. */
9193 base_reg
= stack_pointer_rtx
;
9194 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9195 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9197 if (m
->fs
.drap_valid
)
9199 toffset
= 0 - cfa_offset
;
9200 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9203 base_reg
= crtl
->drap_reg
;
9204 base_offset
= toffset
;
9210 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9211 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9214 base_reg
= hard_frame_pointer_rtx
;
9215 base_offset
= toffset
;
9220 gcc_assert (base_reg
!= NULL
);
9222 return plus_constant (Pmode
, base_reg
, base_offset
);
9225 /* Emit code to save registers in the prologue. */
9228 ix86_emit_save_regs (void)
9233 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9234 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9236 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9237 RTX_FRAME_RELATED_P (insn
) = 1;
9241 /* Emit a single register save at CFA - CFA_OFFSET. */
9244 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9245 HOST_WIDE_INT cfa_offset
)
9247 struct machine_function
*m
= cfun
->machine
;
9248 rtx reg
= gen_rtx_REG (mode
, regno
);
9249 rtx mem
, addr
, base
, insn
;
9251 addr
= choose_baseaddr (cfa_offset
);
9252 mem
= gen_frame_mem (mode
, addr
);
9254 /* For SSE saves, we need to indicate the 128-bit alignment. */
9255 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9257 insn
= emit_move_insn (mem
, reg
);
9258 RTX_FRAME_RELATED_P (insn
) = 1;
9261 if (GET_CODE (base
) == PLUS
)
9262 base
= XEXP (base
, 0);
9263 gcc_checking_assert (REG_P (base
));
9265 /* When saving registers into a re-aligned local stack frame, avoid
9266 any tricky guessing by dwarf2out. */
9267 if (m
->fs
.realigned
)
9269 gcc_checking_assert (stack_realign_drap
);
9271 if (regno
== REGNO (crtl
->drap_reg
))
9273 /* A bit of a hack. We force the DRAP register to be saved in
9274 the re-aligned stack frame, which provides us with a copy
9275 of the CFA that will last past the prologue. Install it. */
9276 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9277 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9278 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9279 mem
= gen_rtx_MEM (mode
, addr
);
9280 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9284 /* The frame pointer is a stable reference within the
9285 aligned frame. Use it. */
9286 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9287 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9288 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9289 mem
= gen_rtx_MEM (mode
, addr
);
9290 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9291 gen_rtx_SET (VOIDmode
, mem
, reg
));
9295 /* The memory may not be relative to the current CFA register,
9296 which means that we may need to generate a new pattern for
9297 use by the unwind info. */
9298 else if (base
!= m
->fs
.cfa_reg
)
9300 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9301 m
->fs
.cfa_offset
- cfa_offset
);
9302 mem
= gen_rtx_MEM (mode
, addr
);
9303 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9307 /* Emit code to save registers using MOV insns.
9308 First register is stored at CFA - CFA_OFFSET. */
9310 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9314 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9315 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9317 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9318 cfa_offset
-= UNITS_PER_WORD
;
9322 /* Emit code to save SSE registers using MOV insns.
9323 First register is stored at CFA - CFA_OFFSET. */
9325 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9329 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9330 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9332 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9337 static GTY(()) rtx queued_cfa_restores
;
9339 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9340 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9341 Don't add the note if the previously saved value will be left untouched
9342 within stack red-zone till return, as unwinders can find the same value
9343 in the register and on the stack. */
9346 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9348 if (!crtl
->shrink_wrapped
9349 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9354 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9355 RTX_FRAME_RELATED_P (insn
) = 1;
9359 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9362 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9365 ix86_add_queued_cfa_restore_notes (rtx insn
)
9368 if (!queued_cfa_restores
)
9370 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9372 XEXP (last
, 1) = REG_NOTES (insn
);
9373 REG_NOTES (insn
) = queued_cfa_restores
;
9374 queued_cfa_restores
= NULL_RTX
;
9375 RTX_FRAME_RELATED_P (insn
) = 1;
9378 /* Expand prologue or epilogue stack adjustment.
9379 The pattern exist to put a dependency on all ebp-based memory accesses.
9380 STYLE should be negative if instructions should be marked as frame related,
9381 zero if %r11 register is live and cannot be freely used and positive
9385 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9386 int style
, bool set_cfa
)
9388 struct machine_function
*m
= cfun
->machine
;
9390 bool add_frame_related_expr
= false;
9392 if (Pmode
== SImode
)
9393 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9394 else if (x86_64_immediate_operand (offset
, DImode
))
9395 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9399 /* r11 is used by indirect sibcall return as well, set before the
9400 epilogue and used after the epilogue. */
9402 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9405 gcc_assert (src
!= hard_frame_pointer_rtx
9406 && dest
!= hard_frame_pointer_rtx
);
9407 tmp
= hard_frame_pointer_rtx
;
9409 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9411 add_frame_related_expr
= true;
9413 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9416 insn
= emit_insn (insn
);
9418 ix86_add_queued_cfa_restore_notes (insn
);
9424 gcc_assert (m
->fs
.cfa_reg
== src
);
9425 m
->fs
.cfa_offset
+= INTVAL (offset
);
9426 m
->fs
.cfa_reg
= dest
;
9428 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9429 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9430 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9431 RTX_FRAME_RELATED_P (insn
) = 1;
9435 RTX_FRAME_RELATED_P (insn
) = 1;
9436 if (add_frame_related_expr
)
9438 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9439 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9440 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9444 if (dest
== stack_pointer_rtx
)
9446 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9447 bool valid
= m
->fs
.sp_valid
;
9449 if (src
== hard_frame_pointer_rtx
)
9451 valid
= m
->fs
.fp_valid
;
9452 ooffset
= m
->fs
.fp_offset
;
9454 else if (src
== crtl
->drap_reg
)
9456 valid
= m
->fs
.drap_valid
;
9461 /* Else there are two possibilities: SP itself, which we set
9462 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9463 taken care of this by hand along the eh_return path. */
9464 gcc_checking_assert (src
== stack_pointer_rtx
9465 || offset
== const0_rtx
);
9468 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9469 m
->fs
.sp_valid
= valid
;
9473 /* Find an available register to be used as dynamic realign argument
9474 pointer regsiter. Such a register will be written in prologue and
9475 used in begin of body, so it must not be
9476 1. parameter passing register.
9478 We reuse static-chain register if it is available. Otherwise, we
9479 use DI for i386 and R13 for x86-64. We chose R13 since it has
9482 Return: the regno of chosen register. */
9485 find_drap_reg (void)
9487 tree decl
= cfun
->decl
;
9491 /* Use R13 for nested function or function need static chain.
9492 Since function with tail call may use any caller-saved
9493 registers in epilogue, DRAP must not use caller-saved
9494 register in such case. */
9495 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9502 /* Use DI for nested function or function need static chain.
9503 Since function with tail call may use any caller-saved
9504 registers in epilogue, DRAP must not use caller-saved
9505 register in such case. */
9506 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9509 /* Reuse static chain register if it isn't used for parameter
9511 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9513 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9514 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9521 /* Return minimum incoming stack alignment. */
9524 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9526 unsigned int incoming_stack_boundary
;
9528 /* Prefer the one specified at command line. */
9529 if (ix86_user_incoming_stack_boundary
)
9530 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9531 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9532 if -mstackrealign is used, it isn't used for sibcall check and
9533 estimated stack alignment is 128bit. */
9536 && ix86_force_align_arg_pointer
9537 && crtl
->stack_alignment_estimated
== 128)
9538 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9540 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9542 /* Incoming stack alignment can be changed on individual functions
9543 via force_align_arg_pointer attribute. We use the smallest
9544 incoming stack boundary. */
9545 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9546 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9547 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9548 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9550 /* The incoming stack frame has to be aligned at least at
9551 parm_stack_boundary. */
9552 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9553 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9555 /* Stack at entrance of main is aligned by runtime. We use the
9556 smallest incoming stack boundary. */
9557 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9558 && DECL_NAME (current_function_decl
)
9559 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9560 && DECL_FILE_SCOPE_P (current_function_decl
))
9561 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9563 return incoming_stack_boundary
;
9566 /* Update incoming stack boundary and estimated stack alignment. */
9569 ix86_update_stack_boundary (void)
9571 ix86_incoming_stack_boundary
9572 = ix86_minimum_incoming_stack_boundary (false);
9574 /* x86_64 vararg needs 16byte stack alignment for register save
9578 && crtl
->stack_alignment_estimated
< 128)
9579 crtl
->stack_alignment_estimated
= 128;
9582 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9583 needed or an rtx for DRAP otherwise. */
9586 ix86_get_drap_rtx (void)
9588 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9589 crtl
->need_drap
= true;
9591 if (stack_realign_drap
)
9593 /* Assign DRAP to vDRAP and returns vDRAP */
9594 unsigned int regno
= find_drap_reg ();
9599 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9600 crtl
->drap_reg
= arg_ptr
;
9603 drap_vreg
= copy_to_reg (arg_ptr
);
9607 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9610 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9611 RTX_FRAME_RELATED_P (insn
) = 1;
9619 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9622 ix86_internal_arg_pointer (void)
9624 return virtual_incoming_args_rtx
;
9627 struct scratch_reg
{
9632 /* Return a short-lived scratch register for use on function entry.
9633 In 32-bit mode, it is valid only after the registers are saved
9634 in the prologue. This register must be released by means of
9635 release_scratch_register_on_entry once it is dead. */
9638 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9646 /* We always use R11 in 64-bit mode. */
9651 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9653 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9655 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9656 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9657 int regparm
= ix86_function_regparm (fntype
, decl
);
9659 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9661 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9662 for the static chain register. */
9663 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9664 && drap_regno
!= AX_REG
)
9666 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9667 for the static chain register. */
9668 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9670 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9672 /* ecx is the static chain register. */
9673 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9675 && drap_regno
!= CX_REG
)
9677 else if (ix86_save_reg (BX_REG
, true))
9679 /* esi is the static chain register. */
9680 else if (!(regparm
== 3 && static_chain_p
)
9681 && ix86_save_reg (SI_REG
, true))
9683 else if (ix86_save_reg (DI_REG
, true))
9687 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9692 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9695 rtx insn
= emit_insn (gen_push (sr
->reg
));
9696 RTX_FRAME_RELATED_P (insn
) = 1;
9700 /* Release a scratch register obtained from the preceding function. */
9703 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9707 struct machine_function
*m
= cfun
->machine
;
9708 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9710 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9711 RTX_FRAME_RELATED_P (insn
) = 1;
9712 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9713 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9714 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9715 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9719 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9721 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9724 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9726 /* We skip the probe for the first interval + a small dope of 4 words and
9727 probe that many bytes past the specified size to maintain a protection
9728 area at the botton of the stack. */
9729 const int dope
= 4 * UNITS_PER_WORD
;
9730 rtx size_rtx
= GEN_INT (size
), last
;
9732 /* See if we have a constant small number of probes to generate. If so,
9733 that's the easy case. The run-time loop is made up of 11 insns in the
9734 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9735 for n # of intervals. */
9736 if (size
<= 5 * PROBE_INTERVAL
)
9738 HOST_WIDE_INT i
, adjust
;
9739 bool first_probe
= true;
9741 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9742 values of N from 1 until it exceeds SIZE. If only one probe is
9743 needed, this will not generate any code. Then adjust and probe
9744 to PROBE_INTERVAL + SIZE. */
9745 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9749 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9750 first_probe
= false;
9753 adjust
= PROBE_INTERVAL
;
9755 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9756 plus_constant (Pmode
, stack_pointer_rtx
,
9758 emit_stack_probe (stack_pointer_rtx
);
9762 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9764 adjust
= size
+ PROBE_INTERVAL
- i
;
9766 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9767 plus_constant (Pmode
, stack_pointer_rtx
,
9769 emit_stack_probe (stack_pointer_rtx
);
9771 /* Adjust back to account for the additional first interval. */
9772 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9773 plus_constant (Pmode
, stack_pointer_rtx
,
9774 PROBE_INTERVAL
+ dope
)));
9777 /* Otherwise, do the same as above, but in a loop. Note that we must be
9778 extra careful with variables wrapping around because we might be at
9779 the very top (or the very bottom) of the address space and we have
9780 to be able to handle this case properly; in particular, we use an
9781 equality test for the loop condition. */
9784 HOST_WIDE_INT rounded_size
;
9785 struct scratch_reg sr
;
9787 get_scratch_register_on_entry (&sr
);
9790 /* Step 1: round SIZE to the previous multiple of the interval. */
9792 rounded_size
= size
& -PROBE_INTERVAL
;
9795 /* Step 2: compute initial and final value of the loop counter. */
9797 /* SP = SP_0 + PROBE_INTERVAL. */
9798 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9799 plus_constant (Pmode
, stack_pointer_rtx
,
9800 - (PROBE_INTERVAL
+ dope
))));
9802 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9803 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9804 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9805 gen_rtx_PLUS (Pmode
, sr
.reg
,
9806 stack_pointer_rtx
)));
9811 while (SP != LAST_ADDR)
9813 SP = SP + PROBE_INTERVAL
9817 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9818 values of N from 1 until it is equal to ROUNDED_SIZE. */
9820 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9823 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9824 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9826 if (size
!= rounded_size
)
9828 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9829 plus_constant (Pmode
, stack_pointer_rtx
,
9830 rounded_size
- size
)));
9831 emit_stack_probe (stack_pointer_rtx
);
9834 /* Adjust back to account for the additional first interval. */
9835 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9836 plus_constant (Pmode
, stack_pointer_rtx
,
9837 PROBE_INTERVAL
+ dope
)));
9839 release_scratch_register_on_entry (&sr
);
9842 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9844 /* Even if the stack pointer isn't the CFA register, we need to correctly
9845 describe the adjustments made to it, in particular differentiate the
9846 frame-related ones from the frame-unrelated ones. */
9849 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9850 XVECEXP (expr
, 0, 0)
9851 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9852 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9853 XVECEXP (expr
, 0, 1)
9854 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9855 plus_constant (Pmode
, stack_pointer_rtx
,
9856 PROBE_INTERVAL
+ dope
+ size
));
9857 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9858 RTX_FRAME_RELATED_P (last
) = 1;
9860 cfun
->machine
->fs
.sp_offset
+= size
;
9863 /* Make sure nothing is scheduled before we are done. */
9864 emit_insn (gen_blockage ());
9867 /* Adjust the stack pointer up to REG while probing it. */
9870 output_adjust_stack_and_probe (rtx reg
)
9872 static int labelno
= 0;
9873 char loop_lab
[32], end_lab
[32];
9876 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9877 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9879 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9881 /* Jump to END_LAB if SP == LAST_ADDR. */
9882 xops
[0] = stack_pointer_rtx
;
9884 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9885 fputs ("\tje\t", asm_out_file
);
9886 assemble_name_raw (asm_out_file
, end_lab
);
9887 fputc ('\n', asm_out_file
);
9889 /* SP = SP + PROBE_INTERVAL. */
9890 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9891 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9894 xops
[1] = const0_rtx
;
9895 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9897 fprintf (asm_out_file
, "\tjmp\t");
9898 assemble_name_raw (asm_out_file
, loop_lab
);
9899 fputc ('\n', asm_out_file
);
9901 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9906 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9907 inclusive. These are offsets from the current stack pointer. */
/* NOTE(review): damaged extraction — gaps in the embedded numbering
   (e.g. 9908-9909, 9911, 9925-9926) mean the return type, braces, the
   loop-index declaration and some probe offsets were dropped.  */
9910 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9912 /* See if we have a constant small number of probes to generate. If so,
9913 that's the easy case. The run-time loop is made up of 7 insns in the
9914 generic case while the compile-time loop is made up of n insns for n #
9916 if (size
<= 7 * PROBE_INTERVAL
)
9920 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9921 it exceeds SIZE. If only one probe is needed, this will not
9922 generate any code. Then probe at FIRST + SIZE. */
9923 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
/* NOTE(review): the offset arguments of these two probes (lines 9925
   and 9928 of the original) are missing here — verify upstream.  */
9924 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9927 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9931 /* Otherwise, do the same as above, but in a loop. Note that we must be
9932 extra careful with variables wrapping around because we might be at
9933 the very top (or the very bottom) of the address space and we have
9934 to be able to handle this case properly; in particular, we use an
9935 equality test for the loop condition. */
9938 HOST_WIDE_INT rounded_size
, last
;
9939 struct scratch_reg sr
;
9941 get_scratch_register_on_entry (&sr
);
9944 /* Step 1: round SIZE to the previous multiple of the interval. */
9946 rounded_size
= size
& -PROBE_INTERVAL
;
9949 /* Step 2: compute initial and final value of the loop counter. */
9951 /* TEST_OFFSET = FIRST. */
9952 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9954 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9955 last
= first
+ rounded_size
;
9960 while (TEST_ADDR != LAST_ADDR)
9962 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9966 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9967 until it is equal to ROUNDED_SIZE. */
/* The whole rounded range is probed by a single looping insn pattern;
   the scratch register counts from -first down to -last.  */
9969 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
9972 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9973 that SIZE is equal to ROUNDED_SIZE. */
9975 if (size
!= rounded_size
)
9976 emit_stack_probe (plus_constant (Pmode
,
9977 gen_rtx_PLUS (Pmode
,
9980 rounded_size
- size
));
9982 release_scratch_register_on_entry (&sr
);
9985 /* Make sure nothing is scheduled before we are done. */
9986 emit_insn (gen_blockage ());
9989 /* Probe a range of stack addresses from REG to END, inclusive. These are
9990 offsets from the current stack pointer. */
/* NOTE(review): damaged extraction — the declaration of `xops` and the
   assignments feeding the first comparison (original lines ~9997, 10005-6)
   are missing; braces and the return type were also dropped.  */
/* Counterpart of ix86_emit_probe_stack_range's looping insn: emits the
   literal assembly for the probe loop using two internal labels.  */
9993 output_probe_stack_range (rtx reg
, rtx end
)
/* Unique label numbering across all emitted probe loops.  */
9995 static int labelno
= 0;
9996 char loop_lab
[32], end_lab
[32];
9999 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10000 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10002 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10004 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10007 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10008 fputs ("\tje\t", asm_out_file
);
10009 assemble_name_raw (asm_out_file
, end_lab
);
10010 fputc ('\n', asm_out_file
);
10012 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10013 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10014 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10016 /* Probe at TEST_ADDR. */
/* OR of 0 into (%0,%1) touches the stack word without changing it.  */
10017 xops
[0] = stack_pointer_rtx
;
10019 xops
[2] = const0_rtx
;
10020 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10022 fprintf (asm_out_file
, "\tjmp\t");
10023 assemble_name_raw (asm_out_file
, loop_lab
);
10024 fputc ('\n', asm_out_file
);
10026 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10031 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10032 to be generated in correct form. */
/* NOTE(review): damaged extraction — parts of conditions are missing
   (e.g. the comparison inside the stack_realign initializer at 10041-2,
   the head of the && chain at 10058, the loop scanning basic blocks
   around 10083-10090), along with braces and the return type.  */
10034 ix86_finalize_stack_realign_flags (void)
10036 /* Check if stack realign is really needed after reload, and
10037 stores result in cfun */
/* The incoming boundary is the stricter of the parameter-area boundary
   and the target's incoming stack boundary.  */
10038 unsigned int incoming_stack_boundary
10039 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10040 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
/* NOTE(review): the comparison between incoming_stack_boundary and the
   chosen alignment is truncated here — stack_realign is presumably true
   when the boundary is below the needed alignment; confirm upstream.  */
10041 unsigned int stack_realign
= (incoming_stack_boundary
10043 ? crtl
->max_used_stack_slot_alignment
10044 : crtl
->stack_alignment_needed
));
/* Once finalized, the decision must not change — just re-check it.  */
10046 if (crtl
->stack_realign_finalized
)
10048 /* After stack_realign_needed is finalized, we can't no longer
10050 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10054 /* If the only reason for frame_pointer_needed is that we conservatively
10055 assumed stack realignment might be needed, but in the end nothing that
10056 needed the stack alignment had been spilled, clear frame_pointer_needed
10057 and say we don't need stack realignment. */
10059 && !crtl
->need_drap
10060 && frame_pointer_needed
10062 && flag_omit_frame_pointer
10063 && crtl
->sp_is_unchanging
10064 && !ix86_current_function_calls_tls_descriptor
10065 && !crtl
->accesses_prior_frames
10066 && !cfun
->calls_alloca
10067 && !crtl
->calls_eh_return
10068 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10069 && !ix86_frame_pointer_required ()
10070 && get_frame_size () == 0
10071 && ix86_nsaved_sseregs () == 0
10072 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10074 HARD_REG_SET set_up_by_prologue
, prologue_used
;
/* Collect the registers the prologue would establish, then scan every
   insn: if any insn actually requires a stack frame, keep realignment.  */
10077 CLEAR_HARD_REG_SET (prologue_used
);
10078 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10079 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10080 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10081 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10082 HARD_FRAME_POINTER_REGNUM
);
10086 FOR_BB_INSNS (bb
, insn
)
10087 if (NONDEBUG_INSN_P (insn
)
10088 && requires_stack_frame_p (insn
, prologue_used
,
10089 set_up_by_prologue
))
10091 crtl
->stack_realign_needed
= stack_realign
;
10092 crtl
->stack_realign_finalized
= true;
/* Nothing needed the alignment: drop the frame pointer and clamp all
   alignment figures down to the incoming boundary, then rebuild DF info
   since register usage may have changed.  */
10097 frame_pointer_needed
= false;
10098 stack_realign
= false;
10099 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10100 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10101 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10102 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10103 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10104 df_finish_pass (true);
10105 df_scan_alloc (NULL
);
10107 df_compute_regs_ever_live (true);
10111 crtl
->stack_realign_needed
= stack_realign
;
10112 crtl
->stack_realign_finalized
= true;
10115 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): damaged extraction — the return type, braces, several
   local declarations (insn, t, push, mov, r10, pic_reg_used, ...) and a
   number of condition/argument lines are missing (gaps in the embedded
   numbering, e.g. 10116-7, 10260, 10352, 10390, 10460-1).  The statements
   below are byte-preserved; do not treat this text as compilable.
   The function emits the full i386 prologue RTL while tracking the frame
   state machine in m->fs (cfa_reg/cfa_offset/sp_offset/fp_offset and the
   corresponding validity flags); statement ORDER is load-bearing.  */
10118 ix86_expand_prologue (void)
10120 struct machine_function
*m
= cfun
->machine
;
10123 struct ix86_frame frame
;
10124 HOST_WIDE_INT allocate
;
10125 bool int_registers_saved
;
10126 bool sse_registers_saved
;
10128 ix86_finalize_stack_realign_flags ();
10130 /* DRAP should not coexist with stack_realign_fp */
10131 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10133 memset (&m
->fs
, 0, sizeof (m
->fs
));
10135 /* Initialize CFA state for before the prologue. */
10136 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10137 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10139 /* Track SP offset to the CFA. We continue tracking this after we've
10140 swapped the CFA register away from SP. In the case of re-alignment
10141 this is fudged; we're interested to offsets within the local frame. */
10142 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10143 m
->fs
.sp_valid
= true;
10145 ix86_compute_frame_layout (&frame
);
/* Special entry sequence for Win32 hookable (ms_hook) prologues.  */
10147 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10149 /* We should have already generated an error for any use of
10150 ms_hook on a nested function. */
10151 gcc_checking_assert (!ix86_static_chain_on_stack
);
10153 /* Check if profiling is active and we shall use profiling before
10154 prologue variant. If so sorry. */
10155 if (crtl
->profile
&& flag_fentry
!= 0)
10156 sorry ("ms_hook_prologue attribute isn%'t compatible "
10157 "with -mfentry for 32-bit")
;
10159 /* In ix86_asm_output_function_label we emitted:
10160 8b ff movl.s %edi,%edi
10162 8b ec movl.s %esp,%ebp
10164 This matches the hookable function prologue in Win32 API
10165 functions in Microsoft Windows XP Service Pack 2 and newer.
10166 Wine uses this to enable Windows apps to hook the Win32 API
10167 functions provided by Wine.
10169 What that means is that we've already set up the frame pointer. */
10171 if (frame_pointer_needed
10172 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10176 /* We've decided to use the frame pointer already set up.
10177 Describe this to the unwinder by pretending that both
10178 push and mov insns happen right here.
10180 Putting the unwind info here at the end of the ms_hook
10181 is done so that we can make absolutely certain we get
10182 the required byte sequence at the start of the function,
10183 rather than relying on an assembler that can produce
10184 the exact encoding required.
10186 However it does mean (in the unpatched case) that we have
10187 a 1 insn window where the asynchronous unwind info is
10188 incorrect. However, if we placed the unwind info at
10189 its correct location we would have incorrect unwind info
10190 in the patched case. Which is probably all moot since
10191 I don't expect Wine generates dwarf2 unwind info for the
10192 system libraries that use this feature. */
10194 insn
= emit_insn (gen_blockage ());
10196 push
= gen_push (hard_frame_pointer_rtx
);
10197 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10198 stack_pointer_rtx
);
10199 RTX_FRAME_RELATED_P (push
) = 1;
10200 RTX_FRAME_RELATED_P (mov
) = 1;
10202 RTX_FRAME_RELATED_P (insn
) = 1;
10203 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10204 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10206 /* Note that gen_push incremented m->fs.cfa_offset, even
10207 though we didn't emit the push insn here. */
10208 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10209 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10210 m
->fs
.fp_valid
= true;
10214 /* The frame pointer is not needed so pop %ebp again.
10215 This leaves us with a pristine state. */
10216 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10220 /* The first insn of a function that accepts its static chain on the
10221 stack is to push the register that would be filled in by a direct
10222 call. This insn will be skipped by the trampoline. */
10223 else if (ix86_static_chain_on_stack
)
10225 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10226 emit_insn (gen_blockage ());
10228 /* We don't want to interpret this push insn as a register save,
10229 only as a stack adjustment. The real copy of the register as
10230 a save will be done later, if needed. */
10231 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10232 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10233 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10234 RTX_FRAME_RELATED_P (insn
) = 1;
10237 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10238 of DRAP is needed and stack realignment is really needed after reload */
10239 if (stack_realign_drap
)
10241 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10243 /* Only need to push parameter pointer reg if it is caller saved. */
10244 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10246 /* Push arg pointer reg */
10247 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10248 RTX_FRAME_RELATED_P (insn
) = 1;
10251 /* Grab the argument pointer. */
10252 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10253 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10254 RTX_FRAME_RELATED_P (insn
) = 1;
/* The DRAP register now anchors the CFA.  */
10255 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10256 m
->fs
.cfa_offset
= 0;
10258 /* Align the stack. */
10259 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10261 GEN_INT (-align_bytes
)));
10262 RTX_FRAME_RELATED_P (insn
) = 1;
10264 /* Replicate the return address on the stack so that return
10265 address can be reached via (argp - 1) slot. This is needed
10266 to implement macro RETURN_ADDR_RTX and intrinsic function
10267 expand_builtin_return_addr etc. */
10268 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10269 t
= gen_frame_mem (word_mode
, t
);
10270 insn
= emit_insn (gen_push (t
));
10271 RTX_FRAME_RELATED_P (insn
) = 1;
10273 /* For the purposes of frame and register save area addressing,
10274 we've started over with a new frame. */
10275 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10276 m
->fs
.realigned
= true;
10279 int_registers_saved
= (frame
.nregs
== 0);
10280 sse_registers_saved
= (frame
.nsseregs
== 0);
10282 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10284 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10285 slower on all targets. Also sdb doesn't like it. */
10286 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
))
;
10287 RTX_FRAME_RELATED_P (insn
) = 1;
10289 /* Push registers now, before setting the frame pointer
10291 if (!int_registers_saved
10293 && !frame
.save_regs_using_mov
)
10295 ix86_emit_save_regs ();
10296 int_registers_saved
= true;
10297 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10300 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10302 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10303 RTX_FRAME_RELATED_P (insn
) = 1;
10305 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10306 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10307 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10308 m
->fs
.fp_valid
= true;
10312 if (!int_registers_saved
)
10314 /* If saving registers via PUSH, do so now. */
10315 if (!frame
.save_regs_using_mov
)
10317 ix86_emit_save_regs ();
10318 int_registers_saved
= true;
10319 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10322 /* When using red zone we may start register saving before allocating
10323 the stack frame saving one cycle of the prologue. However, avoid
10324 doing this if we have to probe the stack; at least on x86_64 the
10325 stack probe can turn into a call that clobbers a red zone location. */
10326 else if (ix86_using_red_zone ()
10327 && (! TARGET_STACK_PROBE
10328 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10330 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10331 int_registers_saved
= true;
10335 if (stack_realign_fp
)
10337 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10338 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10340 /* The computation of the size of the re-aligned stack frame means
10341 that we must allocate the size of the register save area before
10342 performing the actual alignment. Otherwise we cannot guarantee
10343 that there's enough storage above the realignment point. */
10344 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10345 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10346 GEN_INT (m
->fs
.sp_offset
10347 - frame
.sse_reg_save_offset
),
10350 /* Align the stack. */
10351 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10353 GEN_INT (-align_bytes
)));
10355 /* For the purposes of register save area addressing, the stack
10356 pointer is no longer valid. As for the value of sp_offset,
10357 see ix86_compute_frame_layout, which we need to match in order
10358 to pass verification of stack_pointer_offset at the end. */
10359 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10360 m
->fs
.sp_valid
= false;
10363 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10365 if (flag_stack_usage_info
)
10367 /* We start to count from ARG_POINTER. */
10368 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10370 /* If it was realigned, take into account the fake frame. */
10371 if (stack_realign_drap
)
10373 if (ix86_static_chain_on_stack
)
10374 stack_size
+= UNITS_PER_WORD
;
10376 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10377 stack_size
+= UNITS_PER_WORD
;
10379 /* This over-estimates by 1 minimal-stack-alignment-unit but
10380 mitigates that by counting in the new return address slot. */
10381 current_function_dynamic_stack_size
10382 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10385 current_function_static_stack_size
= stack_size
;
10388 /* On SEH target with very large frame size, allocate an area to save
10389 SSE registers (as the very large allocation won't be described). */
10391 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10392 && !sse_registers_saved
)
10394 HOST_WIDE_INT sse_size
=
10395 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10397 gcc_assert (int_registers_saved
);
10399 /* No need to do stack checking as the area will be immediately
10401 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10402 GEN_INT (-sse_size
), -1,
10403 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10404 allocate
-= sse_size
;
10405 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10406 sse_registers_saved
= true;
10409 /* The stack has already been decremented by the instruction calling us
10410 so probe if the size is non-negative to preserve the protection area. */
10411 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10413 /* We expect the registers to be saved when probes are used. */
10414 gcc_assert (int_registers_saved
);
10416 if (STACK_CHECK_MOVING_SP
)
10418 ix86_adjust_stack_and_probe (allocate
);
10423 HOST_WIDE_INT size
= allocate
;
/* Clamp the probed size on 64-bit so the 32-bit probe offsets fit.  */
10425 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10426 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10428 if (TARGET_STACK_PROBE
)
10429 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10431 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10437 else if (!ix86_target_stack_probe ()
10438 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10440 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10441 GEN_INT (-allocate
), -1,
10442 m
->fs
.cfa_reg
== stack_pointer_rtx
);
/* Large-allocation path: call the stack allocation worker with the size
   in %eax, preserving %eax/%r10 around the call if they are live.  */
10446 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10448 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10449 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10450 bool eax_live
= false;
10451 bool r10_live
= false;
10454 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10455 if (!TARGET_64BIT_MS_ABI
)
10456 eax_live
= ix86_eax_live_at_start_p ();
10458 /* Note that SEH directives need to continue tracking the stack
10459 pointer even after the frame pointer has been set up. */
10462 insn
= emit_insn (gen_push (eax
));
10463 allocate
-= UNITS_PER_WORD
;
10464 if (sp_is_cfa_reg
|| TARGET_SEH
)
10467 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10468 RTX_FRAME_RELATED_P (insn
) = 1;
10474 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10475 insn
= emit_insn (gen_push (r10
));
10476 allocate
-= UNITS_PER_WORD
;
10477 if (sp_is_cfa_reg
|| TARGET_SEH
)
10480 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10481 RTX_FRAME_RELATED_P (insn
) = 1;
10485 emit_move_insn (eax
, GEN_INT (allocate
));
10486 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10488 /* Use the fact that AX still contains ALLOCATE. */
10489 adjust_stack_insn
= (Pmode
== DImode
10490 ? gen_pro_epilogue_adjust_stack_di_sub
10491 : gen_pro_epilogue_adjust_stack_si_sub
);
10493 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10494 stack_pointer_rtx
, eax
));
10496 if (sp_is_cfa_reg
|| TARGET_SEH
)
10499 m
->fs
.cfa_offset
+= allocate
;
10500 RTX_FRAME_RELATED_P (insn
) = 1;
10501 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10502 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10503 plus_constant (Pmode
, stack_pointer_rtx
,
10506 m
->fs
.sp_offset
+= allocate
;
/* Reload any saved eax/r10 from their stack slots.  */
10508 if (r10_live
&& eax_live
)
10510 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10511 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10512 gen_frame_mem (word_mode
, t
));
10513 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10514 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10515 gen_frame_mem (word_mode
, t
));
10517 else if (eax_live
|| r10_live
)
10519 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10520 emit_move_insn (gen_rtx_REG (word_mode
,
10521 (eax_live
? AX_REG
: R10_REG
)),
10522 gen_frame_mem (word_mode
, t
));
10525 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10527 /* If we havn't already set up the frame pointer, do so now. */
10528 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10530 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10531 GEN_INT (frame
.stack_pointer_offset
10532 - frame
.hard_frame_pointer_offset
));
10533 insn
= emit_insn (insn
);
10534 RTX_FRAME_RELATED_P (insn
) = 1;
10535 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10537 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10538 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10539 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10540 m
->fs
.fp_valid
= true;
10543 if (!int_registers_saved
)
10544 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10545 if (!sse_registers_saved
)
10546 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
/* Set up the PIC register if it is used in this function.  */
10548 pic_reg_used
= false;
10549 if (pic_offset_table_rtx
10550 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10553 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10555 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10556 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10558 pic_reg_used
= true;
10565 if (ix86_cmodel
== CM_LARGE_PIC
)
10567 rtx label
, tmp_reg
;
10569 gcc_assert (Pmode
== DImode
);
10570 label
= gen_label_rtx ();
10571 emit_label (label
);
10572 LABEL_PRESERVE_P (label
) = 1;
10573 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10574 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10575 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10577 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10578 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10579 pic_offset_table_rtx
, tmp_reg
));
10582 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10586 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10587 RTX_FRAME_RELATED_P (insn
) = 1;
10588 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10592 /* In the pic_reg_used case, make sure that the got load isn't deleted
10593 when mcount needs it. Blockage to avoid call movement across mcount
10594 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10596 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10597 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10599 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10601 /* vDRAP is setup but after reload it turns out stack realign
10602 isn't necessary, here we will emit prologue to setup DRAP
10603 without stack realign adjustment */
10604 t
= choose_baseaddr (0);
10605 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10608 /* Prevent instructions from being scheduled into register save push
10609 sequence when access to the redzone area is done through frame pointer.
10610 The offset between the frame pointer and the stack pointer is calculated
10611 relative to the value of the stack pointer at the end of the function
10612 prologue, and moving instructions that access redzone area via frame
10613 pointer inside push sequence violates this assumption. */
10614 if (frame_pointer_needed
&& frame
.red_zone_size
)
10615 emit_insn (gen_memory_blockage ());
10617 /* Emit cld instruction if stringops are used in the function. */
10618 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10619 emit_insn (gen_cld ());
10621 /* SEH requires that the prologue end within 256 bytes of the start of
10622 the function. Prevent instruction schedules that would extend that.
10623 Further, prevent alloca modifications to the stack pointer from being
10624 combined with prologue modifications. */
10626 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10629 /* Emit code to restore REG using a POP insn. */
/* NOTE(review): damaged extraction — return type, braces and a few
   connective lines are missing (numbering gaps around 10630-1, 10653-5).
   Pops REG and updates the tracked frame state (m->fs) plus the
   appropriate CFA notes for the unwinder.  */
10632 ix86_emit_restore_reg_using_pop (rtx reg
)
10634 struct machine_function
*m
= cfun
->machine
;
10635 rtx insn
= emit_insn (gen_pop (reg
));
10637 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
/* The pop moves SP one word closer to the frame base.  */
10638 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10640 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10641 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10643 /* Previously we'd represented the CFA as an expression
10644 like *(%ebp - 8). We've just popped that value from
10645 the stack, which means we need to reset the CFA to
10646 the drap register. This will remain until we restore
10647 the stack pointer. */
10648 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10649 RTX_FRAME_RELATED_P (insn
) = 1;
10651 /* This means that the DRAP register is valid for addressing too. */
10652 m
->fs
.drap_valid
= true;
10656 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10658 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10659 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10660 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10661 RTX_FRAME_RELATED_P (insn
) = 1;
10663 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10666 /* When the frame pointer is the CFA, and we pop it, we are
10667 swapping back to the stack pointer as the CFA. This happens
10668 for stack frames that don't allocate other data, so we assume
10669 the stack pointer is now pointing at the return address, i.e.
10670 the function entry state, which makes the offset be 1 word. */
10671 if (reg
== hard_frame_pointer_rtx
)
10673 m
->fs
.fp_valid
= false;
10674 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10676 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10677 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10679 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10680 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10681 GEN_INT (m
->fs
.cfa_offset
)));
10682 RTX_FRAME_RELATED_P (insn
) = 1;
10687 /* Emit code to restore saved registers using POP insns. */
/* NOTE(review): extraction dropped the return type and braces here.
   Walks all hard registers and pops each saved non-SSE register via
   ix86_emit_restore_reg_using_pop.  */
10690 ix86_emit_restore_regs_using_pop (void)
10692 unsigned int regno
;
10694 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10695 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10696 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10699 /* Emit code and notes for the LEAVE instruction. */
/* NOTE(review): damaged extraction — return type, braces and the offset
   arguments of the final two calls (original lines ~10721, 10725) are
   missing.  LEAVE restores SP from the frame pointer and pops it, so the
   tracked frame state flips from fp-based back to sp-based.  */
10702 ix86_emit_leave (void)
10704 struct machine_function
*m
= cfun
->machine
;
10705 rtx insn
= emit_insn (ix86_gen_leave ());
10707 ix86_add_queued_cfa_restore_notes (insn
);
/* LEAVE only makes sense when the frame pointer is currently valid.  */
10709 gcc_assert (m
->fs
.fp_valid
);
10710 m
->fs
.sp_valid
= true;
10711 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10712 m
->fs
.fp_valid
= false;
10714 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10716 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10717 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10719 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10720 plus_constant (Pmode
, stack_pointer_rtx
,
10722 RTX_FRAME_RELATED_P (insn
) = 1;
10724 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10728 /* Emit code to restore saved registers using MOV insns.
10729 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): damaged extraction — return type, braces and the local
   declarations of `mem`/`insn` (around original lines 10741-2) are
   missing.  Restores each saved non-SSE register with a frame-memory
   load, stepping cfa_offset down one word per register.  */
10731 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10732 bool maybe_eh_return
)
10734 struct machine_function
*m
= cfun
->machine
;
10735 unsigned int regno
;
10737 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10738 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10740 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10743 mem
= choose_baseaddr (cfa_offset
);
10744 mem
= gen_frame_mem (word_mode
, mem
);
10745 insn
= emit_move_insn (reg
, mem
);
10747 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10749 /* Previously we'd represented the CFA as an expression
10750 like *(%ebp - 8). We've just popped that value from
10751 the stack, which means we need to reset the CFA to
10752 the drap register. This will remain until we restore
10753 the stack pointer. */
10754 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10755 RTX_FRAME_RELATED_P (insn
) = 1;
10757 /* This means that the DRAP register is valid for addressing. */
10758 m
->fs
.drap_valid
= true;
10761 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10763 cfa_offset
-= UNITS_PER_WORD
;
10767 /* Emit code to restore saved registers using MOV insns.
10768 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): damaged extraction — return type, braces, the declaration
   of `mem`, and the cfa_offset decrement at the loop tail (original line
   ~10787, presumably -= 16) are missing; confirm against the original.
   Restores each saved SSE register as an aligned V4SF frame load.  */
10770 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10771 bool maybe_eh_return
)
10773 unsigned int regno
;
10775 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10776 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10778 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10781 mem
= choose_baseaddr (cfa_offset
);
10782 mem
= gen_rtx_MEM (V4SFmode
, mem
);
/* Save slots are 16-byte aligned, so mark the MEM accordingly.  */
10783 set_mem_align (mem
, 128);
10784 emit_move_insn (reg
, mem
);
10786 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10792 /* Restore function stack, frame, and registers. */
10795 ix86_expand_epilogue (int style
)
10797 struct machine_function
*m
= cfun
->machine
;
10798 struct machine_frame_state frame_state_save
= m
->fs
;
10799 struct ix86_frame frame
;
10800 bool restore_regs_via_mov
;
10803 ix86_finalize_stack_realign_flags ();
10804 ix86_compute_frame_layout (&frame
);
10806 m
->fs
.sp_valid
= (!frame_pointer_needed
10807 || (crtl
->sp_is_unchanging
10808 && !stack_realign_fp
));
10809 gcc_assert (!m
->fs
.sp_valid
10810 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10812 /* The FP must be valid if the frame pointer is present. */
10813 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10814 gcc_assert (!m
->fs
.fp_valid
10815 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10817 /* We must have *some* valid pointer to the stack frame. */
10818 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10820 /* The DRAP is never valid at this point. */
10821 gcc_assert (!m
->fs
.drap_valid
);
10823 /* See the comment about red zone and frame
10824 pointer usage in ix86_expand_prologue. */
10825 if (frame_pointer_needed
&& frame
.red_zone_size
)
10826 emit_insn (gen_memory_blockage ());
10828 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10829 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10831 /* Determine the CFA offset of the end of the red-zone. */
10832 m
->fs
.red_zone_offset
= 0;
10833 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10835 /* The red-zone begins below the return address. */
10836 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10838 /* When the register save area is in the aligned portion of
10839 the stack, determine the maximum runtime displacement that
10840 matches up with the aligned frame. */
10841 if (stack_realign_drap
)
10842 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10846 /* Special care must be taken for the normal return case of a function
10847 using eh_return: the eax and edx registers are marked as saved, but
10848 not restored along this path. Adjust the save location to match. */
10849 if (crtl
->calls_eh_return
&& style
!= 2)
10850 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10852 /* EH_RETURN requires the use of moves to function properly. */
10853 if (crtl
->calls_eh_return
)
10854 restore_regs_via_mov
= true;
10855 /* SEH requires the use of pops to identify the epilogue. */
10856 else if (TARGET_SEH
)
10857 restore_regs_via_mov
= false;
10858 /* If we're only restoring one register and sp is not valid then
10859 using a move instruction to restore the register since it's
10860 less work than reloading sp and popping the register. */
10861 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10862 restore_regs_via_mov
= true;
10863 else if (TARGET_EPILOGUE_USING_MOVE
10864 && cfun
->machine
->use_fast_prologue_epilogue
10865 && (frame
.nregs
> 1
10866 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10867 restore_regs_via_mov
= true;
10868 else if (frame_pointer_needed
10870 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10871 restore_regs_via_mov
= true;
10872 else if (frame_pointer_needed
10873 && TARGET_USE_LEAVE
10874 && cfun
->machine
->use_fast_prologue_epilogue
10875 && frame
.nregs
== 1)
10876 restore_regs_via_mov
= true;
10878 restore_regs_via_mov
= false;
10880 if (restore_regs_via_mov
|| frame
.nsseregs
)
10882 /* Ensure that the entire register save area is addressable via
10883 the stack pointer, if we will restore via sp. */
10885 && m
->fs
.sp_offset
> 0x7fffffff
10886 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10887 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10889 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10890 GEN_INT (m
->fs
.sp_offset
10891 - frame
.sse_reg_save_offset
),
10893 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10897 /* If there are any SSE registers to restore, then we have to do it
10898 via moves, since there's obviously no pop for SSE regs. */
10899 if (frame
.nsseregs
)
10900 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10903 if (restore_regs_via_mov
)
10908 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10910 /* eh_return epilogues need %ecx added to the stack pointer. */
10913 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10915 /* Stack align doesn't work with eh_return. */
10916 gcc_assert (!stack_realign_drap
);
10917 /* Neither does regparm nested functions. */
10918 gcc_assert (!ix86_static_chain_on_stack
);
10920 if (frame_pointer_needed
)
10922 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10923 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10924 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10926 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10927 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10929 /* Note that we use SA as a temporary CFA, as the return
10930 address is at the proper place relative to it. We
10931 pretend this happens at the FP restore insn because
10932 prior to this insn the FP would be stored at the wrong
10933 offset relative to SA, and after this insn we have no
10934 other reasonable register to use for the CFA. We don't
10935 bother resetting the CFA to the SP for the duration of
10936 the return insn. */
10937 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10938 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10939 ix86_add_queued_cfa_restore_notes (insn
);
10940 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10941 RTX_FRAME_RELATED_P (insn
) = 1;
10943 m
->fs
.cfa_reg
= sa
;
10944 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10945 m
->fs
.fp_valid
= false;
10947 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10948 const0_rtx
, style
, false);
10952 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10953 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10954 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10955 ix86_add_queued_cfa_restore_notes (insn
);
10957 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10958 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10960 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10961 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10962 plus_constant (Pmode
, stack_pointer_rtx
,
10964 RTX_FRAME_RELATED_P (insn
) = 1;
10967 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10968 m
->fs
.sp_valid
= true;
10973 /* SEH requires that the function end with (1) a stack adjustment
10974 if necessary, (2) a sequence of pops, and (3) a return or
10975 jump instruction. Prevent insns from the function body from
10976 being scheduled into this sequence. */
10979 /* Prevent a catch region from being adjacent to the standard
10980 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
10981 several other flags that would be interesting to test are
10983 if (flag_non_call_exceptions
)
10984 emit_insn (gen_nops (const1_rtx
));
10986 emit_insn (gen_blockage ());
10989 /* First step is to deallocate the stack frame so that we can
10990 pop the registers. Also do it on SEH target for very large
10991 frame as the emitted instructions aren't allowed by the ABI in
10993 if (!m
->fs
.sp_valid
10995 && (m
->fs
.sp_offset
- frame
.reg_save_offset
10996 >= SEH_MAX_FRAME_SIZE
)))
10998 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
10999 GEN_INT (m
->fs
.fp_offset
11000 - frame
.reg_save_offset
),
11003 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11005 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11006 GEN_INT (m
->fs
.sp_offset
11007 - frame
.reg_save_offset
),
11009 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11012 ix86_emit_restore_regs_using_pop ();
11015 /* If we used a stack pointer and haven't already got rid of it,
11017 if (m
->fs
.fp_valid
)
11019 /* If the stack pointer is valid and pointing at the frame
11020 pointer store address, then we only need a pop. */
11021 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11022 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11023 /* Leave results in shorter dependency chains on CPUs that are
11024 able to grok it fast. */
11025 else if (TARGET_USE_LEAVE
11026 || optimize_function_for_size_p (cfun
)
11027 || !cfun
->machine
->use_fast_prologue_epilogue
)
11028 ix86_emit_leave ();
11031 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11032 hard_frame_pointer_rtx
,
11033 const0_rtx
, style
, !using_drap
);
11034 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11040 int param_ptr_offset
= UNITS_PER_WORD
;
11043 gcc_assert (stack_realign_drap
);
11045 if (ix86_static_chain_on_stack
)
11046 param_ptr_offset
+= UNITS_PER_WORD
;
11047 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11048 param_ptr_offset
+= UNITS_PER_WORD
;
11050 insn
= emit_insn (gen_rtx_SET
11051 (VOIDmode
, stack_pointer_rtx
,
11052 gen_rtx_PLUS (Pmode
,
11054 GEN_INT (-param_ptr_offset
))));
11055 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11056 m
->fs
.cfa_offset
= param_ptr_offset
;
11057 m
->fs
.sp_offset
= param_ptr_offset
;
11058 m
->fs
.realigned
= false;
11060 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11061 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11062 GEN_INT (param_ptr_offset
)));
11063 RTX_FRAME_RELATED_P (insn
) = 1;
11065 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11066 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11069 /* At this point the stack pointer must be valid, and we must have
11070 restored all of the registers. We may not have deallocated the
11071 entire stack frame. We've delayed this until now because it may
11072 be possible to merge the local stack deallocation with the
11073 deallocation forced by ix86_static_chain_on_stack. */
11074 gcc_assert (m
->fs
.sp_valid
);
11075 gcc_assert (!m
->fs
.fp_valid
);
11076 gcc_assert (!m
->fs
.realigned
);
11077 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11079 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11080 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11084 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11086 /* Sibcall epilogues don't want a return instruction. */
11089 m
->fs
= frame_state_save
;
11093 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11095 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11097 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11098 address, do explicit add, and jump indirectly to the caller. */
11100 if (crtl
->args
.pops_args
>= 65536)
11102 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11105 /* There is no "pascal" calling convention in any 64bit ABI. */
11106 gcc_assert (!TARGET_64BIT
);
11108 insn
= emit_insn (gen_pop (ecx
));
11109 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11110 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11112 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11113 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11114 add_reg_note (insn
, REG_CFA_REGISTER
,
11115 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11116 RTX_FRAME_RELATED_P (insn
) = 1;
11118 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11120 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11123 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11126 emit_jump_insn (gen_simple_return_internal ());
11128 /* Restore the state back to the state from the prologue,
11129 so that it's correct for the next epilogue. */
11130 m
->fs
= frame_state_save
;
11133 /* Reset from the function's potential modifications. */
/* NOTE(review): this chunk is a mangled extraction of gcc/config/i386/i386.c;
   statements are split across lines and some original lines (the embedded
   5-digit numbers) are missing.  Code is left byte-identical; only comments
   were added.  */
/* Target hook: emit target-specific epilogue text.  Visible work: restore
   the PIC register's hard number, then (Mach-O path) walk backwards over
   trailing notes; emit a final "nop" when a deleted label would otherwise
   sit at the very end of the object, or neutralize deleted debug labels.  */
11136 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11137 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
/* Undo any temporary renumbering of the PIC register done elsewhere.  */
11139 if (pic_offset_table_rtx
)
11140 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
)
;
11142 /* Mach-O doesn't support labels at the end of objects, so if
11143 it looks like we might want one, insert a NOP. */
11145 rtx insn
= get_last_insn ();
11146 rtx deleted_debug_label
= NULL_RTX
;
/* Loop condition line(s) missing in this extraction (orig. 11147-11148);
   presumably "while (insn && (NOTE_P (insn) ...".  */
11149 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11151 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11152 notes only, instead set their CODE_LABEL_NUMBER to -1,
11153 otherwise there would be code generation differences
11154 in between -g and -g0. */
11155 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11156 deleted_debug_label
= insn
;
11157 insn
= PREV_INSN (insn
);
/* Condition head missing (orig. 11158-11161); the test ends with a
   deleted-label note check and emits the protective nop.  */
11162 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11163 fputs ("\tnop\n", file
);
11164 else if (deleted_debug_label
)
/* No nop needed: just mark all trailing deleted debug labels as dead.  */
11165 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11166 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11167 CODE_LABEL_NUMBER (insn
) = -1;
11173 /* Return a scratch register to use in the split stack prologue. The
11174 split stack prologue is used for -fsplit-stack. It is the first
11175 instructions in the function, even before the regular prologue.
11176 The scratch register can be any caller-saved register which is not
11177 used for parameters or for the static chain. */
/* NOTE(review): mangled extraction -- statements split across lines, some
   original lines missing (gaps in the embedded numbers).  Code byte-identical;
   comments only.  */
/* Pick a caller-saved scratch register for the -fsplit-stack prologue, or
   return INVALID_REGNUM (after a sorry()) when the calling convention leaves
   no register free.  The choice depends on fastcall/thiscall attributes,
   regparm count, and whether a static chain is live.  */
11179 static unsigned int
11180 split_stack_prologue_scratch_regno (void)
11186 bool is_fastcall
, is_thiscall
;
11189 is_fastcall
= (lookup_attribute ("fastcall",
11190 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11192 is_thiscall
= (lookup_attribute ("thiscall",
11193 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11195 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
/* Guard line(s) missing (orig. 11196-11198); presumably the is_fastcall
   branch.  fastcall + static chain leaves no scratch register at all.  */
11199 if (DECL_STATIC_CHAIN (cfun
->decl
))
11201 sorry ("-fsplit-stack does not support fastcall with "
11202 "nested function");
11203 return INVALID_REGNUM
;
11207 else if (is_thiscall
)
11209 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11213 else if (regparm
< 3)
11215 if (!DECL_STATIC_CHAIN (cfun
->decl
))
/* Return lines missing here (orig. 11216-11220); the success cases
   returned concrete register numbers.  */
11221 sorry ("-fsplit-stack does not support 2 register "
11222 " parameters for a nested function");
11223 return INVALID_REGNUM
;
11230 /* FIXME: We could make this work by pushing a register
11231 around the addition and comparison. */
11232 sorry ("-fsplit-stack does not support 3 register parameters");
11233 return INVALID_REGNUM
;
/* NOTE(review): mangled extraction; code byte-identical, comments only.
   Two GC-rooted (GTY) cached SYMBOL_REFs for the split-stack runtime
   entry points; lazily initialized in ix86_expand_split_stack_prologue.  */
11238 /* A SYMBOL_REF for the function which allocates new stackspace for
11241 static GTY(()) rtx split_stack_fn
;
11243 /* A SYMBOL_REF for the more stack function when using the large
11246 static GTY(()) rtx split_stack_fn_large
;
11248 /* Handle -fsplit-stack. These are the first instructions in the
11249 function, even before the regular prologue. */
/* NOTE(review): mangled extraction of gcc/config/i386/i386.c -- each
   statement is split across lines and a number of original lines (the
   embedded 5-digit numbers show the gaps) are missing, including several
   guarding conditions and braces.  Code left byte-identical; comments
   only added, hedged where the guard lines are absent.  */
/* Emit the -fsplit-stack prologue: compare the stack pointer (minus the
   frame size, via a scratch register when the frame is large) against the
   TCB stack limit, and on failure call __morestack (or
   __morestack_large_model) to obtain more stack, then resume at LABEL.
   Also materializes a pointer to the old argument area for va_start
   functions.  */
11252 ix86_expand_split_stack_prologue (void)
11254 struct ix86_frame frame
;
11255 HOST_WIDE_INT allocate
;
11256 unsigned HOST_WIDE_INT args_size
;
11257 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11258 rtx scratch_reg
= NULL_RTX
;
11259 rtx varargs_label
= NULL_RTX
;
11262 gcc_assert (flag_split_stack
&& reload_completed
);
11264 ix86_finalize_stack_realign_flags ();
11265 ix86_compute_frame_layout (&frame
);
/* Total bytes this function needs below the incoming stack pointer.  */
11266 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11268 /* This is the label we will branch to if we have enough stack
11269 space. We expect the basic block reordering pass to reverse this
11270 branch if optimizing, so that we branch in the unlikely case. */
11271 label
= gen_label_rtx ();
11273 /* We need to compare the stack pointer minus the frame size with
11274 the stack boundary in the TCB. The stack boundary always gives
11275 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11276 can compare directly. Otherwise we need to do an addition. */
/* The limit is a memory reference built around UNSPEC_STACK_CHECK.  */
11278 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11279 UNSPEC_STACK_CHECK
);
11280 limit
= gen_rtx_CONST (Pmode
, limit
);
11281 limit
= gen_rtx_MEM (Pmode
, limit
);
11282 if (allocate
< SPLIT_STACK_AVAILABLE
)
11283 current
= stack_pointer_rtx
;
/* else-branch (orig. 11284-11285 missing): large frame, compute
   sp - allocate into a scratch register.  */
11286 unsigned int scratch_regno
;
11289 /* We need a scratch register to hold the stack pointer minus
11290 the required frame size. Since this is the very start of the
11291 function, the scratch register can be any caller-saved
11292 register which is not used for parameters. */
11293 offset
= GEN_INT (- allocate
);
11294 scratch_regno
= split_stack_prologue_scratch_regno ();
11295 if (scratch_regno
== INVALID_REGNUM
)
/* early-return line missing (orig. 11296); presumably "return;".  */
11297 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11298 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11300 /* We don't use ix86_gen_add3 in this case because it will
11301 want to split to lea, but when not optimizing the insn
11302 will not be split after this point. */
11303 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11304 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* else-branch: offset too wide for an immediate; move then add.  */
11309 emit_move_insn (scratch_reg
, offset
);
11310 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11311 stack_pointer_rtx
));
11313 current
= scratch_reg
;
/* Branch to LABEL when current >= limit (enough stack available).  */
11316 ix86_expand_branch (GEU
, current
, limit
, label
);
11317 jump_insn
= get_last_insn ();
11318 JUMP_LABEL (jump_insn
) = label
;
11320 /* Mark the jump as very likely to be taken. */
11321 add_reg_note (jump_insn
, REG_BR_PROB
,
11322 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11324 if (split_stack_fn
== NULL_RTX
)
11325 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11326 fn
= split_stack_fn
;
11328 /* Get more stack space. We pass in the desired stack space and the
11329 size of the arguments to copy to the new stack. In 32-bit mode
11330 we push the parameters; __morestack will return on a new stack
11331 anyhow. In 64-bit mode we pass the parameters in r10 and
11333 allocate_rtx
= GEN_INT (allocate
);
11334 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11335 call_fusage
= NULL_RTX
;
/* Guard line missing (orig. 11336-11339); presumably "if (TARGET_64BIT)"
   opens the register-argument path using r10/r11.  */
11340 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11341 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11343 /* If this function uses a static chain, it will be in %r10.
11344 Preserve it across the call to __morestack. */
11345 if (DECL_STATIC_CHAIN (cfun
->decl
))
11349 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11350 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11351 use_reg (&call_fusage
, rax
);
11354 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11356 HOST_WIDE_INT argval
;
11358 gcc_assert (Pmode
== DImode
);
11359 /* When using the large model we need to load the address
11360 into a register, and we've run out of registers. So we
11361 switch to a different calling convention, and we call a
11362 different function: __morestack_large. We pass the
11363 argument size in the upper 32 bits of r10 and pass the
11364 frame size in the lower 32 bits. */
11365 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11366 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11368 if (split_stack_fn_large
== NULL_RTX
)
11369 split_stack_fn_large
=
11370 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11372 if (ix86_cmodel
== CM_LARGE_PIC
)
/* Large-PIC: compute the GOT address by hand into r10, then load the
   function address from the GOT entry into r11.  */
11376 label
= gen_label_rtx ();
11377 emit_label (label
);
11378 LABEL_PRESERVE_P (label
) = 1;
11379 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11380 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11381 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11382 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11384 x
= gen_rtx_CONST (Pmode
, x
);
11385 emit_move_insn (reg11
, x
);
11386 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11387 x
= gen_const_mem (Pmode
, x
);
11388 emit_move_insn (reg11
, x
);
/* Non-PIC large model: direct symbol address.  */
11391 emit_move_insn (reg11
, split_stack_fn_large
);
/* Pack args_size into the upper 32 bits and allocate into the lower;
   the double shift by 16 avoids UB for a 32-bit HOST_WIDE_INT.  */
11395 argval
= ((args_size
<< 16) << 16) + allocate
;
11396 emit_move_insn (reg10
, GEN_INT (argval
));
/* Small/medium model: frame size in r10, argument size in r11.  */
11400 emit_move_insn (reg10
, allocate_rtx
);
11401 emit_move_insn (reg11
, GEN_INT (args_size
));
11402 use_reg (&call_fusage
, reg11
);
11405 use_reg (&call_fusage
, reg10
);
/* 32-bit path (guard line missing): push both arguments.  */
11409 emit_insn (gen_push (GEN_INT (args_size
)));
11410 emit_insn (gen_push (allocate_rtx
));
11412 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11413 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11415 add_function_usage_to (call_insn
, call_fusage
);
11417 /* In order to make call/return prediction work right, we now need
11418 to execute a return instruction. See
11419 libgcc/config/i386/morestack.S for the details on how this works.
11421 For flow purposes gcc must not see this as a return
11422 instruction--we need control flow to continue at the subsequent
11423 label. Therefore, we use an unspec. */
11424 gcc_assert (crtl
->args
.pops_args
< 65536);
11425 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11427 /* If we are in 64-bit mode and this function uses a static chain,
11428 we saved %r10 in %rax before calling _morestack. */
11429 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11430 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11431 gen_rtx_REG (word_mode
, AX_REG
));
11433 /* If this function calls va_start, we need to store a pointer to
11434 the arguments on the old stack, because they may not have been
11435 all copied to the new stack. At this point the old stack can be
11436 found at the frame pointer value used by __morestack, because
11437 __morestack has set that up before calling back to us. Here we
11438 store that pointer in a scratch register, and in
11439 ix86_expand_prologue we store the scratch register in a stack
11441 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11443 unsigned int scratch_regno
;
11447 scratch_regno
= split_stack_prologue_scratch_regno ();
11448 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11449 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* Stack layout sketch (comment delimiters lost in extraction):
   64-bit frame holds saved ebp +  */
11453 return address within this function
11454 return address of caller of this function
11456 So we add three words to get to the stack arguments.
/* ... while the 32-bit frame additionally holds:  */
11460 return address within this function
11461 first argument to __morestack
11462 second argument to __morestack
11463 return address of caller of this function
11465 So we add five words to get to the stack arguments.
11467 words
= TARGET_64BIT
? 3 : 5;
11468 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11469 gen_rtx_PLUS (Pmode
, frame_reg
,
11470 GEN_INT (words
* UNITS_PER_WORD
))));
11472 varargs_label
= gen_label_rtx ();
11473 emit_jump_insn (gen_jump (varargs_label
));
11474 JUMP_LABEL (get_last_insn ()) = varargs_label
;
/* Fall-through target: enough stack was available.  */
11479 emit_label (label
);
11480 LABEL_NUSES (label
) = 1;
11482 /* If this function calls va_start, we now have to set the scratch
11483 register for the case where we do not call __morestack. In this
11484 case we need to set it based on the stack pointer. */
11485 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11487 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11488 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11489 GEN_INT (UNITS_PER_WORD
))));
11491 emit_label (varargs_label
);
11492 LABEL_NUSES (varargs_label
) = 1;
11496 /* We may have to tell the dataflow pass that the split stack prologue
11497 is initializing a scratch register. */
/* NOTE(review): mangled extraction; code byte-identical, comments only.  */
/* Dataflow hook: when the split-stack prologue will initialize a scratch
   register (va_start case), mark that register live on function entry so
   DF does not consider the initialization dead.  */
11500 ix86_live_on_entry (bitmap regs
)
11502 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11504 gcc_assert (flag_split_stack
);
11505 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11509 /* Determine if op is suitable SUBREG RTX for address. */
/* NOTE(review): mangled extraction; code byte-identical, comments only.  */
/* Predicate: is OP a SUBREG inner register that may appear inside a memory
   address?  Requires an integer mode no wider than a word, and a
   non-eliminable hard register.  (Some early-out lines, orig. 11515-11518
   and the "return false" bodies, were lost in extraction.)  */
11512 ix86_address_subreg_operand (rtx op
)
11514 enum machine_mode mode
;
11519 mode
= GET_MODE (op
);
11521 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11524 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11525 failures when the register is one word out of a two word structure. */
11526 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11529 /* Allow only SUBREGs of non-eliminable hard registers. */
11530 return register_no_elim_operand (op
, mode
);
11533 /* Extract the parts of an RTL expression that is a valid memory address
11534 for an instruction. Return 0 if the structure of the address is
11535 grossly off. Return -1 if the address contains ASHIFT, so it is not
11536 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): mangled extraction of gcc/config/i386/i386.c -- statements
   split across lines; many original lines (braces, case labels, early
   returns) are missing, as the gaps in the embedded 5-digit numbers show.
   Code left byte-identical; comments only added, hedged where guards are
   missing.  */
/* Decompose ADDR into base + index*scale + disp (+ segment) in *OUT.
   Per the header comment above this function in the original file:
   returns 0 for a grossly malformed address, -1 when ADDR contains
   ASHIFT (lea-only form).  */
11539 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11541 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11542 rtx base_reg
, index_reg
;
11543 HOST_WIDE_INT scale
= 1;
11544 rtx scale_rtx
= NULL_RTX
;
11547 enum ix86_address_seg seg
= SEG_DEFAULT
;
11549 /* Allow zero-extended SImode addresses,
11550 they will be emitted with addr32 prefix. */
11551 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11553 if (GET_CODE (addr
) == ZERO_EXTEND
11554 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11556 addr
= XEXP (addr
, 0);
11557 if (CONST_INT_P (addr
))
/* ...missing: constant-int handling (orig. 11558-11559).  */
11560 else if (GET_CODE (addr
) == AND
11561 && const_32bit_mask (XEXP (addr
, 1), DImode
))
/* (reg & 0xffffffff): strip the mask to the SImode lowpart.  */
11563 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11564 if (addr
== NULL_RTX
)
/* ...missing: failure return (orig. 11565-11566).  */
11567 if (CONST_INT_P (addr
))
11572 /* Allow SImode subregs of DImode addresses,
11573 they will be emitted with addr32 prefix. */
11574 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11576 if (GET_CODE (addr
) == SUBREG
11577 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11579 addr
= SUBREG_REG (addr
);
11580 if (CONST_INT_P (addr
))
/* Dispatch on the top-level RTX code of ADDR (REG case lines missing).  */
11587 else if (GET_CODE (addr
) == SUBREG
)
11589 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
/* ...missing: "base = addr" and the reject branch.  */
11594 else if (GET_CODE (addr
) == PLUS
)
/* Flatten a chain of PLUSes into up to four addends, then classify each.  */
11596 rtx addends
[4], op
;
11604 addends
[n
++] = XEXP (op
, 1);
11607 while (GET_CODE (op
) == PLUS
);
11612 for (i
= n
; i
>= 0; --i
)
11615 switch (GET_CODE (op
))
/* case MULT: index * scale_rtx.  */
11620 index
= XEXP (op
, 0);
11621 scale_rtx
= XEXP (op
, 1);
/* case ASHIFT: index << log2(scale).  */
11627 index
= XEXP (op
, 0);
11628 tmp
= XEXP (op
, 1);
11629 if (!CONST_INT_P (tmp
))
11631 scale
= INTVAL (tmp
);
11632 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11634 scale
= 1 << scale
;
/* case CONST: only a TLS-segment UNSPEC_TP is acceptable here.  */
11639 if (GET_CODE (op
) != UNSPEC
)
11644 if (XINT (op
, 1) == UNSPEC_TP
11645 && TARGET_TLS_DIRECT_SEG_REFS
11646 && seg
== SEG_DEFAULT
)
11647 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
/* case SUBREG addend: must be a valid address subreg.  */
11653 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
/* ...large gap (orig. 11654-11679): REG/constant addend handling lost.  */
11680 else if (GET_CODE (addr
) == MULT
)
11682 index
= XEXP (addr
, 0); /* index*scale */
11683 scale_rtx
= XEXP (addr
, 1);
11685 else if (GET_CODE (addr
) == ASHIFT
)
11687 /* We're called for lea too, which implements ashift on occasion. */
11688 index
= XEXP (addr
, 0);
11689 tmp
= XEXP (addr
, 1);
11690 if (!CONST_INT_P (tmp
))
11692 scale
= INTVAL (tmp
);
11693 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11695 scale
= 1 << scale
;
11698 else if (CONST_INT_P (addr
))
11700 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11703 /* Constant addresses are sign extended to 64bit, we have to
11704 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11706 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11712 disp
= addr
; /* displacement */
/* Validate base/index (REG branch lines missing, orig. 11713-11717).  */
11718 else if (GET_CODE (index
) == SUBREG
11719 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11725 /* Address override works only on the (%reg) part of %fs:(%reg). */
11726 if (seg
!= SEG_DEFAULT
11727 && ((base
&& GET_MODE (base
) != word_mode
)
11728 || (index
&& GET_MODE (index
) != word_mode
)))
11731 /* Extract the integral value of scale. */
11734 if (!CONST_INT_P (scale_rtx
))
11736 scale
= INTVAL (scale_rtx
);
11739 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11740 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11742 /* Avoid useless 0 displacement. */
11743 if (disp
== const0_rtx
&& (base
|| index
))
11746 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11747 if (base_reg
&& index_reg
&& scale
== 1
11748 && (index_reg
== arg_pointer_rtx
11749 || index_reg
== frame_pointer_rtx
11750 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
/* Swap base and index so sp/fp/argp become the base.  */
11753 tmp
= base
, base
= index
, index
= tmp
;
11754 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11757 /* Special case: %ebp cannot be encoded as a base without a displacement.
11761 && (base_reg
== hard_frame_pointer_rtx
11762 || base_reg
== frame_pointer_rtx
11763 || base_reg
== arg_pointer_rtx
11764 || (REG_P (base_reg
)
11765 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11766 || REGNO (base_reg
) == R13_REG
))))
/* ...missing: "disp = const0_rtx;" consequence (orig. 11767-11768).  */
11769 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11770 Avoid this by transforming to [%esi+0].
11771 Reload calls address legitimization without cfun defined, so we need
11772 to test cfun for being non-NULL. */
11773 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11774 && base_reg
&& !index_reg
&& !disp
11775 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11778 /* Special case: encode reg+reg instead of reg*2. */
11779 if (!base
&& index
&& scale
== 2)
11780 base
= index
, base_reg
= index_reg
, scale
= 1;
11782 /* Special case: scaling cannot be encoded without base or displacement. */
11783 if (!base
&& !disp
&& index
&& scale
!= 1)
/* Fill *OUT (the base/disp/seg stores were lost in extraction).  */
11787 out
->index
= index
;
11789 out
->scale
= scale
;
11795 /* Return cost of the memory address x.
11796 For i386, it is better to use a complex address than let gcc copy
11797 the address into a reg and make a new pseudo. But not if the address
11798 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): mangled extraction; code byte-identical, comments only.
   The cost-accumulation statements (cost variable, increments, return)
   were lost -- only the classification conditions survive.  */
/* TARGET_ADDRESS_COST hook: estimate the cost of address X by decomposing
   it and charging for pseudo registers and for K6-hostile ModR/M forms.  */
11801 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11802 addr_space_t as ATTRIBUTE_UNUSED
,
11803 bool speed ATTRIBUTE_UNUSED
)
11805 struct ix86_address parts
;
11807 int ok
= ix86_decompose_address (x
, &parts
);
/* gcc_assert (ok) presumably followed (orig. 11808-11810 missing).  */
11811 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11812 parts
.base
= SUBREG_REG (parts
.base
);
11813 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11814 parts
.index
= SUBREG_REG (parts
.index
);
11816 /* Attempt to minimize number of registers in the address. */
11818 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11820 && (!REG_P (parts
.index
)
11821 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
/* Both base and index are pseudos and distinct: costlier address.  */
11825 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11827 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11828 && parts
.base
!= parts
.index
)
11831 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11832 since it's predecode logic can't detect the length of instructions
11833 and it degenerates to vector decoded. Increase cost of such
11834 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11835 to split such addresses or even refuse such addresses at all.
11837 Following addressing modes are affected:
11842 The first and last case may be avoidable by explicitly coding the zero in
11843 memory address, but I don't have AMD-K6 machine handy to check this
11847 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11848 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11849 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11855 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11856 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): mangled extraction; code byte-identical, comments only.  */
/* True iff DISP is the Mach-O picbase-relative UNSPEC used to address
   local data under -fPIC (see the {LABEL|SYMBOL}_REF comment above).  */
11860 darwin_local_data_pic (rtx disp
)
11862 return (GET_CODE (disp
) == UNSPEC
11863 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11866 /* Determine if a given RTX is a valid constant. We already know this
11867 satisfies CONSTANT_P. */
/* NOTE(review): mangled extraction; code byte-identical, comments only.
   Several case labels and return lines were lost (gaps in the embedded
   numbers) -- comments below hedge accordingly.  */
/* TARGET_LEGITIMATE_CONSTANT_P hook: decide whether X may appear as an
   immediate constant.  Rejects TLS and DLLIMPORT symbols; restricts which
   UNSPECs count as constants; special-cases Mach-O local-data PIC refs.  */
11870 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11872 switch (GET_CODE (x
))
/* case CONST (label missing): strip CONST/PLUS wrappers.  */
11877 if (GET_CODE (x
) == PLUS
)
11879 if (!CONST_INT_P (XEXP (x
, 1)))
11884 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11887 /* Only some unspecs are valid as "constants". */
11888 if (GET_CODE (x
) == UNSPEC
)
11889 switch (XINT (x
, 1))
11892 case UNSPEC_GOTOFF
:
11893 case UNSPEC_PLTOFF
:
11894 return TARGET_64BIT
;
11896 case UNSPEC_NTPOFF
:
11897 x
= XVECEXP (x
, 0, 0);
11898 return (GET_CODE (x
) == SYMBOL_REF
11899 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11900 case UNSPEC_DTPOFF
:
11901 x
= XVECEXP (x
, 0, 0);
11902 return (GET_CODE (x
) == SYMBOL_REF
11903 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11908 /* We must have drilled down to a symbol. */
11909 if (GET_CODE (x
) == LABEL_REF
)
11911 if (GET_CODE (x
) != SYMBOL_REF
)
/* case SYMBOL_REF (label missing):  */
11916 /* TLS symbols are never valid. */
11917 if (SYMBOL_REF_TLS_MODEL (x
))
11920 /* DLLIMPORT symbols are never valid. */
11921 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11922 && SYMBOL_REF_DLLIMPORT_P (x
))
11926 /* mdynamic-no-pic */
11927 if (MACHO_DYNAMIC_NO_PIC_P
)
11928 return machopic_symbol_defined_p (x
);
/* case CONST_DOUBLE / vector (labels missing): TImode and SSE checks.  */
11933 if (GET_MODE (x
) == TImode
11934 && x
!= CONST0_RTX (TImode
)
11940 if (!standard_sse_constant_p (x
))
11947 /* Otherwise we handle everything else in the move patterns. */
11951 /* Determine if it's legal to put X into the constant pool. This
11952 is not possible for the address of thread-local symbols, which
11953 is checked above. */
/* NOTE(review): mangled extraction; code byte-identical, comments only.  */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: a constant may go into the pool
   exactly when it is NOT a legitimate constant (TLS symbols etc.); the
   integer/vector case labels (orig. 11960-11968) were lost.  */
11956 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11958 /* We can always put integral constants and vectors in memory. */
11959 switch (GET_CODE (x
))
11969 return !ix86_legitimate_constant_p (mode
, x
);
11973 /* Nonzero if the constant value X is a legitimate general operand
11974 when generating PIC code. It is given that flag_pic is on and
11975 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): mangled extraction; code byte-identical, comments only.  */
/* Nonzero if constant X is a legitimate general operand under -fPIC
   (see the header comment above this function).  Peels a CONST(PLUS)
   wrapper, then accepts only the UNSPECs listed below.  */
11978 legitimate_pic_operand_p (rtx x
)
11982 switch (GET_CODE (x
))
/* case CONST (label missing):  */
11985 inner
= XEXP (x
, 0);
11986 if (GET_CODE (inner
) == PLUS
11987 && CONST_INT_P (XEXP (inner
, 1)))
11988 inner
= XEXP (inner
, 0);
11990 /* Only some unspecs are valid as "constants". */
11991 if (GET_CODE (inner
) == UNSPEC
)
11992 switch (XINT (inner
, 1))
11995 case UNSPEC_GOTOFF
:
11996 case UNSPEC_PLTOFF
:
11997 return TARGET_64BIT
;
/* case UNSPEC_TPOFF (label missing, orig. 11998):  */
11999 x
= XVECEXP (inner
, 0, 0);
12000 return (GET_CODE (x
) == SYMBOL_REF
12001 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12002 case UNSPEC_MACHOPIC_OFFSET
:
12003 return legitimate_pic_address_disp_p (x
);
/* default / SYMBOL_REF, LABEL_REF cases (labels missing):  */
12011 return legitimate_pic_address_disp_p (x
);
12018 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): mangled extraction of gcc/config/i386/i386.c -- statements
   split across lines; several case labels, braces and returns are missing
   (gaps in the embedded numbers).  Code left byte-identical; comments only
   added, hedged where guards are absent.  */
/* Decide whether DISP is a valid PIC memory displacement (see the header
   comment above).  64-bit path: allow direct refs to local, non-TLS,
   non-far symbols within +/-16MB; otherwise require one of the GOT/TLS
   UNSPEC wrappers validated in the switch at the end.  */
12022 legitimate_pic_address_disp_p (rtx disp
)
12026 /* In 64bit mode we can allow direct addresses of symbols and labels
12027 when they are not dynamic symbols. */
12030 rtx op0
= disp
, op1
;
12032 switch (GET_CODE (disp
))
/* case CONST (label missing): must wrap SYMBOL_REF + small constant.  */
12038 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12040 op0
= XEXP (XEXP (disp
, 0), 0);
12041 op1
= XEXP (XEXP (disp
, 0), 1);
12042 if (!CONST_INT_P (op1
)
12043 || INTVAL (op1
) >= 16*1024*1024
12044 || INTVAL (op1
) < -16*1024*1024)
12046 if (GET_CODE (op0
) == LABEL_REF
)
12048 if (GET_CODE (op0
) == CONST
12049 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12050 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12052 if (GET_CODE (op0
) == UNSPEC
12053 && XINT (op0
, 1) == UNSPEC_PCREL
)
12055 if (GET_CODE (op0
) != SYMBOL_REF
)
/* Fall through to the common SYMBOL_REF validation:  */
12060 /* TLS references should always be enclosed in UNSPEC. */
12061 if (SYMBOL_REF_TLS_MODEL (op0
))
12063 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12064 && ix86_cmodel
!= CM_LARGE_PIC
)
/* 32-bit / non-direct path: strip the CONST wrapper.  */
12072 if (GET_CODE (disp
) != CONST
)
12074 disp
= XEXP (disp
, 0);
/* 64-bit sub-path (guard lines missing, orig. 12075-12077):  */
12078 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12079 of GOT tables. We should not need these anyway. */
12080 if (GET_CODE (disp
) != UNSPEC
12081 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12082 && XINT (disp
, 1) != UNSPEC_GOTOFF
12083 && XINT (disp
, 1) != UNSPEC_PCREL
12084 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12087 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12088 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12094 if (GET_CODE (disp
) == PLUS
)
12096 if (!CONST_INT_P (XEXP (disp
, 1)))
12098 disp
= XEXP (disp
, 0);
12102 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12105 if (GET_CODE (disp
) != UNSPEC
)
12108 switch (XINT (disp
, 1))
/* case UNSPEC_GOT (label missing, orig. 12109-12112):  */
12113 /* We need to check for both symbols and labels because VxWorks loads
12114 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12116 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12117 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12118 case UNSPEC_GOTOFF
:
12119 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12120 While ABI specify also 32bit relocation but we don't produce it in
12121 small PIC model at all. */
12122 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12123 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12125 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12127 case UNSPEC_GOTTPOFF
:
12128 case UNSPEC_GOTNTPOFF
:
12129 case UNSPEC_INDNTPOFF
:
/* These are initial-exec TLS accesses; validate the wrapped symbol.  */
12132 disp
= XVECEXP (disp
, 0, 0);
12133 return (GET_CODE (disp
) == SYMBOL_REF
12134 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12135 case UNSPEC_NTPOFF
:
12136 disp
= XVECEXP (disp
, 0, 0);
12137 return (GET_CODE (disp
) == SYMBOL_REF
12138 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12139 case UNSPEC_DTPOFF
:
12140 disp
= XVECEXP (disp
, 0, 0);
12141 return (GET_CODE (disp
) == SYMBOL_REF
12142 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12148 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12149 replace the input X, or the original X if no replacement is called for.
12150 The output parameter *WIN is 1 if the calling macro should goto WIN,
12151 0 if it should not. */
12154 ix86_legitimize_reload_address (rtx x
,
12155 enum machine_mode mode ATTRIBUTE_UNUSED
,
12156 int opnum
, int type
,
12157 int ind_levels ATTRIBUTE_UNUSED
)
12159 /* Reload can generate:
12161 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12165 This RTX is rejected from ix86_legitimate_address_p due to
12166 non-strictness of base register 97. Following this rejection,
12167 reload pushes all three components into separate registers,
12168 creating invalid memory address RTX.
12170 Following code reloads only the invalid part of the
12171 memory address RTX. */
12173 if (GET_CODE (x
) == PLUS
12174 && REG_P (XEXP (x
, 1))
12175 && GET_CODE (XEXP (x
, 0)) == PLUS
12176 && REG_P (XEXP (XEXP (x
, 0), 1)))
12179 bool something_reloaded
= false;
12181 base
= XEXP (XEXP (x
, 0), 1);
12182 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12184 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12185 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12186 opnum
, (enum reload_type
) type
);
12187 something_reloaded
= true;
12190 index
= XEXP (x
, 1);
12191 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12193 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12194 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12195 opnum
, (enum reload_type
) type
);
12196 something_reloaded
= true;
12199 gcc_assert (something_reloaded
);
12206 /* Recognizes RTL expressions that are valid memory addresses for an
12207 instruction. The MODE argument is the machine mode for the MEM
12208 expression that wants to use this address.
12210 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12211 convert common non-canonical forms to canonical form so that they will
12215 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12216 rtx addr
, bool strict
)
12218 struct ix86_address parts
;
12219 rtx base
, index
, disp
;
12220 HOST_WIDE_INT scale
;
12222 if (ix86_decompose_address (addr
, &parts
) <= 0)
12223 /* Decomposition failed. */
12227 index
= parts
.index
;
12229 scale
= parts
.scale
;
12231 /* Validate base register. */
12238 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12239 reg
= SUBREG_REG (base
);
12241 /* Base is not a register. */
12244 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12247 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12248 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12249 /* Base is not valid. */
12253 /* Validate index register. */
12260 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12261 reg
= SUBREG_REG (index
);
12263 /* Index is not a register. */
12266 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12269 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12270 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12271 /* Index is not valid. */
12275 /* Index and base should have the same mode. */
12277 && GET_MODE (base
) != GET_MODE (index
))
12280 /* Validate scale factor. */
12284 /* Scale without index. */
12287 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12288 /* Scale is not a valid multiplier. */
12292 /* Validate displacement. */
12295 if (GET_CODE (disp
) == CONST
12296 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12297 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12298 switch (XINT (XEXP (disp
, 0), 1))
12300 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12301 used. While ABI specify also 32bit relocations, we don't produce
12302 them at all and use IP relative instead. */
12304 case UNSPEC_GOTOFF
:
12305 gcc_assert (flag_pic
);
12307 goto is_legitimate_pic
;
12309 /* 64bit address unspec. */
12312 case UNSPEC_GOTPCREL
:
12314 gcc_assert (flag_pic
);
12315 goto is_legitimate_pic
;
12317 case UNSPEC_GOTTPOFF
:
12318 case UNSPEC_GOTNTPOFF
:
12319 case UNSPEC_INDNTPOFF
:
12320 case UNSPEC_NTPOFF
:
12321 case UNSPEC_DTPOFF
:
12324 case UNSPEC_STACK_CHECK
:
12325 gcc_assert (flag_split_stack
);
12329 /* Invalid address unspec. */
12333 else if (SYMBOLIC_CONST (disp
)
12337 && MACHOPIC_INDIRECT
12338 && !machopic_operand_p (disp
)
12344 if (TARGET_64BIT
&& (index
|| base
))
12346 /* foo@dtpoff(%rX) is ok. */
12347 if (GET_CODE (disp
) != CONST
12348 || GET_CODE (XEXP (disp
, 0)) != PLUS
12349 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12350 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12351 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12352 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12353 /* Non-constant pic memory reference. */
12356 else if ((!TARGET_MACHO
|| flag_pic
)
12357 && ! legitimate_pic_address_disp_p (disp
))
12358 /* Displacement is an invalid pic construct. */
12361 else if (MACHO_DYNAMIC_NO_PIC_P
12362 && !ix86_legitimate_constant_p (Pmode
, disp
))
12363 /* displacment must be referenced via non_lazy_pointer */
12367 /* This code used to verify that a symbolic pic displacement
12368 includes the pic_offset_table_rtx register.
12370 While this is good idea, unfortunately these constructs may
12371 be created by "adds using lea" optimization for incorrect
12380 This code is nonsensical, but results in addressing
12381 GOT table with pic_offset_table_rtx base. We can't
12382 just refuse it easily, since it gets matched by
12383 "addsi3" pattern, that later gets split to lea in the
12384 case output register differs from input. While this
12385 can be handled by separate addsi pattern for this case
12386 that never results in lea, this seems to be easier and
12387 correct fix for crash to disable this test. */
12389 else if (GET_CODE (disp
) != LABEL_REF
12390 && !CONST_INT_P (disp
)
12391 && (GET_CODE (disp
) != CONST
12392 || !ix86_legitimate_constant_p (Pmode
, disp
))
12393 && (GET_CODE (disp
) != SYMBOL_REF
12394 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12395 /* Displacement is not constant. */
12397 else if (TARGET_64BIT
12398 && !x86_64_immediate_operand (disp
, VOIDmode
))
12399 /* Displacement is out of range. */
12403 /* Everything looks valid. */
12407 /* Determine if a given RTX is a valid constant address. */
12410 constant_address_p (rtx x
)
12412 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12415 /* Return a unique alias set for the GOT. */
12417 static alias_set_type
12418 ix86_GOT_alias_set (void)
12420 static alias_set_type set
= -1;
12422 set
= new_alias_set ();
12426 /* Return a legitimate reference for ORIG (an address) using the
12427 register REG. If REG is 0, a new pseudo is generated.
12429 There are two types of references that must be handled:
12431 1. Global data references must load the address from the GOT, via
12432 the PIC reg. An insn is emitted to do this load, and the reg is
12435 2. Static data references, constant pool addresses, and code labels
12436 compute the address as an offset from the GOT, whose base is in
12437 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12438 differentiate them from global data objects. The returned
12439 address is the PIC reg + an unspec constant.
12441 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12442 reg also appears in the address. */
12445 legitimize_pic_address (rtx orig
, rtx reg
)
12448 rtx new_rtx
= orig
;
12451 if (TARGET_MACHO
&& !TARGET_64BIT
)
12454 reg
= gen_reg_rtx (Pmode
);
12455 /* Use the generic Mach-O PIC machinery. */
12456 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12460 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12462 else if (TARGET_64BIT
12463 && ix86_cmodel
!= CM_SMALL_PIC
12464 && gotoff_operand (addr
, Pmode
))
12467 /* This symbol may be referenced via a displacement from the PIC
12468 base address (@GOTOFF). */
12470 if (reload_in_progress
)
12471 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12472 if (GET_CODE (addr
) == CONST
)
12473 addr
= XEXP (addr
, 0);
12474 if (GET_CODE (addr
) == PLUS
)
12476 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12478 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12481 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12482 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12484 tmpreg
= gen_reg_rtx (Pmode
);
12487 emit_move_insn (tmpreg
, new_rtx
);
12491 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12492 tmpreg
, 1, OPTAB_DIRECT
);
12495 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12497 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12499 /* This symbol may be referenced via a displacement from the PIC
12500 base address (@GOTOFF). */
12502 if (reload_in_progress
)
12503 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12504 if (GET_CODE (addr
) == CONST
)
12505 addr
= XEXP (addr
, 0);
12506 if (GET_CODE (addr
) == PLUS
)
12508 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12510 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12513 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12514 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12515 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12519 emit_move_insn (reg
, new_rtx
);
12523 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12524 /* We can't use @GOTOFF for text labels on VxWorks;
12525 see gotoff_operand. */
12526 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12528 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12530 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12531 return legitimize_dllimport_symbol (addr
, true);
12532 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12533 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12534 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12536 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12537 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12541 /* For x64 PE-COFF there is no GOT table. So we use address
12543 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12545 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12546 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12549 reg
= gen_reg_rtx (Pmode
);
12550 emit_move_insn (reg
, new_rtx
);
12553 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12555 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12556 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12557 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12558 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12561 reg
= gen_reg_rtx (Pmode
);
12562 /* Use directly gen_movsi, otherwise the address is loaded
12563 into register for CSE. We don't want to CSE this addresses,
12564 instead we CSE addresses from the GOT table, so skip this. */
12565 emit_insn (gen_movsi (reg
, new_rtx
));
12570 /* This symbol must be referenced via a load from the
12571 Global Offset Table (@GOT). */
12573 if (reload_in_progress
)
12574 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12575 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12576 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12578 new_rtx
= force_reg (Pmode
, new_rtx
);
12579 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12580 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12581 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12584 reg
= gen_reg_rtx (Pmode
);
12585 emit_move_insn (reg
, new_rtx
);
12591 if (CONST_INT_P (addr
)
12592 && !x86_64_immediate_operand (addr
, VOIDmode
))
12596 emit_move_insn (reg
, addr
);
12600 new_rtx
= force_reg (Pmode
, addr
);
12602 else if (GET_CODE (addr
) == CONST
)
12604 addr
= XEXP (addr
, 0);
12606 /* We must match stuff we generate before. Assume the only
12607 unspecs that can get here are ours. Not that we could do
12608 anything with them anyway.... */
12609 if (GET_CODE (addr
) == UNSPEC
12610 || (GET_CODE (addr
) == PLUS
12611 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12613 gcc_assert (GET_CODE (addr
) == PLUS
);
12615 if (GET_CODE (addr
) == PLUS
)
12617 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12619 /* Check first to see if this is a constant offset from a @GOTOFF
12620 symbol reference. */
12621 if (gotoff_operand (op0
, Pmode
)
12622 && CONST_INT_P (op1
))
12626 if (reload_in_progress
)
12627 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12628 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12630 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12631 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12632 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12636 emit_move_insn (reg
, new_rtx
);
12642 if (INTVAL (op1
) < -16*1024*1024
12643 || INTVAL (op1
) >= 16*1024*1024)
12645 if (!x86_64_immediate_operand (op1
, Pmode
))
12646 op1
= force_reg (Pmode
, op1
);
12647 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12653 rtx base
= legitimize_pic_address (op0
, reg
);
12654 enum machine_mode mode
= GET_MODE (base
);
12656 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12658 if (CONST_INT_P (new_rtx
))
12660 if (INTVAL (new_rtx
) < -16*1024*1024
12661 || INTVAL (new_rtx
) >= 16*1024*1024)
12663 if (!x86_64_immediate_operand (new_rtx
, mode
))
12664 new_rtx
= force_reg (mode
, new_rtx
);
12666 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12669 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12673 if (GET_CODE (new_rtx
) == PLUS
12674 && CONSTANT_P (XEXP (new_rtx
, 1)))
12676 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12677 new_rtx
= XEXP (new_rtx
, 1);
12679 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12687 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12690 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12692 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12694 if (GET_MODE (tp
) != tp_mode
)
12696 gcc_assert (GET_MODE (tp
) == SImode
);
12697 gcc_assert (tp_mode
== DImode
);
12699 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12703 tp
= copy_to_mode_reg (tp_mode
, tp
);
12708 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12710 static GTY(()) rtx ix86_tls_symbol
;
12713 ix86_tls_get_addr (void)
12715 if (!ix86_tls_symbol
)
12718 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12719 ? "___tls_get_addr" : "__tls_get_addr");
12721 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12724 return ix86_tls_symbol
;
12727 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12729 static GTY(()) rtx ix86_tls_module_base_symbol
;
12732 ix86_tls_module_base (void)
12734 if (!ix86_tls_module_base_symbol
)
12736 ix86_tls_module_base_symbol
12737 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12739 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12740 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12743 return ix86_tls_module_base_symbol
;
12746 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12747 false if we expect this to be used for a memory address and true if
12748 we expect to load the address into a register. */
12751 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12753 rtx dest
, base
, off
;
12754 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12755 enum machine_mode tp_mode
= Pmode
;
12760 case TLS_MODEL_GLOBAL_DYNAMIC
:
12761 dest
= gen_reg_rtx (Pmode
);
12766 pic
= pic_offset_table_rtx
;
12769 pic
= gen_reg_rtx (Pmode
);
12770 emit_insn (gen_set_got (pic
));
12774 if (TARGET_GNU2_TLS
)
12777 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12779 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12781 tp
= get_thread_pointer (Pmode
, true);
12782 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12784 if (GET_MODE (x
) != Pmode
)
12785 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12787 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12791 rtx caddr
= ix86_tls_get_addr ();
12795 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12800 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12801 insns
= get_insns ();
12804 if (GET_MODE (x
) != Pmode
)
12805 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12807 RTL_CONST_CALL_P (insns
) = 1;
12808 emit_libcall_block (insns
, dest
, rax
, x
);
12811 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12815 case TLS_MODEL_LOCAL_DYNAMIC
:
12816 base
= gen_reg_rtx (Pmode
);
12821 pic
= pic_offset_table_rtx
;
12824 pic
= gen_reg_rtx (Pmode
);
12825 emit_insn (gen_set_got (pic
));
12829 if (TARGET_GNU2_TLS
)
12831 rtx tmp
= ix86_tls_module_base ();
12834 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12836 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12838 tp
= get_thread_pointer (Pmode
, true);
12839 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12840 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12844 rtx caddr
= ix86_tls_get_addr ();
12848 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12853 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
12854 insns
= get_insns ();
12857 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12858 share the LD_BASE result with other LD model accesses. */
12859 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12860 UNSPEC_TLS_LD_BASE
);
12862 RTL_CONST_CALL_P (insns
) = 1;
12863 emit_libcall_block (insns
, base
, rax
, eqv
);
12866 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12869 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12870 off
= gen_rtx_CONST (Pmode
, off
);
12872 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12874 if (TARGET_GNU2_TLS
)
12876 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12878 if (GET_MODE (x
) != Pmode
)
12879 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12881 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12885 case TLS_MODEL_INITIAL_EXEC
:
12888 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12890 /* The Sun linker took the AMD64 TLS spec literally
12891 and can only handle %rax as destination of the
12892 initial executable code sequence. */
12894 dest
= gen_reg_rtx (DImode
);
12895 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12899 /* Generate DImode references to avoid %fs:(%reg32)
12900 problems and linker IE->LE relaxation bug. */
12903 type
= UNSPEC_GOTNTPOFF
;
12907 if (reload_in_progress
)
12908 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12909 pic
= pic_offset_table_rtx
;
12910 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12912 else if (!TARGET_ANY_GNU_TLS
)
12914 pic
= gen_reg_rtx (Pmode
);
12915 emit_insn (gen_set_got (pic
));
12916 type
= UNSPEC_GOTTPOFF
;
12921 type
= UNSPEC_INDNTPOFF
;
12924 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12925 off
= gen_rtx_CONST (tp_mode
, off
);
12927 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12928 off
= gen_const_mem (tp_mode
, off
);
12929 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12931 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12933 base
= get_thread_pointer (tp_mode
,
12934 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12935 off
= force_reg (tp_mode
, off
);
12936 return gen_rtx_PLUS (tp_mode
, base
, off
);
12940 base
= get_thread_pointer (Pmode
, true);
12941 dest
= gen_reg_rtx (Pmode
);
12942 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12946 case TLS_MODEL_LOCAL_EXEC
:
12947 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12948 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12949 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12950 off
= gen_rtx_CONST (Pmode
, off
);
12952 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12954 base
= get_thread_pointer (Pmode
,
12955 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12956 return gen_rtx_PLUS (Pmode
, base
, off
);
12960 base
= get_thread_pointer (Pmode
, true);
12961 dest
= gen_reg_rtx (Pmode
);
12962 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12967 gcc_unreachable ();
12973 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12976 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12977 htab_t dllimport_map
;
12980 get_dllimport_decl (tree decl
)
12982 struct tree_map
*h
, in
;
12985 const char *prefix
;
12986 size_t namelen
, prefixlen
;
12991 if (!dllimport_map
)
12992 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
12994 in
.hash
= htab_hash_pointer (decl
);
12995 in
.base
.from
= decl
;
12996 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12997 h
= (struct tree_map
*) *loc
;
13001 *loc
= h
= ggc_alloc_tree_map ();
13003 h
->base
.from
= decl
;
13004 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13005 VAR_DECL
, NULL
, ptr_type_node
);
13006 DECL_ARTIFICIAL (to
) = 1;
13007 DECL_IGNORED_P (to
) = 1;
13008 DECL_EXTERNAL (to
) = 1;
13009 TREE_READONLY (to
) = 1;
13011 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13012 name
= targetm
.strip_name_encoding (name
);
13013 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13014 ? "*__imp_" : "*__imp__";
13015 namelen
= strlen (name
);
13016 prefixlen
= strlen (prefix
);
13017 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13018 memcpy (imp_name
, prefix
, prefixlen
);
13019 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13021 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13022 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13023 SET_SYMBOL_REF_DECL (rtl
, to
);
13024 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13026 rtl
= gen_const_mem (Pmode
, rtl
);
13027 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13029 SET_DECL_RTL (to
, rtl
);
13030 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13035 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13036 true if we require the result be a register. */
13039 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13044 gcc_assert (SYMBOL_REF_DECL (symbol
));
13045 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13047 x
= DECL_RTL (imp_decl
);
13049 x
= force_reg (Pmode
, x
);
13053 /* Try machine-dependent ways of modifying an illegitimate address
13054 to be legitimate. If we find one, return the new, valid address.
13055 This macro is used in only one place: `memory_address' in explow.c.
13057 OLDX is the address as it was before break_out_memory_refs was called.
13058 In some cases it is useful to look at this to decide what needs to be done.
13060 It is always safe for this macro to do nothing. It exists to recognize
13061 opportunities to optimize the output.
13063 For the 80386, we handle X+REG by loading X into a register R and
13064 using R+REG. R will go in a general reg and indexing will be used.
13065 However, if REG is a broken-out memory address or multiplication,
13066 nothing needs to be done because REG can certainly go in a general reg.
13068 When -fpic is used, special handling is needed for symbolic references.
13069 See comments by legitimize_pic_address in i386.c for details. */
13072 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13073 enum machine_mode mode
)
13078 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13080 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13081 if (GET_CODE (x
) == CONST
13082 && GET_CODE (XEXP (x
, 0)) == PLUS
13083 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13084 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13086 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13087 (enum tls_model
) log
, false);
13088 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13091 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13093 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13094 return legitimize_dllimport_symbol (x
, true);
13095 if (GET_CODE (x
) == CONST
13096 && GET_CODE (XEXP (x
, 0)) == PLUS
13097 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13098 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13100 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13101 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13105 if (flag_pic
&& SYMBOLIC_CONST (x
))
13106 return legitimize_pic_address (x
, 0);
13109 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13110 return machopic_indirect_data_reference (x
, 0);
13113 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13114 if (GET_CODE (x
) == ASHIFT
13115 && CONST_INT_P (XEXP (x
, 1))
13116 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13119 log
= INTVAL (XEXP (x
, 1));
13120 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13121 GEN_INT (1 << log
));
13124 if (GET_CODE (x
) == PLUS
)
13126 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13128 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13129 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13130 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13133 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13134 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13135 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13136 GEN_INT (1 << log
));
13139 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13140 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13141 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13144 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13145 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13146 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13147 GEN_INT (1 << log
));
13150 /* Put multiply first if it isn't already. */
13151 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13153 rtx tmp
= XEXP (x
, 0);
13154 XEXP (x
, 0) = XEXP (x
, 1);
13159 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13160 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13161 created by virtual register instantiation, register elimination, and
13162 similar optimizations. */
13163 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13166 x
= gen_rtx_PLUS (Pmode
,
13167 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13168 XEXP (XEXP (x
, 1), 0)),
13169 XEXP (XEXP (x
, 1), 1));
13173 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13174 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13175 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13176 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13177 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13178 && CONSTANT_P (XEXP (x
, 1)))
13181 rtx other
= NULL_RTX
;
13183 if (CONST_INT_P (XEXP (x
, 1)))
13185 constant
= XEXP (x
, 1);
13186 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13188 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13190 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13191 other
= XEXP (x
, 1);
13199 x
= gen_rtx_PLUS (Pmode
,
13200 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13201 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13202 plus_constant (Pmode
, other
,
13203 INTVAL (constant
)));
13207 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13210 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13213 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13216 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13219 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13223 && REG_P (XEXP (x
, 1))
13224 && REG_P (XEXP (x
, 0)))
13227 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13230 x
= legitimize_pic_address (x
, 0);
13233 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13236 if (REG_P (XEXP (x
, 0)))
13238 rtx temp
= gen_reg_rtx (Pmode
);
13239 rtx val
= force_operand (XEXP (x
, 1), temp
);
13242 val
= convert_to_mode (Pmode
, val
, 1);
13243 emit_move_insn (temp
, val
);
13246 XEXP (x
, 1) = temp
;
13250 else if (REG_P (XEXP (x
, 1)))
13252 rtx temp
= gen_reg_rtx (Pmode
);
13253 rtx val
= force_operand (XEXP (x
, 0), temp
);
13256 val
= convert_to_mode (Pmode
, val
, 1);
13257 emit_move_insn (temp
, val
);
13260 XEXP (x
, 0) = temp
;
13268 /* Print an integer constant expression in assembler syntax. Addition
13269 and subtraction are the only arithmetic that may appear in these
13270 expressions. FILE is the stdio stream to write to, X is the rtx, and
13271 CODE is the operand print code from the output string. */
13274 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13278 switch (GET_CODE (x
))
13281 gcc_assert (flag_pic
);
13286 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13287 output_addr_const (file
, x
);
13290 const char *name
= XSTR (x
, 0);
13292 /* Mark the decl as referenced so that cgraph will
13293 output the function. */
13294 if (SYMBOL_REF_DECL (x
))
13295 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13298 if (MACHOPIC_INDIRECT
13299 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13300 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13302 assemble_name (file
, name
);
13304 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13305 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13306 fputs ("@PLT", file
);
13313 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13314 assemble_name (asm_out_file
, buf
);
13318 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13322 /* This used to output parentheses around the expression,
13323 but that does not work on the 386 (either ATT or BSD assembler). */
13324 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13328 if (GET_MODE (x
) == VOIDmode
)
13330 /* We can use %d if the number is <32 bits and positive. */
13331 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13332 fprintf (file
, "0x%lx%08lx",
13333 (unsigned long) CONST_DOUBLE_HIGH (x
),
13334 (unsigned long) CONST_DOUBLE_LOW (x
));
13336 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13339 /* We can't handle floating point constants;
13340 TARGET_PRINT_OPERAND must handle them. */
13341 output_operand_lossage ("floating constant misused");
13345 /* Some assemblers need integer constants to appear first. */
13346 if (CONST_INT_P (XEXP (x
, 0)))
13348 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13350 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13354 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13355 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13357 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13363 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13364 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13366 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13368 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13372 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13374 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13379 gcc_assert (XVECLEN (x
, 0) == 1);
13380 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13381 switch (XINT (x
, 1))
13384 fputs ("@GOT", file
);
13386 case UNSPEC_GOTOFF
:
13387 fputs ("@GOTOFF", file
);
13389 case UNSPEC_PLTOFF
:
13390 fputs ("@PLTOFF", file
);
13393 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13394 "(%rip)" : "[rip]", file
);
13396 case UNSPEC_GOTPCREL
:
13397 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13398 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13400 case UNSPEC_GOTTPOFF
:
13401 /* FIXME: This might be @TPOFF in Sun ld too. */
13402 fputs ("@gottpoff", file
);
13405 fputs ("@tpoff", file
);
13407 case UNSPEC_NTPOFF
:
13409 fputs ("@tpoff", file
);
13411 fputs ("@ntpoff", file
);
13413 case UNSPEC_DTPOFF
:
13414 fputs ("@dtpoff", file
);
13416 case UNSPEC_GOTNTPOFF
:
13418 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13419 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13421 fputs ("@gotntpoff", file
);
13423 case UNSPEC_INDNTPOFF
:
13424 fputs ("@indntpoff", file
);
13427 case UNSPEC_MACHOPIC_OFFSET
:
13429 machopic_output_function_base_name (file
);
13433 output_operand_lossage ("invalid UNSPEC as operand");
13439 output_operand_lossage ("invalid expression as operand");
13443 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13444 We need to emit DTP-relative relocations. */
13446 static void ATTRIBUTE_UNUSED
13447 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13449 fputs (ASM_LONG
, file
);
13450 output_addr_const (file
, x
);
13451 fputs ("@dtpoff", file
);
13457 fputs (", 0", file
);
13460 gcc_unreachable ();
13464 /* Return true if X is a representation of the PIC register. This copes
13465 with calls from ix86_find_base_term, where the register might have
13466 been replaced by a cselib value. */
13469 ix86_pic_register_p (rtx x
)
13471 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13472 return (pic_offset_table_rtx
13473 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13475 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13478 /* Helper function for ix86_delegitimize_address.
13479 Attempt to delegitimize TLS local-exec accesses. */
13482 ix86_delegitimize_tls_address (rtx orig_x
)
13484 rtx x
= orig_x
, unspec
;
13485 struct ix86_address addr
;
13487 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13491 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13493 if (ix86_decompose_address (x
, &addr
) == 0
13494 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13495 || addr
.disp
== NULL_RTX
13496 || GET_CODE (addr
.disp
) != CONST
)
13498 unspec
= XEXP (addr
.disp
, 0);
13499 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13500 unspec
= XEXP (unspec
, 0);
13501 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13503 x
= XVECEXP (unspec
, 0, 0);
13504 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13505 if (unspec
!= XEXP (addr
.disp
, 0))
13506 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13509 rtx idx
= addr
.index
;
13510 if (addr
.scale
!= 1)
13511 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13512 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13515 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13516 if (MEM_P (orig_x
))
13517 x
= replace_equiv_address_nv (orig_x
, x
);
13521 /* In the name of slightly smaller debug output, and to cater to
13522 general assembler lossage, recognize PIC+GOTOFF and turn it back
13523 into a direct symbol reference.
13525 On Darwin, this is necessary to avoid a crash, because Darwin
13526 has a different PIC label for each routine but the DWARF debugging
13527 information is not associated with any particular routine, so it's
13528 necessary to remove references to the PIC label from RTL stored by
13529 the DWARF output code. */
13532 ix86_delegitimize_address (rtx x
)
13534 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13535 /* addend is NULL or some rtx if x is something+GOTOFF where
13536 something doesn't include the PIC register. */
13537 rtx addend
= NULL_RTX
;
13538 /* reg_addend is NULL or a multiple of some register. */
13539 rtx reg_addend
= NULL_RTX
;
13540 /* const_addend is NULL or a const_int. */
13541 rtx const_addend
= NULL_RTX
;
13542 /* This is the result, or NULL. */
13543 rtx result
= NULL_RTX
;
13552 if (GET_CODE (x
) == CONST
13553 && GET_CODE (XEXP (x
, 0)) == PLUS
13554 && GET_MODE (XEXP (x
, 0)) == Pmode
13555 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13556 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13557 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13559 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13560 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13561 if (MEM_P (orig_x
))
13562 x
= replace_equiv_address_nv (orig_x
, x
);
13565 if (GET_CODE (x
) != CONST
13566 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13567 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13568 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13569 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13570 return ix86_delegitimize_tls_address (orig_x
);
13571 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13572 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13574 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13582 if (GET_CODE (x
) != PLUS
13583 || GET_CODE (XEXP (x
, 1)) != CONST
)
13584 return ix86_delegitimize_tls_address (orig_x
);
13586 if (ix86_pic_register_p (XEXP (x
, 0)))
13587 /* %ebx + GOT/GOTOFF */
13589 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13591 /* %ebx + %reg * scale + GOT/GOTOFF */
13592 reg_addend
= XEXP (x
, 0);
13593 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13594 reg_addend
= XEXP (reg_addend
, 1);
13595 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13596 reg_addend
= XEXP (reg_addend
, 0);
13599 reg_addend
= NULL_RTX
;
13600 addend
= XEXP (x
, 0);
13604 addend
= XEXP (x
, 0);
13606 x
= XEXP (XEXP (x
, 1), 0);
13607 if (GET_CODE (x
) == PLUS
13608 && CONST_INT_P (XEXP (x
, 1)))
13610 const_addend
= XEXP (x
, 1);
13614 if (GET_CODE (x
) == UNSPEC
13615 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13616 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13617 result
= XVECEXP (x
, 0, 0);
13619 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13620 && !MEM_P (orig_x
))
13621 result
= XVECEXP (x
, 0, 0);
13624 return ix86_delegitimize_tls_address (orig_x
);
13627 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13629 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13632 /* If the rest of original X doesn't involve the PIC register, add
13633 addend and subtract pic_offset_table_rtx. This can happen e.g.
13635 leal (%ebx, %ecx, 4), %ecx
13637 movl foo@GOTOFF(%ecx), %edx
13638 in which case we return (%ecx - %ebx) + foo. */
13639 if (pic_offset_table_rtx
)
13640 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13641 pic_offset_table_rtx
),
13646 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13648 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13649 if (result
== NULL_RTX
)
13655 /* If X is a machine specific address (i.e. a symbol or label being
13656 referenced as a displacement from the GOT implemented using an
13657 UNSPEC), then return the base term. Otherwise return X. */
13660 ix86_find_base_term (rtx x
)
13666 if (GET_CODE (x
) != CONST
)
13668 term
= XEXP (x
, 0);
13669 if (GET_CODE (term
) == PLUS
13670 && (CONST_INT_P (XEXP (term
, 1))
13671 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13672 term
= XEXP (term
, 0);
13673 if (GET_CODE (term
) != UNSPEC
13674 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13675 && XINT (term
, 1) != UNSPEC_PCREL
))
13678 return XVECEXP (term
, 0, 0);
13681 return ix86_delegitimize_address (x
);
13685 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13686 bool fp
, FILE *file
)
13688 const char *suffix
;
13690 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13692 code
= ix86_fp_compare_code_to_integer (code
);
13696 code
= reverse_condition (code
);
13747 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13751 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13752 Those same assemblers have the same but opposite lossage on cmov. */
13753 if (mode
== CCmode
)
13754 suffix
= fp
? "nbe" : "a";
13755 else if (mode
== CCCmode
)
13758 gcc_unreachable ();
13774 gcc_unreachable ();
13778 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13795 gcc_unreachable ();
13799 /* ??? As above. */
13800 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13801 suffix
= fp
? "nb" : "ae";
13804 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13808 /* ??? As above. */
13809 if (mode
== CCmode
)
13811 else if (mode
== CCCmode
)
13812 suffix
= fp
? "nb" : "ae";
13814 gcc_unreachable ();
13817 suffix
= fp
? "u" : "p";
13820 suffix
= fp
? "nu" : "np";
13823 gcc_unreachable ();
13825 fputs (suffix
, file
);
13828 /* Print the name of register X to FILE based on its machine mode and number.
13829 If CODE is 'w', pretend the mode is HImode.
13830 If CODE is 'b', pretend the mode is QImode.
13831 If CODE is 'k', pretend the mode is SImode.
13832 If CODE is 'q', pretend the mode is DImode.
13833 If CODE is 'x', pretend the mode is V4SFmode.
13834 If CODE is 't', pretend the mode is V8SFmode.
13835 If CODE is 'h', pretend the reg is the 'high' byte register.
13836 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13837 If CODE is 'd', duplicate the operand for AVX instruction.
13841 print_reg (rtx x
, int code
, FILE *file
)
13844 unsigned int regno
;
13845 bool duplicated
= code
== 'd' && TARGET_AVX
;
13847 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13852 gcc_assert (TARGET_64BIT
);
13853 fputs ("rip", file
);
13857 regno
= true_regnum (x
);
13858 gcc_assert (regno
!= ARG_POINTER_REGNUM
13859 && regno
!= FRAME_POINTER_REGNUM
13860 && regno
!= FLAGS_REG
13861 && regno
!= FPSR_REG
13862 && regno
!= FPCR_REG
);
13864 if (code
== 'w' || MMX_REG_P (x
))
13866 else if (code
== 'b')
13868 else if (code
== 'k')
13870 else if (code
== 'q')
13872 else if (code
== 'y')
13874 else if (code
== 'h')
13876 else if (code
== 'x')
13878 else if (code
== 't')
13881 code
= GET_MODE_SIZE (GET_MODE (x
));
13883 /* Irritatingly, AMD extended registers use different naming convention
13884 from the normal registers: "r%d[bwd]" */
13885 if (REX_INT_REGNO_P (regno
))
13887 gcc_assert (TARGET_64BIT
);
13889 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
13893 error ("extended registers have no high halves");
13908 error ("unsupported operand size for extended register");
13918 if (STACK_TOP_P (x
))
13927 if (! ANY_FP_REG_P (x
))
13928 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13933 reg
= hi_reg_name
[regno
];
13936 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13938 reg
= qi_reg_name
[regno
];
13941 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13943 reg
= qi_high_reg_name
[regno
];
13948 gcc_assert (!duplicated
);
13950 fputs (hi_reg_name
[regno
] + 1, file
);
13955 gcc_unreachable ();
13961 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13962 fprintf (file
, ", %%%s", reg
);
13964 fprintf (file
, ", %s", reg
);
13968 /* Locate some local-dynamic symbol still in use by this function
13969 so that we can print its name in some tls_local_dynamic_base
13973 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13977 if (GET_CODE (x
) == SYMBOL_REF
13978 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13980 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
13987 static const char *
13988 get_some_local_dynamic_name (void)
13992 if (cfun
->machine
->some_ld_name
)
13993 return cfun
->machine
->some_ld_name
;
13995 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13996 if (NONDEBUG_INSN_P (insn
)
13997 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13998 return cfun
->machine
->some_ld_name
;
14003 /* Meaning of CODE:
14004 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14005 C -- print opcode suffix for set/cmov insn.
14006 c -- like C, but print reversed condition
14007 F,f -- likewise, but for floating-point.
14008 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14010 R -- print the prefix for register names.
14011 z -- print the opcode suffix for the size of the current operand.
14012 Z -- likewise, with special suffixes for x87 instructions.
14013 * -- print a star (in certain assembler syntax)
14014 A -- print an absolute memory reference.
14015 E -- print address with DImode register names if TARGET_64BIT.
14016 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14017 s -- print a shift double count, followed by the assemblers argument
14019 b -- print the QImode name of the register for the indicated operand.
14020 %b0 would print %al if operands[0] is reg 0.
14021 w -- likewise, print the HImode name of the register.
14022 k -- likewise, print the SImode name of the register.
14023 q -- likewise, print the DImode name of the register.
14024 x -- likewise, print the V4SFmode name of the register.
14025 t -- likewise, print the V8SFmode name of the register.
14026 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14027 y -- print "st(0)" instead of "st" as a register.
14028 d -- print duplicated register operand for AVX instruction.
14029 D -- print condition for SSE cmp instruction.
14030 P -- if PIC, print an @PLT suffix.
14031 p -- print raw symbol name.
14032 X -- don't print any sort of PIC '@' suffix for a symbol.
14033 & -- print some in-use local-dynamic symbol name.
14034 H -- print a memory address offset by 8; used for sse high-parts
14035 Y -- print condition for XOP pcom* instruction.
14036 + -- print a branch hint as 'cs' or 'ds' prefix
14037 ; -- print a semicolon (after prefixes due to bug in older gas).
14038 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14039 @ -- print a segment register of thread base pointer load
14040 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14044 ix86_print_operand (FILE *file
, rtx x
, int code
)
14051 switch (ASSEMBLER_DIALECT
)
14058 /* Intel syntax. For absolute addresses, registers should not
14059 be surrounded by braces. */
14063 ix86_print_operand (file
, x
, 0);
14070 gcc_unreachable ();
14073 ix86_print_operand (file
, x
, 0);
14077 /* Wrap address in an UNSPEC to declare special handling. */
14079 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14081 output_address (x
);
14085 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14090 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14095 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14100 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14105 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14110 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14115 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14116 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14119 switch (GET_MODE_SIZE (GET_MODE (x
)))
14134 output_operand_lossage
14135 ("invalid operand size for operand code 'O'");
14144 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14146 /* Opcodes don't get size suffixes if using Intel opcodes. */
14147 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14150 switch (GET_MODE_SIZE (GET_MODE (x
)))
14169 output_operand_lossage
14170 ("invalid operand size for operand code 'z'");
14175 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14177 (0, "non-integer operand used with operand code 'z'");
14181 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14182 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14185 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14187 switch (GET_MODE_SIZE (GET_MODE (x
)))
14190 #ifdef HAVE_AS_IX86_FILDS
14200 #ifdef HAVE_AS_IX86_FILDQ
14203 fputs ("ll", file
);
14211 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14213 /* 387 opcodes don't get size suffixes
14214 if the operands are registers. */
14215 if (STACK_REG_P (x
))
14218 switch (GET_MODE_SIZE (GET_MODE (x
)))
14239 output_operand_lossage
14240 ("invalid operand type used with operand code 'Z'");
14244 output_operand_lossage
14245 ("invalid operand size for operand code 'Z'");
14263 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14265 ix86_print_operand (file
, x
, 0);
14266 fputs (", ", file
);
14271 switch (GET_CODE (x
))
14274 fputs ("neq", file
);
14277 fputs ("eq", file
);
14281 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14285 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14289 fputs ("le", file
);
14293 fputs ("lt", file
);
14296 fputs ("unord", file
);
14299 fputs ("ord", file
);
14302 fputs ("ueq", file
);
14305 fputs ("nlt", file
);
14308 fputs ("nle", file
);
14311 fputs ("ule", file
);
14314 fputs ("ult", file
);
14317 fputs ("une", file
);
14320 output_operand_lossage ("operand is not a condition code, "
14321 "invalid operand code 'Y'");
14327 /* Little bit of braindamage here. The SSE compare instructions
14328 does use completely different names for the comparisons that the
14329 fp conditional moves. */
14330 switch (GET_CODE (x
))
14335 fputs ("eq_us", file
);
14339 fputs ("eq", file
);
14344 fputs ("nge", file
);
14348 fputs ("lt", file
);
14353 fputs ("ngt", file
);
14357 fputs ("le", file
);
14360 fputs ("unord", file
);
14365 fputs ("neq_oq", file
);
14369 fputs ("neq", file
);
14374 fputs ("ge", file
);
14378 fputs ("nlt", file
);
14383 fputs ("gt", file
);
14387 fputs ("nle", file
);
14390 fputs ("ord", file
);
14393 output_operand_lossage ("operand is not a condition code, "
14394 "invalid operand code 'D'");
14401 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14402 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14408 if (!COMPARISON_P (x
))
14410 output_operand_lossage ("operand is not a condition code, "
14411 "invalid operand code '%c'", code
);
14414 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14415 code
== 'c' || code
== 'f',
14416 code
== 'F' || code
== 'f',
14421 if (!offsettable_memref_p (x
))
14423 output_operand_lossage ("operand is not an offsettable memory "
14424 "reference, invalid operand code 'H'");
14427 /* It doesn't actually matter what mode we use here, as we're
14428 only going to use this for printing. */
14429 x
= adjust_address_nv (x
, DImode
, 8);
14433 gcc_assert (CONST_INT_P (x
));
14435 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14436 #ifdef HAVE_AS_IX86_HLE
14437 fputs ("xacquire ", file
);
14439 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14441 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14442 #ifdef HAVE_AS_IX86_HLE
14443 fputs ("xrelease ", file
);
14445 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14447 /* We do not want to print value of the operand. */
14451 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14457 const char *name
= get_some_local_dynamic_name ();
14459 output_operand_lossage ("'%%&' used without any "
14460 "local dynamic TLS references");
14462 assemble_name (file
, name
);
14471 || optimize_function_for_size_p (cfun
)
14472 || !TARGET_BRANCH_PREDICTION_HINTS
)
14475 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14478 int pred_val
= INTVAL (XEXP (x
, 0));
14480 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14481 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14483 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14485 = final_forward_branch_p (current_output_insn
) == 0;
14487 /* Emit hints only in the case default branch prediction
14488 heuristics would fail. */
14489 if (taken
!= cputaken
)
14491 /* We use 3e (DS) prefix for taken branches and
14492 2e (CS) prefix for not taken branches. */
14494 fputs ("ds ; ", file
);
14496 fputs ("cs ; ", file
);
14504 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14510 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14513 /* The kernel uses a different segment register for performance
14514 reasons; a system call would not have to trash the userspace
14515 segment register, which would be expensive. */
14516 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14517 fputs ("fs", file
);
14519 fputs ("gs", file
);
14523 putc (TARGET_AVX2
? 'i' : 'f', file
);
14527 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14528 fputs ("addr32 ", file
);
14532 output_operand_lossage ("invalid operand code '%c'", code
);
14537 print_reg (x
, code
, file
);
14539 else if (MEM_P (x
))
14541 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14542 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14543 && GET_MODE (x
) != BLKmode
)
14546 switch (GET_MODE_SIZE (GET_MODE (x
)))
14548 case 1: size
= "BYTE"; break;
14549 case 2: size
= "WORD"; break;
14550 case 4: size
= "DWORD"; break;
14551 case 8: size
= "QWORD"; break;
14552 case 12: size
= "TBYTE"; break;
14554 if (GET_MODE (x
) == XFmode
)
14559 case 32: size
= "YMMWORD"; break;
14561 gcc_unreachable ();
14564 /* Check for explicit size override (codes 'b', 'w', 'k',
14568 else if (code
== 'w')
14570 else if (code
== 'k')
14572 else if (code
== 'q')
14574 else if (code
== 'x')
14577 fputs (size
, file
);
14578 fputs (" PTR ", file
);
14582 /* Avoid (%rip) for call operands. */
14583 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14584 && !CONST_INT_P (x
))
14585 output_addr_const (file
, x
);
14586 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14587 output_operand_lossage ("invalid constraints for operand");
14589 output_address (x
);
14592 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14597 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14598 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14600 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14602 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14604 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14606 fprintf (file
, "0x%08x", (unsigned int) l
);
14609 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14614 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14615 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14617 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14619 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14622 /* These float cases don't actually occur as immediate operands. */
14623 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14627 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14628 fputs (dstr
, file
);
14633 /* We have patterns that allow zero sets of memory, for instance.
14634 In 64-bit mode, we should probably support all 8-byte vectors,
14635 since we can in fact encode that into an immediate. */
14636 if (GET_CODE (x
) == CONST_VECTOR
)
14638 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14642 if (code
!= 'P' && code
!= 'p')
14644 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14646 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14649 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14650 || GET_CODE (x
) == LABEL_REF
)
14652 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14655 fputs ("OFFSET FLAT:", file
);
14658 if (CONST_INT_P (x
))
14659 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14660 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14661 output_pic_addr_const (file
, x
, code
);
14663 output_addr_const (file
, x
);
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true iff CODE is
   one of the punctuation characters ix86_print_operand handles.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '*':
    case '+':
    case '&':
    case ';':
    case '~':
    case '^':
      return true;
    default:
      return false;
    }
}
14674 /* Print a memory operand whose address is ADDR. */
14677 ix86_print_operand_address (FILE *file
, rtx addr
)
14679 struct ix86_address parts
;
14680 rtx base
, index
, disp
;
14686 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14688 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14689 gcc_assert (parts
.index
== NULL_RTX
);
14690 parts
.index
= XVECEXP (addr
, 0, 1);
14691 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14692 addr
= XVECEXP (addr
, 0, 0);
14695 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14697 gcc_assert (TARGET_64BIT
);
14698 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14702 ok
= ix86_decompose_address (addr
, &parts
);
14707 index
= parts
.index
;
14709 scale
= parts
.scale
;
14717 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14719 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14722 gcc_unreachable ();
14725 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14726 if (TARGET_64BIT
&& !base
&& !index
)
14730 if (GET_CODE (disp
) == CONST
14731 && GET_CODE (XEXP (disp
, 0)) == PLUS
14732 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14733 symbol
= XEXP (XEXP (disp
, 0), 0);
14735 if (GET_CODE (symbol
) == LABEL_REF
14736 || (GET_CODE (symbol
) == SYMBOL_REF
14737 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14740 if (!base
&& !index
)
14742 /* Displacement only requires special attention. */
14744 if (CONST_INT_P (disp
))
14746 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14747 fputs ("ds:", file
);
14748 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14751 output_pic_addr_const (file
, disp
, 0);
14753 output_addr_const (file
, disp
);
14757 /* Print SImode register names to force addr32 prefix. */
14758 if (SImode_address_operand (addr
, VOIDmode
))
14760 #ifdef ENABLE_CHECKING
14761 gcc_assert (TARGET_64BIT
);
14762 switch (GET_CODE (addr
))
14765 gcc_assert (GET_MODE (addr
) == SImode
);
14766 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14770 gcc_assert (GET_MODE (addr
) == DImode
);
14773 gcc_unreachable ();
14776 gcc_assert (!code
);
14782 && CONST_INT_P (disp
)
14783 && INTVAL (disp
) < -16*1024*1024)
14785 /* X32 runs in 64-bit mode, where displacement, DISP, in
14786 address DISP(%r64), is encoded as 32-bit immediate sign-
14787 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14788 address is %r64 + 0xffffffffbffffd00. When %r64 <
14789 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14790 which is invalid for x32. The correct address is %r64
14791 - 0x40000300 == 0xf7ffdd64. To properly encode
14792 -0x40000300(%r64) for x32, we zero-extend negative
14793 displacement by forcing addr32 prefix which truncates
14794 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14795 zero-extend all negative displacements, including -1(%rsp).
14796 However, for small negative displacements, sign-extension
14797 won't cause overflow. We only zero-extend negative
14798 displacements if they < -16*1024*1024, which is also used
14799 to check legitimate address displacements for PIC. */
14803 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14808 output_pic_addr_const (file
, disp
, 0);
14809 else if (GET_CODE (disp
) == LABEL_REF
)
14810 output_asm_label (disp
);
14812 output_addr_const (file
, disp
);
14817 print_reg (base
, code
, file
);
14821 print_reg (index
, vsib
? 0 : code
, file
);
14822 if (scale
!= 1 || vsib
)
14823 fprintf (file
, ",%d", scale
);
14829 rtx offset
= NULL_RTX
;
14833 /* Pull out the offset of a symbol; print any symbol itself. */
14834 if (GET_CODE (disp
) == CONST
14835 && GET_CODE (XEXP (disp
, 0)) == PLUS
14836 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14838 offset
= XEXP (XEXP (disp
, 0), 1);
14839 disp
= gen_rtx_CONST (VOIDmode
,
14840 XEXP (XEXP (disp
, 0), 0));
14844 output_pic_addr_const (file
, disp
, 0);
14845 else if (GET_CODE (disp
) == LABEL_REF
)
14846 output_asm_label (disp
);
14847 else if (CONST_INT_P (disp
))
14850 output_addr_const (file
, disp
);
14856 print_reg (base
, code
, file
);
14859 if (INTVAL (offset
) >= 0)
14861 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14865 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14872 print_reg (index
, vsib
? 0 : code
, file
);
14873 if (scale
!= 1 || vsib
)
14874 fprintf (file
, "*%d", scale
);
14881 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14884 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14888 if (GET_CODE (x
) != UNSPEC
)
14891 op
= XVECEXP (x
, 0, 0);
14892 switch (XINT (x
, 1))
14894 case UNSPEC_GOTTPOFF
:
14895 output_addr_const (file
, op
);
14896 /* FIXME: This might be @TPOFF in Sun ld. */
14897 fputs ("@gottpoff", file
);
14900 output_addr_const (file
, op
);
14901 fputs ("@tpoff", file
);
14903 case UNSPEC_NTPOFF
:
14904 output_addr_const (file
, op
);
14906 fputs ("@tpoff", file
);
14908 fputs ("@ntpoff", file
);
14910 case UNSPEC_DTPOFF
:
14911 output_addr_const (file
, op
);
14912 fputs ("@dtpoff", file
);
14914 case UNSPEC_GOTNTPOFF
:
14915 output_addr_const (file
, op
);
14917 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14918 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14920 fputs ("@gotntpoff", file
);
14922 case UNSPEC_INDNTPOFF
:
14923 output_addr_const (file
, op
);
14924 fputs ("@indntpoff", file
);
14927 case UNSPEC_MACHOPIC_OFFSET
:
14928 output_addr_const (file
, op
);
14930 machopic_output_function_base_name (file
);
14934 case UNSPEC_STACK_CHECK
:
14938 gcc_assert (flag_split_stack
);
14940 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14941 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14943 gcc_unreachable ();
14946 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14957 /* Split one or more double-mode RTL references into pairs of half-mode
14958 references. The RTL can be REG, offsettable MEM, integer constant, or
14959 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14960 split and "num" is its length. lo_half and hi_half are output arrays
14961 that parallel "operands". */
14964 split_double_mode (enum machine_mode mode
, rtx operands
[],
14965 int num
, rtx lo_half
[], rtx hi_half
[])
14967 enum machine_mode half_mode
;
14973 half_mode
= DImode
;
14976 half_mode
= SImode
;
14979 gcc_unreachable ();
14982 byte
= GET_MODE_SIZE (half_mode
);
14986 rtx op
= operands
[num
];
14988 /* simplify_subreg refuse to split volatile memory addresses,
14989 but we still have to handle it. */
14992 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14993 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14997 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14998 GET_MODE (op
) == VOIDmode
14999 ? mode
: GET_MODE (op
), 0);
15000 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15001 GET_MODE (op
) == VOIDmode
15002 ? mode
: GET_MODE (op
), byte
);
15007 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15008 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15009 is the expression of the binary operation. The output may either be
15010 emitted here, or returned to the caller, like all output_* functions.
15012 There is no guarantee that the operands are the same mode, as they
15013 might be within FLOAT or FLOAT_EXTEND expressions. */
15015 #ifndef SYSV386_COMPAT
15016 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15017 wants to fix the assemblers because that causes incompatibility
15018 with gcc. No-one wants to fix gcc because that causes
15019 incompatibility with assemblers... You can use the option of
15020 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15021 #define SYSV386_COMPAT 1
15025 output_387_binary_op (rtx insn
, rtx
*operands
)
15027 static char buf
[40];
15030 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15032 #ifdef ENABLE_CHECKING
15033 /* Even if we do not want to check the inputs, this documents input
15034 constraints. Which helps in understanding the following code. */
15035 if (STACK_REG_P (operands
[0])
15036 && ((REG_P (operands
[1])
15037 && REGNO (operands
[0]) == REGNO (operands
[1])
15038 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15039 || (REG_P (operands
[2])
15040 && REGNO (operands
[0]) == REGNO (operands
[2])
15041 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15042 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15045 gcc_assert (is_sse
);
15048 switch (GET_CODE (operands
[3]))
15051 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15052 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15060 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15061 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15069 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15070 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15078 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15079 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15087 gcc_unreachable ();
15094 strcpy (buf
, ssep
);
15095 if (GET_MODE (operands
[0]) == SFmode
)
15096 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15098 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15102 strcpy (buf
, ssep
+ 1);
15103 if (GET_MODE (operands
[0]) == SFmode
)
15104 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15106 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15112 switch (GET_CODE (operands
[3]))
15116 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15118 rtx temp
= operands
[2];
15119 operands
[2] = operands
[1];
15120 operands
[1] = temp
;
15123 /* know operands[0] == operands[1]. */
15125 if (MEM_P (operands
[2]))
15131 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15133 if (STACK_TOP_P (operands
[0]))
15134 /* How is it that we are storing to a dead operand[2]?
15135 Well, presumably operands[1] is dead too. We can't
15136 store the result to st(0) as st(0) gets popped on this
15137 instruction. Instead store to operands[2] (which I
15138 think has to be st(1)). st(1) will be popped later.
15139 gcc <= 2.8.1 didn't have this check and generated
15140 assembly code that the Unixware assembler rejected. */
15141 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15143 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15147 if (STACK_TOP_P (operands
[0]))
15148 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15150 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15155 if (MEM_P (operands
[1]))
15161 if (MEM_P (operands
[2]))
15167 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15170 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15171 derived assemblers, confusingly reverse the direction of
15172 the operation for fsub{r} and fdiv{r} when the
15173 destination register is not st(0). The Intel assembler
15174 doesn't have this brain damage. Read !SYSV386_COMPAT to
15175 figure out what the hardware really does. */
15176 if (STACK_TOP_P (operands
[0]))
15177 p
= "{p\t%0, %2|rp\t%2, %0}";
15179 p
= "{rp\t%2, %0|p\t%0, %2}";
15181 if (STACK_TOP_P (operands
[0]))
15182 /* As above for fmul/fadd, we can't store to st(0). */
15183 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15185 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15190 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15193 if (STACK_TOP_P (operands
[0]))
15194 p
= "{rp\t%0, %1|p\t%1, %0}";
15196 p
= "{p\t%1, %0|rp\t%0, %1}";
15198 if (STACK_TOP_P (operands
[0]))
15199 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15201 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15206 if (STACK_TOP_P (operands
[0]))
15208 if (STACK_TOP_P (operands
[1]))
15209 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15211 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15214 else if (STACK_TOP_P (operands
[1]))
15217 p
= "{\t%1, %0|r\t%0, %1}";
15219 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15225 p
= "{r\t%2, %0|\t%0, %2}";
15227 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15233 gcc_unreachable ();
15240 /* Check if a 256bit AVX register is referenced inside of EXP. */
15243 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15247 if (GET_CODE (exp
) == SUBREG
)
15248 exp
= SUBREG_REG (exp
);
15251 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15257 /* Return needed mode for entity in optimize_mode_switching pass. */
15260 ix86_avx_u128_mode_needed (rtx insn
)
15266 /* Needed mode is set to AVX_U128_CLEAN if there are
15267 no 256bit modes used in function arguments. */
15268 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15270 link
= XEXP (link
, 1))
15272 if (GET_CODE (XEXP (link
, 0)) == USE
)
15274 rtx arg
= XEXP (XEXP (link
, 0), 0);
15276 if (ix86_check_avx256_register (&arg
, NULL
))
15277 return AVX_U128_ANY
;
15281 return AVX_U128_CLEAN
;
15284 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15285 changes state only when a 256bit register is written to, but we need
15286 to prevent the compiler from moving optimal insertion point above
15287 eventual read from 256bit register. */
15288 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15289 return AVX_U128_DIRTY
;
15291 return AVX_U128_ANY
;
15294 /* Return mode that i387 must be switched into
15295 prior to the execution of insn. */
15298 ix86_i387_mode_needed (int entity
, rtx insn
)
15300 enum attr_i387_cw mode
;
15302 /* The mode UNINITIALIZED is used to store control word after a
15303 function call or ASM pattern. The mode ANY specify that function
15304 has no requirements on the control word and make no changes in the
15305 bits we are interested in. */
15308 || (NONJUMP_INSN_P (insn
)
15309 && (asm_noperands (PATTERN (insn
)) >= 0
15310 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15311 return I387_CW_UNINITIALIZED
;
15313 if (recog_memoized (insn
) < 0)
15314 return I387_CW_ANY
;
15316 mode
= get_attr_i387_cw (insn
);
15321 if (mode
== I387_CW_TRUNC
)
15326 if (mode
== I387_CW_FLOOR
)
15331 if (mode
== I387_CW_CEIL
)
15336 if (mode
== I387_CW_MASK_PM
)
15341 gcc_unreachable ();
15344 return I387_CW_ANY
;
15347 /* Return mode that entity must be switched into
15348 prior to the execution of insn. */
15351 ix86_mode_needed (int entity
, rtx insn
)
15356 return ix86_avx_u128_mode_needed (insn
);
15361 return ix86_i387_mode_needed (entity
, insn
);
15363 gcc_unreachable ();
15368 /* Check if a 256bit AVX register is referenced in stores. */
15371 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15373 if (ix86_check_avx256_register (&dest
, NULL
))
15375 bool *used
= (bool *) data
;
15380 /* Calculate mode of upper 128bit AVX registers after the insn. */
15383 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15385 rtx pat
= PATTERN (insn
);
15387 if (vzeroupper_operation (pat
, VOIDmode
)
15388 || vzeroall_operation (pat
, VOIDmode
))
15389 return AVX_U128_CLEAN
;
15391 /* We know that state is clean after CALL insn if there are no
15392 256bit registers used in the function return register. */
15395 bool avx_reg256_found
= false;
15396 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15397 if (!avx_reg256_found
)
15398 return AVX_U128_CLEAN
;
15401 /* Otherwise, return current mode. Remember that if insn
15402 references AVX 256bit registers, the mode was already changed
15403 to DIRTY from MODE_NEEDED. */
15407 /* Return the mode that an insn results in. */
15410 ix86_mode_after (int entity
, int mode
, rtx insn
)
15415 return ix86_avx_u128_mode_after (mode
, insn
);
15422 gcc_unreachable ();
15427 ix86_avx_u128_mode_entry (void)
15431 /* Entry mode is set to AVX_U128_DIRTY if there are
15432 256bit modes used in function arguments. */
15433 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15434 arg
= TREE_CHAIN (arg
))
15436 rtx incoming
= DECL_INCOMING_RTL (arg
);
15438 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15439 return AVX_U128_DIRTY
;
15442 return AVX_U128_CLEAN
;
15445 /* Return a mode that ENTITY is assumed to be
15446 switched to at function entry. */
15449 ix86_mode_entry (int entity
)
15454 return ix86_avx_u128_mode_entry ();
15459 return I387_CW_ANY
;
15461 gcc_unreachable ();
15466 ix86_avx_u128_mode_exit (void)
15468 rtx reg
= crtl
->return_rtx
;
15470 /* Exit mode is set to AVX_U128_DIRTY if there are
15471 256bit modes used in the function return register. */
15472 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15473 return AVX_U128_DIRTY
;
15475 return AVX_U128_CLEAN
;
15478 /* Return a mode that ENTITY is assumed to be
15479 switched to at function exit. */
15482 ix86_mode_exit (int entity
)
15487 return ix86_avx_u128_mode_exit ();
15492 return I387_CW_ANY
;
15494 gcc_unreachable ();
15498 /* Output code to initialize control word copies used by trunc?f?i and
15499 rounding patterns. CURRENT_MODE is set to current control word,
15500 while NEW_MODE is set to new control word. */
15503 emit_i387_cw_initialization (int mode
)
15505 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15508 enum ix86_stack_slot slot
;
15510 rtx reg
= gen_reg_rtx (HImode
);
15512 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15513 emit_move_insn (reg
, copy_rtx (stored_mode
));
15515 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15516 || optimize_function_for_size_p (cfun
))
15520 case I387_CW_TRUNC
:
15521 /* round toward zero (truncate) */
15522 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15523 slot
= SLOT_CW_TRUNC
;
15526 case I387_CW_FLOOR
:
15527 /* round down toward -oo */
15528 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15529 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15530 slot
= SLOT_CW_FLOOR
;
15534 /* round up toward +oo */
15535 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15536 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15537 slot
= SLOT_CW_CEIL
;
15540 case I387_CW_MASK_PM
:
15541 /* mask precision exception for nearbyint() */
15542 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15543 slot
= SLOT_CW_MASK_PM
;
15547 gcc_unreachable ();
15554 case I387_CW_TRUNC
:
15555 /* round toward zero (truncate) */
15556 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15557 slot
= SLOT_CW_TRUNC
;
15560 case I387_CW_FLOOR
:
15561 /* round down toward -oo */
15562 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15563 slot
= SLOT_CW_FLOOR
;
15567 /* round up toward +oo */
15568 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15569 slot
= SLOT_CW_CEIL
;
15572 case I387_CW_MASK_PM
:
15573 /* mask precision exception for nearbyint() */
15574 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15575 slot
= SLOT_CW_MASK_PM
;
15579 gcc_unreachable ();
15583 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15585 new_mode
= assign_386_stack_local (HImode
, slot
);
15586 emit_move_insn (new_mode
, reg
);
15589 /* Emit vzeroupper. */
15592 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15596 /* Cancel automatic vzeroupper insertion if there are
15597 live call-saved SSE registers at the insertion point. */
15599 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15600 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15604 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15605 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15608 emit_insn (gen_avx_vzeroupper ());
15611 /* Generate one or more insns to set ENTITY to MODE. */
15614 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15619 if (mode
== AVX_U128_CLEAN
)
15620 ix86_avx_emit_vzeroupper (regs_live
);
15626 if (mode
!= I387_CW_ANY
15627 && mode
!= I387_CW_UNINITIALIZED
)
15628 emit_i387_cw_initialization (mode
);
15631 gcc_unreachable ();
15635 /* Output code for INSN to convert a float to a signed int. OPERANDS
15636 are the insn operands. The output may be [HSD]Imode and the input
15637 operand may be [SDX]Fmode. */
15640 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15642 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15643 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15644 int round_mode
= get_attr_i387_cw (insn
);
15646 /* Jump through a hoop or two for DImode, since the hardware has no
15647 non-popping instruction. We used to do this a different way, but
15648 that was somewhat fragile and broke with post-reload splitters. */
15649 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15650 output_asm_insn ("fld\t%y1", operands
);
15652 gcc_assert (STACK_TOP_P (operands
[1]));
15653 gcc_assert (MEM_P (operands
[0]));
15654 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15657 output_asm_insn ("fisttp%Z0\t%0", operands
);
15660 if (round_mode
!= I387_CW_ANY
)
15661 output_asm_insn ("fldcw\t%3", operands
);
15662 if (stack_top_dies
|| dimode_p
)
15663 output_asm_insn ("fistp%Z0\t%0", operands
);
15665 output_asm_insn ("fist%Z0\t%0", operands
);
15666 if (round_mode
!= I387_CW_ANY
)
15667 output_asm_insn ("fldcw\t%2", operands
);
15673 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15674 have the values zero or one, indicates the ffreep insn's operand
15675 from the OPERANDS array. */
15677 static const char *
15678 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15680 if (TARGET_USE_FFREEP
)
15681 #ifdef HAVE_AS_IX86_FFREEP
15682 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15685 static char retval
[32];
15686 int regno
= REGNO (operands
[opno
]);
15688 gcc_assert (STACK_REGNO_P (regno
));
15690 regno
-= FIRST_STACK_REG
;
15692 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15697 return opno
? "fstp\t%y1" : "fstp\t%y0";
15701 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15702 should be used. UNORDERED_P is true when fucom should be used. */
15705 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15707 int stack_top_dies
;
15708 rtx cmp_op0
, cmp_op1
;
15709 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15713 cmp_op0
= operands
[0];
15714 cmp_op1
= operands
[1];
15718 cmp_op0
= operands
[1];
15719 cmp_op1
= operands
[2];
15724 if (GET_MODE (operands
[0]) == SFmode
)
15726 return "%vucomiss\t{%1, %0|%0, %1}";
15728 return "%vcomiss\t{%1, %0|%0, %1}";
15731 return "%vucomisd\t{%1, %0|%0, %1}";
15733 return "%vcomisd\t{%1, %0|%0, %1}";
15736 gcc_assert (STACK_TOP_P (cmp_op0
));
15738 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15740 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15742 if (stack_top_dies
)
15744 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15745 return output_387_ffreep (operands
, 1);
15748 return "ftst\n\tfnstsw\t%0";
15751 if (STACK_REG_P (cmp_op1
)
15753 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15754 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15756 /* If both the top of the 387 stack dies, and the other operand
15757 is also a stack register that dies, then this must be a
15758 `fcompp' float compare */
15762 /* There is no double popping fcomi variant. Fortunately,
15763 eflags is immune from the fstp's cc clobbering. */
15765 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15767 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15768 return output_387_ffreep (operands
, 0);
15773 return "fucompp\n\tfnstsw\t%0";
15775 return "fcompp\n\tfnstsw\t%0";
15780 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15782 static const char * const alt
[16] =
15784 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15785 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15786 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15787 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15789 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15790 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15794 "fcomi\t{%y1, %0|%0, %y1}",
15795 "fcomip\t{%y1, %0|%0, %y1}",
15796 "fucomi\t{%y1, %0|%0, %y1}",
15797 "fucomip\t{%y1, %0|%0, %y1}",
15808 mask
= eflags_p
<< 3;
15809 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15810 mask
|= unordered_p
<< 1;
15811 mask
|= stack_top_dies
;
15813 gcc_assert (mask
< 16);
15822 ix86_output_addr_vec_elt (FILE *file
, int value
)
15824 const char *directive
= ASM_LONG
;
15828 directive
= ASM_QUAD
;
15830 gcc_assert (!TARGET_64BIT
);
15833 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15837 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15839 const char *directive
= ASM_LONG
;
15842 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15843 directive
= ASM_QUAD
;
15845 gcc_assert (!TARGET_64BIT
);
15847 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15848 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15849 fprintf (file
, "%s%s%d-%s%d\n",
15850 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15851 else if (HAVE_AS_GOTOFF_IN_DATA
)
15852 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15854 else if (TARGET_MACHO
)
15856 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15857 machopic_output_function_base_name (file
);
15862 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15863 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15866 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15870 ix86_expand_clear (rtx dest
)
15874 /* We play register width games, which are only valid after reload. */
15875 gcc_assert (reload_completed
);
15877 /* Avoid HImode and its attendant prefix byte. */
15878 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15879 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15880 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15882 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15883 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15885 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15886 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15892 /* X is an unchanging MEM. If it is a constant pool reference, return
15893 the constant pool rtx, else NULL. */
15896 maybe_get_pool_constant (rtx x
)
15898 x
= ix86_delegitimize_address (XEXP (x
, 0));
15900 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15901 return get_pool_constant (x
);
15907 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15910 enum tls_model model
;
15915 if (GET_CODE (op1
) == SYMBOL_REF
)
15917 model
= SYMBOL_REF_TLS_MODEL (op1
);
15920 op1
= legitimize_tls_address (op1
, model
, true);
15921 op1
= force_operand (op1
, op0
);
15924 op1
= convert_to_mode (mode
, op1
, 1);
15926 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15927 && SYMBOL_REF_DLLIMPORT_P (op1
))
15928 op1
= legitimize_dllimport_symbol (op1
, false);
15930 else if (GET_CODE (op1
) == CONST
15931 && GET_CODE (XEXP (op1
, 0)) == PLUS
15932 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15934 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15935 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15938 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15940 tmp
= legitimize_tls_address (symbol
, model
, true);
15941 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15942 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15943 tmp
= legitimize_dllimport_symbol (symbol
, true);
15947 tmp
= force_operand (tmp
, NULL
);
15948 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15949 op0
, 1, OPTAB_DIRECT
);
15952 op1
= convert_to_mode (mode
, tmp
, 1);
15956 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15957 && symbolic_operand (op1
, mode
))
15959 if (TARGET_MACHO
&& !TARGET_64BIT
)
15962 /* dynamic-no-pic */
15963 if (MACHOPIC_INDIRECT
)
15965 rtx temp
= ((reload_in_progress
15966 || ((op0
&& REG_P (op0
))
15968 ? op0
: gen_reg_rtx (Pmode
));
15969 op1
= machopic_indirect_data_reference (op1
, temp
);
15971 op1
= machopic_legitimize_pic_address (op1
, mode
,
15972 temp
== op1
? 0 : temp
);
15974 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15976 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15980 if (GET_CODE (op0
) == MEM
)
15981 op1
= force_reg (Pmode
, op1
);
15985 if (GET_CODE (temp
) != REG
)
15986 temp
= gen_reg_rtx (Pmode
);
15987 temp
= legitimize_pic_address (op1
, temp
);
15992 /* dynamic-no-pic */
15998 op1
= force_reg (mode
, op1
);
15999 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16001 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16002 op1
= legitimize_pic_address (op1
, reg
);
16005 op1
= convert_to_mode (mode
, op1
, 1);
16012 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16013 || !push_operand (op0
, mode
))
16015 op1
= force_reg (mode
, op1
);
16017 if (push_operand (op0
, mode
)
16018 && ! general_no_elim_operand (op1
, mode
))
16019 op1
= copy_to_mode_reg (mode
, op1
);
16021 /* Force large constants in 64bit compilation into register
16022 to get them CSEed. */
16023 if (can_create_pseudo_p ()
16024 && (mode
== DImode
) && TARGET_64BIT
16025 && immediate_operand (op1
, mode
)
16026 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16027 && !register_operand (op0
, mode
)
16029 op1
= copy_to_mode_reg (mode
, op1
);
16031 if (can_create_pseudo_p ()
16032 && FLOAT_MODE_P (mode
)
16033 && GET_CODE (op1
) == CONST_DOUBLE
)
16035 /* If we are loading a floating point constant to a register,
16036 force the value to memory now, since we'll get better code
16037 out the back end. */
16039 op1
= validize_mem (force_const_mem (mode
, op1
));
16040 if (!register_operand (op0
, mode
))
16042 rtx temp
= gen_reg_rtx (mode
);
16043 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16044 emit_move_insn (op0
, temp
);
16050 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16054 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16056 rtx op0
= operands
[0], op1
= operands
[1];
16057 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16059 /* Force constants other than zero into memory. We do not know how
16060 the instructions used to build constants modify the upper 64 bits
16061 of the register, once we have that information we may be able
16062 to handle some of them more efficiently. */
16063 if (can_create_pseudo_p ()
16064 && register_operand (op0
, mode
)
16065 && (CONSTANT_P (op1
)
16066 || (GET_CODE (op1
) == SUBREG
16067 && CONSTANT_P (SUBREG_REG (op1
))))
16068 && !standard_sse_constant_p (op1
))
16069 op1
= validize_mem (force_const_mem (mode
, op1
));
16071 /* We need to check memory alignment for SSE mode since attribute
16072 can make operands unaligned. */
16073 if (can_create_pseudo_p ()
16074 && SSE_REG_MODE_P (mode
)
16075 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16076 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16080 /* ix86_expand_vector_move_misalign() does not like constants ... */
16081 if (CONSTANT_P (op1
)
16082 || (GET_CODE (op1
) == SUBREG
16083 && CONSTANT_P (SUBREG_REG (op1
))))
16084 op1
= validize_mem (force_const_mem (mode
, op1
));
16086 /* ... nor both arguments in memory. */
16087 if (!register_operand (op0
, mode
)
16088 && !register_operand (op1
, mode
))
16089 op1
= force_reg (mode
, op1
);
16091 tmp
[0] = op0
; tmp
[1] = op1
;
16092 ix86_expand_vector_move_misalign (mode
, tmp
);
16096 /* Make operand1 a register if it isn't already. */
16097 if (can_create_pseudo_p ()
16098 && !register_operand (op0
, mode
)
16099 && !register_operand (op1
, mode
))
16101 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16105 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16108 /* Split 32-byte AVX unaligned load and store if needed. */
16111 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16114 rtx (*extract
) (rtx
, rtx
, rtx
);
16115 rtx (*load_unaligned
) (rtx
, rtx
);
16116 rtx (*store_unaligned
) (rtx
, rtx
);
16117 enum machine_mode mode
;
16119 switch (GET_MODE (op0
))
16122 gcc_unreachable ();
16124 extract
= gen_avx_vextractf128v32qi
;
16125 load_unaligned
= gen_avx_loaddqu256
;
16126 store_unaligned
= gen_avx_storedqu256
;
16130 extract
= gen_avx_vextractf128v8sf
;
16131 load_unaligned
= gen_avx_loadups256
;
16132 store_unaligned
= gen_avx_storeups256
;
16136 extract
= gen_avx_vextractf128v4df
;
16137 load_unaligned
= gen_avx_loadupd256
;
16138 store_unaligned
= gen_avx_storeupd256
;
16145 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16147 rtx r
= gen_reg_rtx (mode
);
16148 m
= adjust_address (op1
, mode
, 0);
16149 emit_move_insn (r
, m
);
16150 m
= adjust_address (op1
, mode
, 16);
16151 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16152 emit_move_insn (op0
, r
);
16155 emit_insn (load_unaligned (op0
, op1
));
16157 else if (MEM_P (op0
))
16159 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16161 m
= adjust_address (op0
, mode
, 0);
16162 emit_insn (extract (m
, op1
, const0_rtx
));
16163 m
= adjust_address (op0
, mode
, 16);
16164 emit_insn (extract (m
, op1
, const1_rtx
));
16167 emit_insn (store_unaligned (op0
, op1
));
16170 gcc_unreachable ();
16173 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16174 straight to ix86_expand_vector_move. */
16175 /* Code generation for scalar reg-reg moves of single and double precision data:
16176 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16180 if (x86_sse_partial_reg_dependency == true)
16185 Code generation for scalar loads of double precision data:
16186 if (x86_sse_split_regs == true)
16187 movlpd mem, reg (gas syntax)
16191 Code generation for unaligned packed loads of single precision data
16192 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16193 if (x86_sse_unaligned_move_optimal)
16196 if (x86_sse_partial_reg_dependency == true)
16208 Code generation for unaligned packed loads of double precision data
16209 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16210 if (x86_sse_unaligned_move_optimal)
16213 if (x86_sse_split_regs == true)
16226 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16234 && GET_MODE_SIZE (mode
) == 32)
16236 switch (GET_MODE_CLASS (mode
))
16238 case MODE_VECTOR_INT
:
16240 op0
= gen_lowpart (V32QImode
, op0
);
16241 op1
= gen_lowpart (V32QImode
, op1
);
16244 case MODE_VECTOR_FLOAT
:
16245 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16249 gcc_unreachable ();
16257 /* ??? If we have typed data, then it would appear that using
16258 movdqu is the only way to get unaligned data loaded with
16260 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16262 op0
= gen_lowpart (V16QImode
, op0
);
16263 op1
= gen_lowpart (V16QImode
, op1
);
16264 /* We will eventually emit movups based on insn attributes. */
16265 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16267 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16272 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16273 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16274 || optimize_function_for_size_p (cfun
))
16276 /* We will eventually emit movups based on insn attributes. */
16277 emit_insn (gen_sse2_loadupd (op0
, op1
));
16281 /* When SSE registers are split into halves, we can avoid
16282 writing to the top half twice. */
16283 if (TARGET_SSE_SPLIT_REGS
)
16285 emit_clobber (op0
);
16290 /* ??? Not sure about the best option for the Intel chips.
16291 The following would seem to satisfy; the register is
16292 entirely cleared, breaking the dependency chain. We
16293 then store to the upper half, with a dependency depth
16294 of one. A rumor has it that Intel recommends two movsd
16295 followed by an unpacklpd, but this is unconfirmed. And
16296 given that the dependency depth of the unpacklpd would
16297 still be one, I'm not sure why this would be better. */
16298 zero
= CONST0_RTX (V2DFmode
);
16301 m
= adjust_address (op1
, DFmode
, 0);
16302 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16303 m
= adjust_address (op1
, DFmode
, 8);
16304 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16309 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16310 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16311 || optimize_function_for_size_p (cfun
))
16313 op0
= gen_lowpart (V4SFmode
, op0
);
16314 op1
= gen_lowpart (V4SFmode
, op1
);
16315 emit_insn (gen_sse_loadups (op0
, op1
));
16319 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16320 emit_move_insn (op0
, CONST0_RTX (mode
));
16322 emit_clobber (op0
);
16324 if (mode
!= V4SFmode
)
16325 op0
= gen_lowpart (V4SFmode
, op0
);
16327 m
= adjust_address (op1
, V2SFmode
, 0);
16328 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16329 m
= adjust_address (op1
, V2SFmode
, 8);
16330 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16333 else if (MEM_P (op0
))
16335 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16337 op0
= gen_lowpart (V16QImode
, op0
);
16338 op1
= gen_lowpart (V16QImode
, op1
);
16339 /* We will eventually emit movups based on insn attributes. */
16340 emit_insn (gen_sse2_storedqu (op0
, op1
));
16342 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16345 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16346 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16347 || optimize_function_for_size_p (cfun
))
16348 /* We will eventually emit movups based on insn attributes. */
16349 emit_insn (gen_sse2_storeupd (op0
, op1
));
16352 m
= adjust_address (op0
, DFmode
, 0);
16353 emit_insn (gen_sse2_storelpd (m
, op1
));
16354 m
= adjust_address (op0
, DFmode
, 8);
16355 emit_insn (gen_sse2_storehpd (m
, op1
));
16360 if (mode
!= V4SFmode
)
16361 op1
= gen_lowpart (V4SFmode
, op1
);
16364 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16365 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16366 || optimize_function_for_size_p (cfun
))
16368 op0
= gen_lowpart (V4SFmode
, op0
);
16369 emit_insn (gen_sse_storeups (op0
, op1
));
16373 m
= adjust_address (op0
, V2SFmode
, 0);
16374 emit_insn (gen_sse_storelps (m
, op1
));
16375 m
= adjust_address (op0
, V2SFmode
, 8);
16376 emit_insn (gen_sse_storehps (m
, op1
));
16381 gcc_unreachable ();
16384 /* Expand a push in MODE. This is some mode for which we do not support
16385 proper push instructions, at least from the registers that we expect
16386 the value to live in. */
16389 ix86_expand_push (enum machine_mode mode
, rtx x
)
16393 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16394 GEN_INT (-GET_MODE_SIZE (mode
)),
16395 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16396 if (tmp
!= stack_pointer_rtx
)
16397 emit_move_insn (stack_pointer_rtx
, tmp
);
16399 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16401 /* When we push an operand onto stack, it has to be aligned at least
16402 at the function argument boundary. However since we don't have
16403 the argument type, we can't determine the actual argument
16405 emit_move_insn (tmp
, x
);
16408 /* Helper function of ix86_fixup_binary_operands to canonicalize
16409 operand order. Returns true if the operands should be swapped. */
16412 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16415 rtx dst
= operands
[0];
16416 rtx src1
= operands
[1];
16417 rtx src2
= operands
[2];
16419 /* If the operation is not commutative, we can't do anything. */
16420 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16423 /* Highest priority is that src1 should match dst. */
16424 if (rtx_equal_p (dst
, src1
))
16426 if (rtx_equal_p (dst
, src2
))
16429 /* Next highest priority is that immediate constants come second. */
16430 if (immediate_operand (src2
, mode
))
16432 if (immediate_operand (src1
, mode
))
16435 /* Lowest priority is that memory references should come second. */
16445 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16446 destination to use for the operation. If different from the true
16447 destination in operands[0], a copy operation will be required. */
16450 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16453 rtx dst
= operands
[0];
16454 rtx src1
= operands
[1];
16455 rtx src2
= operands
[2];
16457 /* Canonicalize operand order. */
16458 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16462 /* It is invalid to swap operands of different modes. */
16463 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16470 /* Both source operands cannot be in memory. */
16471 if (MEM_P (src1
) && MEM_P (src2
))
16473 /* Optimization: Only read from memory once. */
16474 if (rtx_equal_p (src1
, src2
))
16476 src2
= force_reg (mode
, src2
);
16480 src2
= force_reg (mode
, src2
);
16483 /* If the destination is memory, and we do not have matching source
16484 operands, do things in registers. */
16485 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16486 dst
= gen_reg_rtx (mode
);
16488 /* Source 1 cannot be a constant. */
16489 if (CONSTANT_P (src1
))
16490 src1
= force_reg (mode
, src1
);
16492 /* Source 1 cannot be a non-matching memory. */
16493 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16494 src1
= force_reg (mode
, src1
);
16496 /* Improve address combine. */
16498 && GET_MODE_CLASS (mode
) == MODE_INT
16500 src2
= force_reg (mode
, src2
);
16502 operands
[1] = src1
;
16503 operands
[2] = src2
;
16507 /* Similarly, but assume that the destination has already been
16508 set up properly. */
16511 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16512 enum machine_mode mode
, rtx operands
[])
16514 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16515 gcc_assert (dst
== operands
[0]);
16518 /* Attempt to expand a binary operator. Make the expansion closer to the
16519 actual machine, then just general_operand, which will allow 3 separate
16520 memory references (one output, two input) in a single insn. */
16523 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16526 rtx src1
, src2
, dst
, op
, clob
;
16528 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16529 src1
= operands
[1];
16530 src2
= operands
[2];
16532 /* Emit the instruction. */
16534 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16535 if (reload_in_progress
)
16537 /* Reload doesn't know about the flags register, and doesn't know that
16538 it doesn't want to clobber it. We can only do this with PLUS. */
16539 gcc_assert (code
== PLUS
);
16542 else if (reload_completed
16544 && !rtx_equal_p (dst
, src1
))
16546 /* This is going to be an LEA; avoid splitting it later. */
16551 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16552 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16555 /* Fix up the destination if needed. */
16556 if (dst
!= operands
[0])
16557 emit_move_insn (operands
[0], dst
);
16560 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16561 the given OPERANDS. */
16564 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16567 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16568 if (GET_CODE (operands
[1]) == SUBREG
)
16573 else if (GET_CODE (operands
[2]) == SUBREG
)
16578 /* Optimize (__m128i) d | (__m128i) e and similar code
16579 when d and e are float vectors into float vector logical
16580 insn. In C/C++ without using intrinsics there is no other way
16581 to express vector logical operation on float vectors than
16582 to cast them temporarily to integer vectors. */
16584 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16585 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16586 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16587 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16588 && SUBREG_BYTE (op1
) == 0
16589 && (GET_CODE (op2
) == CONST_VECTOR
16590 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16591 && SUBREG_BYTE (op2
) == 0))
16592 && can_create_pseudo_p ())
16595 switch (GET_MODE (SUBREG_REG (op1
)))
16601 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16602 if (GET_CODE (op2
) == CONST_VECTOR
)
16604 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16605 op2
= force_reg (GET_MODE (dst
), op2
);
16610 op2
= SUBREG_REG (operands
[2]);
16611 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16612 op2
= force_reg (GET_MODE (dst
), op2
);
16614 op1
= SUBREG_REG (op1
);
16615 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16616 op1
= force_reg (GET_MODE (dst
), op1
);
16617 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16618 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16620 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
16626 if (!nonimmediate_operand (operands
[1], mode
))
16627 operands
[1] = force_reg (mode
, operands
[1]);
16628 if (!nonimmediate_operand (operands
[2], mode
))
16629 operands
[2] = force_reg (mode
, operands
[2]);
16630 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16631 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16632 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16636 /* Return TRUE or FALSE depending on whether the binary operator meets the
16637 appropriate constraints. */
16640 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16643 rtx dst
= operands
[0];
16644 rtx src1
= operands
[1];
16645 rtx src2
= operands
[2];
16647 /* Both source operands cannot be in memory. */
16648 if (MEM_P (src1
) && MEM_P (src2
))
16651 /* Canonicalize operand order for commutative operators. */
16652 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16659 /* If the destination is memory, we must have a matching source operand. */
16660 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16663 /* Source 1 cannot be a constant. */
16664 if (CONSTANT_P (src1
))
16667 /* Source 1 cannot be a non-matching memory. */
16668 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16669 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16670 return (code
== AND
16673 || (TARGET_64BIT
&& mode
== DImode
))
16674 && satisfies_constraint_L (src2
));
16679 /* Attempt to expand a unary operator. Make the expansion closer to the
16680 actual machine, then just general_operand, which will allow 2 separate
16681 memory references (one output, one input) in a single insn. */
16684 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16687 int matching_memory
;
16688 rtx src
, dst
, op
, clob
;
16693 /* If the destination is memory, and we do not have matching source
16694 operands, do things in registers. */
16695 matching_memory
= 0;
16698 if (rtx_equal_p (dst
, src
))
16699 matching_memory
= 1;
16701 dst
= gen_reg_rtx (mode
);
16704 /* When source operand is memory, destination must match. */
16705 if (MEM_P (src
) && !matching_memory
)
16706 src
= force_reg (mode
, src
);
16708 /* Emit the instruction. */
16710 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16711 if (reload_in_progress
|| code
== NOT
)
16713 /* Reload doesn't know about the flags register, and doesn't know that
16714 it doesn't want to clobber it. */
16715 gcc_assert (code
== NOT
);
16720 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16721 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16724 /* Fix up the destination if needed. */
16725 if (dst
!= operands
[0])
16726 emit_move_insn (operands
[0], dst
);
16729 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16730 divisor are within the range [0-255]. */
16733 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16736 rtx end_label
, qimode_label
;
16737 rtx insn
, div
, mod
;
16738 rtx scratch
, tmp0
, tmp1
, tmp2
;
16739 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16740 rtx (*gen_zero_extend
) (rtx
, rtx
);
16741 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16746 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16747 gen_test_ccno_1
= gen_testsi_ccno_1
;
16748 gen_zero_extend
= gen_zero_extendqisi2
;
16751 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16752 gen_test_ccno_1
= gen_testdi_ccno_1
;
16753 gen_zero_extend
= gen_zero_extendqidi2
;
16756 gcc_unreachable ();
16759 end_label
= gen_label_rtx ();
16760 qimode_label
= gen_label_rtx ();
16762 scratch
= gen_reg_rtx (mode
);
16764 /* Use 8bit unsigned divimod if dividend and divisor are within
16765 the range [0-255]. */
16766 emit_move_insn (scratch
, operands
[2]);
16767 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16768 scratch
, 1, OPTAB_DIRECT
);
16769 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16770 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16771 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16772 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16773 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16775 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16776 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16777 JUMP_LABEL (insn
) = qimode_label
;
16779 /* Generate original signed/unsigned divimod. */
16780 div
= gen_divmod4_1 (operands
[0], operands
[1],
16781 operands
[2], operands
[3]);
16784 /* Branch to the end. */
16785 emit_jump_insn (gen_jump (end_label
));
16788 /* Generate 8bit unsigned divide. */
16789 emit_label (qimode_label
);
16790 /* Don't use operands[0] for result of 8bit divide since not all
16791 registers support QImode ZERO_EXTRACT. */
16792 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16793 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16794 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16795 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16799 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16800 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16804 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16805 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16808 /* Extract remainder from AH. */
16809 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16810 if (REG_P (operands
[1]))
16811 insn
= emit_move_insn (operands
[1], tmp1
);
16814 /* Need a new scratch register since the old one has result
16816 scratch
= gen_reg_rtx (mode
);
16817 emit_move_insn (scratch
, tmp1
);
16818 insn
= emit_move_insn (operands
[1], scratch
);
16820 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16822 /* Zero extend quotient from AL. */
16823 tmp1
= gen_lowpart (QImode
, tmp0
);
16824 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16825 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16827 emit_label (end_label
);
16830 #define LEA_MAX_STALL (3)
16831 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16833 /* Increase given DISTANCE in half-cycles according to
16834 dependencies between PREV and NEXT instructions.
16835 Add 1 half-cycle if there is no dependency and
16836 go to next cycle if there is some dependecy. */
16838 static unsigned int
16839 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16844 if (!prev
|| !next
)
16845 return distance
+ (distance
& 1) + 2;
16847 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16848 return distance
+ 1;
16850 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16851 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16852 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16853 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16854 return distance
+ (distance
& 1) + 2;
16856 return distance
+ 1;
16859 /* Function checks if instruction INSN defines register number
16860 REGNO1 or REGNO2. */
16863 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16868 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16869 if (DF_REF_REG_DEF_P (*def_rec
)
16870 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16871 && (regno1
== DF_REF_REGNO (*def_rec
)
16872 || regno2
== DF_REF_REGNO (*def_rec
)))
16880 /* Function checks if instruction INSN uses register number
16881 REGNO as a part of address expression. */
16884 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16888 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16889 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16895 /* Search backward for non-agu definition of register number REGNO1
16896 or register number REGNO2 in basic block starting from instruction
16897 START up to head of basic block or instruction INSN.
16899 Function puts true value into *FOUND var if definition was found
16900 and false otherwise.
16902 Distance in half-cycles between START and found instruction or head
16903 of BB is added to DISTANCE and returned. */
16906 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16907 rtx insn
, int distance
,
16908 rtx start
, bool *found
)
16910 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16918 && distance
< LEA_SEARCH_THRESHOLD
)
16920 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16922 distance
= increase_distance (prev
, next
, distance
);
16923 if (insn_defines_reg (regno1
, regno2
, prev
))
16925 if (recog_memoized (prev
) < 0
16926 || get_attr_type (prev
) != TYPE_LEA
)
16935 if (prev
== BB_HEAD (bb
))
16938 prev
= PREV_INSN (prev
);
16944 /* Search backward for non-agu definition of register number REGNO1
16945 or register number REGNO2 in INSN's basic block until
16946 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16947 2. Reach neighbour BBs boundary, or
16948 3. Reach agu definition.
16949 Returns the distance between the non-agu definition point and INSN.
16950 If no definition point, returns -1. */
16953 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16956 basic_block bb
= BLOCK_FOR_INSN (insn
);
16958 bool found
= false;
16960 if (insn
!= BB_HEAD (bb
))
16961 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16962 distance
, PREV_INSN (insn
),
16965 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16969 bool simple_loop
= false;
16971 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16974 simple_loop
= true;
16979 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16981 BB_END (bb
), &found
);
16984 int shortest_dist
= -1;
16985 bool found_in_bb
= false;
16987 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16990 = distance_non_agu_define_in_bb (regno1
, regno2
,
16996 if (shortest_dist
< 0)
16997 shortest_dist
= bb_dist
;
16998 else if (bb_dist
> 0)
16999 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17005 distance
= shortest_dist
;
17009 /* get_attr_type may modify recog data. We want to make sure
17010 that recog data is valid for instruction INSN, on which
17011 distance_non_agu_define is called. INSN is unchanged here. */
17012 extract_insn_cached (insn
);
17017 return distance
>> 1;
17020 /* Return the distance in half-cycles between INSN and the next
17021 insn that uses register number REGNO in memory address added
17022 to DISTANCE. Return -1 if REGNO0 is set.
17024 Put true value into *FOUND if register usage was found and
17026 Put true value into *REDEFINED if register redefinition was
17027 found and false otherwise. */
17030 distance_agu_use_in_bb (unsigned int regno
,
17031 rtx insn
, int distance
, rtx start
,
17032 bool *found
, bool *redefined
)
17034 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17039 *redefined
= false;
17043 && distance
< LEA_SEARCH_THRESHOLD
)
17045 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17047 distance
= increase_distance(prev
, next
, distance
);
17048 if (insn_uses_reg_mem (regno
, next
))
17050 /* Return DISTANCE if OP0 is used in memory
17051 address in NEXT. */
17056 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17058 /* Return -1 if OP0 is set in NEXT. */
17066 if (next
== BB_END (bb
))
17069 next
= NEXT_INSN (next
);
17075 /* Return the distance between INSN and the next insn that uses
17076 register number REGNO0 in memory address. Return -1 if no such
17077 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17080 distance_agu_use (unsigned int regno0
, rtx insn
)
17082 basic_block bb
= BLOCK_FOR_INSN (insn
);
17084 bool found
= false;
17085 bool redefined
= false;
17087 if (insn
!= BB_END (bb
))
17088 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17090 &found
, &redefined
);
17092 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17096 bool simple_loop
= false;
17098 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17101 simple_loop
= true;
17106 distance
= distance_agu_use_in_bb (regno0
, insn
,
17107 distance
, BB_HEAD (bb
),
17108 &found
, &redefined
);
17111 int shortest_dist
= -1;
17112 bool found_in_bb
= false;
17113 bool redefined_in_bb
= false;
17115 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17118 = distance_agu_use_in_bb (regno0
, insn
,
17119 distance
, BB_HEAD (e
->dest
),
17120 &found_in_bb
, &redefined_in_bb
);
17123 if (shortest_dist
< 0)
17124 shortest_dist
= bb_dist
;
17125 else if (bb_dist
> 0)
17126 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17132 distance
= shortest_dist
;
17136 if (!found
|| redefined
)
17139 return distance
>> 1;
17142 /* Define this macro to tune LEA priority vs ADD; it takes effect when
17143    there is a dilemma of choosing LEA or ADD.
17144    Negative value: ADD is more preferred than LEA
17146    Positive value: LEA is more preferred than ADD */
17147 #define IX86_LEA_PRIORITY 0
17149 /* Return true if usage of lea INSN has performance advantage
17150 over a sequence of instructions. Instructions sequence has
17151 SPLIT_COST cycles higher latency than lea latency. */
17154 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17155 unsigned int regno2
, int split_cost
)
17157 int dist_define
, dist_use
;
17159 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17160 dist_use
= distance_agu_use (regno0
, insn
);
17162 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17164 /* If there is no non AGU operand definition, no AGU
17165 operand usage and split cost is 0 then both lea
17166 and non lea variants have same priority. Currently
17167 we prefer lea for 64 bit code and non lea on 32 bit
17169 if (dist_use
< 0 && split_cost
== 0)
17170 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17175 /* With longer definitions distance lea is more preferable.
17176 Here we change it to take into account splitting cost and
17178 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17180 /* If there is no use in memory addess then we just check
17181 that split cost exceeds AGU stall. */
17183 return dist_define
> LEA_MAX_STALL
;
17185 /* If this insn has both backward non-agu dependence and forward
17186 agu dependence, the one with short distance takes effect. */
17187 return dist_define
>= dist_use
;
17190 /* Return true if it is legal to clobber flags by INSN and
17191 false otherwise. */
17194 ix86_ok_to_clobber_flags (rtx insn
)
17196 basic_block bb
= BLOCK_FOR_INSN (insn
);
17202 if (NONDEBUG_INSN_P (insn
))
17204 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17205 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17208 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17212 if (insn
== BB_END (bb
))
17215 insn
= NEXT_INSN (insn
);
17218 live
= df_get_live_out(bb
);
17219 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17222 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17223 move and add to avoid AGU stalls. */
17226 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17228 unsigned int regno0
, regno1
, regno2
;
17230 /* Check if we need to optimize. */
17231 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17234 /* Check it is correct to split here. */
17235 if (!ix86_ok_to_clobber_flags(insn
))
17238 regno0
= true_regnum (operands
[0]);
17239 regno1
= true_regnum (operands
[1]);
17240 regno2
= true_regnum (operands
[2]);
17242 /* We need to split only adds with non destructive
17243 destination operand. */
17244 if (regno0
== regno1
|| regno0
== regno2
)
17247 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17250 /* Return true if we should emit lea instruction instead of mov
17254 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17256 unsigned int regno0
, regno1
;
17258 /* Check if we need to optimize. */
17259 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17262 /* Use lea for reg to reg moves only. */
17263 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17266 regno0
= true_regnum (operands
[0]);
17267 regno1
= true_regnum (operands
[1]);
17269 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17272 /* Return true if we need to split lea into a sequence of
17273 instructions to avoid AGU stalls. */
17276 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17278 unsigned int regno0
, regno1
, regno2
;
17280 struct ix86_address parts
;
17283 /* Check we need to optimize. */
17284 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17287 /* Check it is correct to split here. */
17288 if (!ix86_ok_to_clobber_flags(insn
))
17291 ok
= ix86_decompose_address (operands
[1], &parts
);
17294 /* There should be at least two components in the address. */
17295 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17296 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17299 /* We should not split into add if non legitimate pic
17300 operand is used as displacement. */
17301 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17304 regno0
= true_regnum (operands
[0]) ;
17305 regno1
= INVALID_REGNUM
;
17306 regno2
= INVALID_REGNUM
;
17309 regno1
= true_regnum (parts
.base
);
17311 regno2
= true_regnum (parts
.index
);
17315 /* Compute how many cycles we will add to execution time
17316 if split lea into a sequence of instructions. */
17317 if (parts
.base
|| parts
.index
)
17319 /* Have to use mov instruction if non desctructive
17320 destination form is used. */
17321 if (regno1
!= regno0
&& regno2
!= regno0
)
17324 /* Have to add index to base if both exist. */
17325 if (parts
.base
&& parts
.index
)
17328 /* Have to use shift and adds if scale is 2 or greater. */
17329 if (parts
.scale
> 1)
17331 if (regno0
!= regno1
)
17333 else if (regno2
== regno0
)
17336 split_cost
+= parts
.scale
;
17339 /* Have to use add instruction with immediate if
17340 disp is non zero. */
17341 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17344 /* Subtract the price of lea. */
17348 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17351 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17352 matches destination. RTX includes clobber of FLAGS_REG. */
17355 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17360 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17361 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17363 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17366 /* Return true if regno1 def is nearest to the insn. */
17369 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17372 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17376 while (prev
&& prev
!= start
)
17378 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17380 prev
= PREV_INSN (prev
);
17383 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17385 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17387 prev
= PREV_INSN (prev
);
17390 /* None of the regs is defined in the bb. */
17394 /* Split lea instructions into a sequence of instructions
17395 which are executed on ALU to avoid AGU stalls.
17396 It is assumed that it is allowed to clobber flags register
17397 at lea position. */
17400 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17402 unsigned int regno0
, regno1
, regno2
;
17403 struct ix86_address parts
;
17407 ok
= ix86_decompose_address (operands
[1], &parts
);
17410 target
= gen_lowpart (mode
, operands
[0]);
17412 regno0
= true_regnum (target
);
17413 regno1
= INVALID_REGNUM
;
17414 regno2
= INVALID_REGNUM
;
17418 parts
.base
= gen_lowpart (mode
, parts
.base
);
17419 regno1
= true_regnum (parts
.base
);
17424 parts
.index
= gen_lowpart (mode
, parts
.index
);
17425 regno2
= true_regnum (parts
.index
);
17429 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17431 if (parts
.scale
> 1)
17433 /* Case r1 = r1 + ... */
17434 if (regno1
== regno0
)
17436 /* If we have a case r1 = r1 + C * r1 then we
17437 should use multiplication which is very
17438 expensive. Assume cost model is wrong if we
17439 have such case here. */
17440 gcc_assert (regno2
!= regno0
);
17442 for (adds
= parts
.scale
; adds
> 0; adds
--)
17443 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17447 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17448 if (regno0
!= regno2
)
17449 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17451 /* Use shift for scaling. */
17452 ix86_emit_binop (ASHIFT
, mode
, target
,
17453 GEN_INT (exact_log2 (parts
.scale
)));
17456 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17458 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17459 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17462 else if (!parts
.base
&& !parts
.index
)
17464 gcc_assert(parts
.disp
);
17465 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17471 if (regno0
!= regno2
)
17472 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17474 else if (!parts
.index
)
17476 if (regno0
!= regno1
)
17477 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17481 if (regno0
== regno1
)
17483 else if (regno0
== regno2
)
17489 /* Find better operand for SET instruction, depending
17490 on which definition is farther from the insn. */
17491 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17492 tmp
= parts
.index
, tmp1
= parts
.base
;
17494 tmp
= parts
.base
, tmp1
= parts
.index
;
17496 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17498 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17499 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17501 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17505 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17508 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17509 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17513 /* Return true if it is ok to optimize an ADD operation to LEA
17514 operation to avoid flag register consumation. For most processors,
17515 ADD is faster than LEA. For the processors like ATOM, if the
17516 destination register of LEA holds an actual address which will be
17517 used soon, LEA is better and otherwise ADD is better. */
17520 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17522 unsigned int regno0
= true_regnum (operands
[0]);
17523 unsigned int regno1
= true_regnum (operands
[1]);
17524 unsigned int regno2
= true_regnum (operands
[2]);
17526 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17527 if (regno0
!= regno1
&& regno0
!= regno2
)
17530 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17533 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17536 /* Return true if destination reg of SET_BODY is shift count of
17540 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17546 /* Retrieve destination of SET_BODY. */
17547 switch (GET_CODE (set_body
))
17550 set_dest
= SET_DEST (set_body
);
17551 if (!set_dest
|| !REG_P (set_dest
))
17555 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17556 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17564 /* Retrieve shift count of USE_BODY. */
17565 switch (GET_CODE (use_body
))
17568 shift_rtx
= XEXP (use_body
, 1);
17571 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17572 if (ix86_dep_by_shift_count_body (set_body
,
17573 XVECEXP (use_body
, 0, i
)))
17581 && (GET_CODE (shift_rtx
) == ASHIFT
17582 || GET_CODE (shift_rtx
) == LSHIFTRT
17583 || GET_CODE (shift_rtx
) == ASHIFTRT
17584 || GET_CODE (shift_rtx
) == ROTATE
17585 || GET_CODE (shift_rtx
) == ROTATERT
))
17587 rtx shift_count
= XEXP (shift_rtx
, 1);
17589 /* Return true if shift count is dest of SET_BODY. */
17590 if (REG_P (shift_count
))
17592 /* Add check since it can be invoked before register
17593 allocation in pre-reload schedule. */
17594 if (reload_completed
17595 && true_regnum (set_dest
) == true_regnum (shift_count
))
17597 else if (REGNO(set_dest
) == REGNO(shift_count
))
17605 /* Return true if destination reg of SET_INSN is shift count of
17609 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17611 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17612 PATTERN (use_insn
));
17615 /* Return TRUE or FALSE depending on whether the unary operator meets the
17616 appropriate constraints. */
17619 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17620 enum machine_mode mode ATTRIBUTE_UNUSED
,
17621 rtx operands
[2] ATTRIBUTE_UNUSED
)
17623 /* If one of operands is memory, source and destination must match. */
17624 if ((MEM_P (operands
[0])
17625 || MEM_P (operands
[1]))
17626 && ! rtx_equal_p (operands
[0], operands
[1]))
17631 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17632 are ok, keeping in mind the possible movddup alternative. */
17635 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17637 if (MEM_P (operands
[0]))
17638 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17639 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17640 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17644 /* Post-reload splitter for converting an SF or DFmode value in an
17645 SSE register into an unsigned SImode. */
17648 ix86_split_convert_uns_si_sse (rtx operands
[])
17650 enum machine_mode vecmode
;
17651 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17653 large
= operands
[1];
17654 zero_or_two31
= operands
[2];
17655 input
= operands
[3];
17656 two31
= operands
[4];
17657 vecmode
= GET_MODE (large
);
17658 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17660 /* Load up the value into the low element. We must ensure that the other
17661 elements are valid floats -- zero is the easiest such value. */
17664 if (vecmode
== V4SFmode
)
17665 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17667 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17671 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17672 emit_move_insn (value
, CONST0_RTX (vecmode
));
17673 if (vecmode
== V4SFmode
)
17674 emit_insn (gen_sse_movss (value
, value
, input
));
17676 emit_insn (gen_sse2_movsd (value
, value
, input
));
17679 emit_move_insn (large
, two31
);
17680 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17682 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17683 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17685 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17686 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17688 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17689 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17691 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17692 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17694 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17695 if (vecmode
== V4SFmode
)
17696 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17698 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17701 emit_insn (gen_xorv4si3 (value
, value
, large
));
17704 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17705 Expects the 64-bit DImode to be supplied in a pair of integral
17706 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17707 -mfpmath=sse, !optimize_size only. */
17710 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17712 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17713 rtx int_xmm
, fp_xmm
;
17714 rtx biases
, exponents
;
17717 int_xmm
= gen_reg_rtx (V4SImode
);
17718 if (TARGET_INTER_UNIT_MOVES
)
17719 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17720 else if (TARGET_SSE_SPLIT_REGS
)
17722 emit_clobber (int_xmm
);
17723 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17727 x
= gen_reg_rtx (V2DImode
);
17728 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17729 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17732 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17733 gen_rtvec (4, GEN_INT (0x43300000UL
),
17734 GEN_INT (0x45300000UL
),
17735 const0_rtx
, const0_rtx
));
17736 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17738 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17739 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17741 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17742 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17743 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17744 (0x1.0p84 + double(fp_value_hi_xmm)).
17745 Note these exponents differ by 32. */
17747 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17749 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17750 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17751 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17752 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17753 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17754 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17755 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17756 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17757 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17759 /* Add the upper and lower DFmode values together. */
17761 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17764 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17765 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17766 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17769 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17772 /* Not used, but eases macroization of patterns. */
17774 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17775 rtx input ATTRIBUTE_UNUSED
)
/* This expander exists only so the float<mode> pattern macroization has a
   uniform callee for every mode combination; it must never actually run.  */
17777 gcc_unreachable ();
17780 /* Convert an unsigned SImode value into a DFmode. Only currently used
17781 for SSE, but applicable anywhere. */
17784 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17786 REAL_VALUE_TYPE TWO31r
;
/* Bias INPUT by adding INT_MIN (-2^31); modulo 2^32 this flips the sign
   bit, so the biased value is representable as a signed SImode number.  */
17789 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17790 NULL
, 1, OPTAB_DIRECT
);
/* fp = (double) (signed) biased_value, via the ordinary SI->DF convert.  */
17792 fp
= gen_reg_rtx (DFmode
);
17793 emit_insn (gen_floatsidf2 (fp
, x
));
/* Materialize the DFmode constant 2^31 used to undo the bias.  */
17795 real_ldexp (&TWO31r
, &dconst1
, 31);
17796 x
= const_double_from_real_value (TWO31r
, DFmode
);
/* target = fp + 2^31, restoring the original unsigned magnitude exactly
   (both addends are exactly representable in DFmode).  */
17798 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
/* expand_simple_binop may return an rtx other than TARGET; copy it over.  */
17800 emit_move_insn (target
, x
);
17803 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17804 32-bit mode; otherwise we have a direct convert instruction. */
17807 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17809 REAL_VALUE_TYPE TWO32r
;
17810 rtx fp_lo
, fp_hi
, x
;
17812 fp_lo
= gen_reg_rtx (DFmode
);
17813 fp_hi
= gen_reg_rtx (DFmode
);
/* fp_hi = (double) (signed) high 32 bits of INPUT — the high word carries
   the sign of the full DImode value.  */
17815 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
/* Scale the converted high word by 2^32 to restore its weight.  */
17817 real_ldexp (&TWO32r
, &dconst1
, 32);
17818 x
= const_double_from_real_value (TWO32r
, DFmode
);
17819 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
/* fp_lo = (double) (unsigned) low 32 bits, via the helper above.  */
17821 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
/* target = fp_hi * 2^32 + fp_lo.  (Trailing arguments of this call fall on
   original lines missing from this extraction.)  */
17823 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17826 emit_move_insn (target
, x
);
17829 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17830 For x86_32, -mfpmath=sse, !optimize_size only. */
17832 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17834 REAL_VALUE_TYPE ONE16r
;
17835 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
/* x = 2^16 as an SFmode constant, used to recombine the two halves.  */
17837 real_ldexp (&ONE16r
, &dconst1
, 16);
17838 x
= const_double_from_real_value (ONE16r
, SFmode
);
/* Split INPUT into 16-bit halves: int_lo = input & 0xffff,
   int_hi = input >> 16 (logical shift).  Each half fits exactly in SFmode,
   so the signed SI->SF conversions below are exact.  */
17839 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17840 NULL
, 0, OPTAB_DIRECT
);
17841 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17842 NULL
, 0, OPTAB_DIRECT
);
17843 fp_hi
= gen_reg_rtx (SFmode
);
17844 fp_lo
= gen_reg_rtx (SFmode
);
17845 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17846 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
/* fp_hi = fp_hi * 2^16; then fp_hi = fp_hi + fp_lo.  (The final arguments
   of both calls fall on original lines missing from this extraction.)  */
17847 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17849 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
/* Copy into TARGET only if expand_simple_binop didn't already use it.  */
17851 if (!rtx_equal_p (target
, fp_hi
))
17852 emit_move_insn (target
, fp_hi
);
17855 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17856 a vector of unsigned ints VAL to vector of floats TARGET. */
17859 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17862 REAL_VALUE_TYPE TWO16r
;
17863 enum machine_mode intmode
= GET_MODE (val
);
17864 enum machine_mode fltmode
= GET_MODE (target
);
17865 rtx (*cvt
) (rtx
, rtx
);
17867 if (intmode
== V4SImode
)
17868 cvt
= gen_floatv4siv4sf2
;
17870 cvt
= gen_floatv8siv8sf2
;
17871 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17872 tmp
[0] = force_reg (intmode
, tmp
[0]);
17873 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17875 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17876 NULL_RTX
, 1, OPTAB_DIRECT
);
17877 tmp
[3] = gen_reg_rtx (fltmode
);
17878 emit_insn (cvt (tmp
[3], tmp
[1]));
17879 tmp
[4] = gen_reg_rtx (fltmode
);
17880 emit_insn (cvt (tmp
[4], tmp
[2]));
17881 real_ldexp (&TWO16r
, &dconst1
, 16);
17882 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17883 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17884 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17886 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17888 if (tmp
[7] != target
)
17889 emit_move_insn (target
, tmp
[7]);
17892 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17893 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17894 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17895 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17898 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17900 REAL_VALUE_TYPE TWO31r
;
17901 rtx two31r
, tmp
[4];
17902 enum machine_mode mode
= GET_MODE (val
);
17903 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17904 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17905 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17908 for (i
= 0; i
< 3; i
++)
17909 tmp
[i
] = gen_reg_rtx (mode
);
17910 real_ldexp (&TWO31r
, &dconst1
, 31);
17911 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17912 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17913 two31r
= force_reg (mode
, two31r
);
17916 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17917 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17918 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17919 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17920 default: gcc_unreachable ();
17922 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17923 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17924 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17926 if (intmode
== V4SImode
|| TARGET_AVX2
)
17927 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17928 gen_lowpart (intmode
, tmp
[0]),
17929 GEN_INT (31), NULL_RTX
, 0,
17933 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17934 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17935 *xorp
= expand_simple_binop (intmode
, AND
,
17936 gen_lowpart (intmode
, tmp
[0]),
17937 two31
, NULL_RTX
, 0,
17940 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17944 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17945 then replicate the value for all elements of the vector
17949 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17953 enum machine_mode scalar_mode
;
/* NOTE(review): the switch validating MODE (original lines ~17954-17969)
   is absent from this extraction; only the vector-building tail is here.  */
17970 n_elt
= GET_MODE_NUNITS (mode
);
17971 v
= rtvec_alloc (n_elt
);
17972 scalar_mode
= GET_MODE_INNER (mode
);
/* Element 0 always holds VALUE.  */
17974 RTVEC_ELT (v
, 0) = value
;
/* Elements 1..n-1: replicate VALUE when VECT, otherwise zero-fill.  */
17976 for (i
= 1; i
< n_elt
; ++i
)
17977 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17979 return gen_rtx_CONST_VECTOR (mode
, v
);
17982 gcc_unreachable ();
17986 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17987 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17988 for an SSE register. If VECT is true, then replicate the mask for
17989 all elements of the vector register. If INVERT is true, then create
17990 a mask excluding the sign bit. */
17993 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17995 enum machine_mode vec_mode
, imode
;
17996 HOST_WIDE_INT hi
, lo
;
18001 /* Find the sign bit, sign extended to 2*HWI. */
18009 mode
= GET_MODE_INNER (mode
);
18011 lo
= 0x80000000, hi
= lo
< 0;
18019 mode
= GET_MODE_INNER (mode
);
18021 if (HOST_BITS_PER_WIDE_INT
>= 64)
18022 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18024 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18029 vec_mode
= VOIDmode
;
18030 if (HOST_BITS_PER_WIDE_INT
>= 64)
18033 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18040 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18044 lo
= ~lo
, hi
= ~hi
;
18050 mask
= immed_double_const (lo
, hi
, imode
);
18052 vec
= gen_rtvec (2, v
, mask
);
18053 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18054 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18061 gcc_unreachable ();
18065 lo
= ~lo
, hi
= ~hi
;
18067 /* Force this value into the low part of a fp vector constant. */
18068 mask
= immed_double_const (lo
, hi
, imode
);
18069 mask
= gen_lowpart (mode
, mask
);
18071 if (vec_mode
== VOIDmode
)
18072 return force_reg (mode
, mask
);
18074 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18075 return force_reg (vec_mode
, v
);
18078 /* Generate code for floating point ABS or NEG. */
18081 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18084 rtx mask
, set
, dst
, src
;
18085 bool use_sse
= false;
18086 bool vector_mode
= VECTOR_MODE_P (mode
);
18087 enum machine_mode vmode
= mode
;
/* Decide whether SSE is used and pick the vector mode VMODE.
   NOTE(review): the branch bodies (original lines 18088-18101) are missing
   from this extraction; presumably they set use_sse/vmode per mode.  */
18091 else if (mode
== TFmode
)
18093 else if (TARGET_SSE_MATH
)
18095 use_sse
= SSE_FLOAT_MODE_P (mode
);
18096 if (mode
== SFmode
)
18098 else if (mode
== DFmode
)
18102 /* NEG and ABS performed with SSE use bitwise mask operations.
18103 Create the appropriate mask now. */
/* For ABS the mask excludes the sign bit (invert = true); for NEG it
   selects only the sign bit.  */
18105 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
)
;
/* Build (set dst (code src)) ...  */
18112 set
= gen_rtx_fmt_e (code
, mode
, src
);
18113 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
/* ... attach a USE of the mask so the splitter can find it ...  */
18120 use
= gen_rtx_USE (VOIDmode
, mask
);
18122 par
= gen_rtvec (2, set
, use
);
/* ... and, on the other path, also clobber the flags register, since the
   non-SSE expansion may touch CC.  */
18125 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18126 par
= gen_rtvec (3, set
, use
, clob
);
18128 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18134 /* Expand a copysign operation. Special case operand 0 being a constant. */
18137 ix86_expand_copysign (rtx operands
[])
18139 enum machine_mode mode
, vmode
;
18140 rtx dest
, op0
, op1
, mask
, nmask
;
18142 dest
= operands
[0];
18146 mode
= GET_MODE (dest
);
18148 if (mode
== SFmode
)
18150 else if (mode
== DFmode
)
18155 if (GET_CODE (op0
) == CONST_DOUBLE
)
18157 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18159 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18160 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18162 if (mode
== SFmode
|| mode
== DFmode
)
18164 if (op0
== CONST0_RTX (mode
))
18165 op0
= CONST0_RTX (vmode
);
18168 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18170 op0
= force_reg (vmode
, v
);
18173 else if (op0
!= CONST0_RTX (mode
))
18174 op0
= force_reg (mode
, op0
);
18176 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18178 if (mode
== SFmode
)
18179 copysign_insn
= gen_copysignsf3_const
;
18180 else if (mode
== DFmode
)
18181 copysign_insn
= gen_copysigndf3_const
;
18183 copysign_insn
= gen_copysigntf3_const
;
18185 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18189 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18191 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18192 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18194 if (mode
== SFmode
)
18195 copysign_insn
= gen_copysignsf3_var
;
18196 else if (mode
== DFmode
)
18197 copysign_insn
= gen_copysigndf3_var
;
18199 copysign_insn
= gen_copysigntf3_var
;
18201 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18205 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18206 be a constant, and so has already been expanded into a vector constant. */
18209 ix86_split_copysign_const (rtx operands
[])
18211 enum machine_mode mode
, vmode
;
18212 rtx dest
, op0
, mask
, x
;
/* NOTE(review): the load of op0 from operands[] (original line ~18215)
   is missing from this extraction.  */
18214 dest
= operands
[0];
18216 mask
= operands
[3];
18218 mode
= GET_MODE (dest
);
18219 vmode
= GET_MODE (mask
);
/* Work on DEST viewed in the vector mode: dest &= mask clears the bits
   that will come from the constant magnitude.  */
18221 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18222 x
= gen_rtx_AND (vmode
, dest
, mask
);
18223 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* OR in the (pre-expanded) constant unless it is all-zero, in which case
   the IOR would be a no-op.  */
18225 if (op0
!= CONST0_RTX (vmode
))
18227 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18228 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18232 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18233 so we have to do two masks. */
18236 ix86_split_copysign_var (rtx operands
[])
18238 enum machine_mode mode
, vmode
;
18239 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18241 dest
= operands
[0];
18242 scratch
= operands
[1];
18245 nmask
= operands
[4];
18246 mask
= operands
[5];
18248 mode
= GET_MODE (dest
);
18249 vmode
= GET_MODE (mask
);
18251 if (rtx_equal_p (op0
, op1
))
18253 /* Shouldn't happen often (it's useless, obviously), but when it does
18254 we'd generate incorrect code if we continue below. */
18255 emit_move_insn (dest
, op0
);
18259 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18261 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18263 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18264 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18267 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18268 x
= gen_rtx_NOT (vmode
, dest
);
18269 x
= gen_rtx_AND (vmode
, x
, op0
);
18270 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18274 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18276 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18278 else /* alternative 2,4 */
18280 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18281 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18282 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18284 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18286 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18288 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18289 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18291 else /* alternative 3,4 */
18293 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18295 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18296 x
= gen_rtx_AND (vmode
, dest
, op0
);
18298 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18301 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18302 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18305 /* Return TRUE or FALSE depending on whether the first SET in INSN
18306 has source and destination with matching CC modes, and that the
18307 CC mode is at least as constrained as REQ_MODE. */
18310 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18313 enum machine_mode set_mode
;
/* Dig the first SET out of a possibly PARALLEL pattern; its source must
   be a COMPARE for this predicate to make sense.  */
18315 set
= PATTERN (insn
);
18316 if (GET_CODE (set
) == PARALLEL
)
18317 set
= XVECEXP (set
, 0, 0);
18318 gcc_assert (GET_CODE (set
) == SET
);
18319 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18321 set_mode
= GET_MODE (SET_DEST (set
));
/* Dispatch on SET_MODE.  CCNOmode is acceptable for any req_mode, and
   CCmode is acceptable when comparing against zero.
   NOTE(review): the case bodies/returns (original lines 18322-18353)
   fall on lines missing from this extraction.  */
18325 if (req_mode
!= CCNOmode
18326 && (req_mode
!= CCmode
18327 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18331 if (req_mode
== CCGCmode
)
18335 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18339 if (req_mode
== CCZmode
)
18349 if (set_mode
!= req_mode
)
18354 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode.  */
18357 return GET_MODE (SET_SRC (set
)) == set_mode
;
18360 /* Generate insn patterns to do an integer compare of OPERANDS. */
18363 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18365 enum machine_mode cmpmode
;
/* Pick the least-constrained CC mode that CODE needs, and emit
   flags:cmpmode = COMPARE (op0, op1).  */
18368 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18369 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18371 /* This is very simple, but making the interface the same as in the
18372 FP case makes the rest of the code easier. */
18373 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18374 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18376 /* Return the test that should be put into the flags user, i.e.
18377 the bcc, scc, or cmov instruction. */
18378 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18381 /* Figure out whether to use ordered or unordered fp comparisons.
18382 Return the appropriate mode to use. */
18385 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18387 /* ??? In order to make all comparisons reversible, we do all comparisons
18388 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18389 all forms trapping and nontrapping comparisons, we can make inequality
18390 comparisons trapping again, since it results in better code when using
18391 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping) compare; CCFPmode = ordered.  */
18392 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
/* Select the condition-code mode needed to compare OP0 against OP1 with
   comparison CODE.  NOTE(review): the return statements of the integer
   switch cases below fall on original lines missing from this extraction;
   the per-case flag comments are from the original source.  */
18396 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18398 enum machine_mode mode
= GET_MODE (op0
);
/* Scalar FP compares get their mode from ix86_fp_compare_mode; decimal FP
   is not supported here.  */
18400 if (SCALAR_FLOAT_MODE_P (mode
))
18402 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18403 return ix86_fp_compare_mode (code
);
18408 /* Only zero flag is needed. */
18409 case EQ
: /* ZF=0 */
18410 case NE
: /* ZF!=0 */
18412 /* Codes needing carry flag. */
18413 case GEU
: /* CF=0 */
18414 case LTU
: /* CF=1 */
18415 /* Detect overflow checks. They need just the carry flag. */
18416 if (GET_CODE (op0
) == PLUS
18417 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18421 case GTU
: /* CF=0 & ZF=0 */
18422 case LEU
: /* CF=1 | ZF=1 */
18423 /* Detect overflow checks. They need just the carry flag. */
18424 if (GET_CODE (op0
) == MINUS
18425 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18429 /* Codes possibly doable only with sign flag when
18430 comparing against zero. */
18431 case GE
: /* SF=OF or SF=0 */
18432 case LT
: /* SF<>OF or SF=1 */
18433 if (op1
== const0_rtx
)
18436 /* For other cases Carry flag is not required. */
18438 /* Codes doable only with sign flag when comparing
18439 against zero, but we miss jump instruction for it
18440 so we need to use relational tests against overflow
18441 that thus needs to be zero. */
18442 case GT
: /* ZF=0 & SF=OF */
18443 case LE
: /* ZF=1 | SF<>OF */
18444 if (op1
== const0_rtx
)
18448 /* strcmp pattern do (use flags) and combine may ask us for proper
18453 gcc_unreachable ();
18457 /* Return the fixed registers used for condition codes. */
18460 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18467 /* If two condition code modes are compatible, return a condition code
18468 mode which is compatible with both. Otherwise, return
18471 static enum machine_mode
18472 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
/* Non-CC modes are never compatible.
   NOTE(review): the return values of each branch below (original lines
   18478-18491) are missing from this extraction.  */
18477 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
/* CCGCmode and CCGOCmode are mutually compatible ...  */
18480 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18481 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
/* ... and CCZmode is compatible with either of them.  */
18484 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18486 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18492 gcc_unreachable ();
18522 /* These are only compatible with themselves, which we already
18529 /* Return a comparison we can do and that it is equivalent to
18530 swap_condition (code) apart possibly from orderedness.
18531 But, never change orderedness if TARGET_IEEE_FP, returning
18532 UNKNOWN in that case if necessary. */
18534 static enum rtx_code
18535 ix86_fp_swap_condition (enum rtx_code code
)
/* The four cases below swap to a comparison of the opposite orderedness,
   which is only legal when not honoring IEEE NaN semantics; under
   TARGET_IEEE_FP they report UNKNOWN instead.  */
18539 case GT
: /* GTU - CF=0 & ZF=0 */
18540 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18541 case GE
: /* GEU - CF=0 */
18542 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18543 case UNLT
: /* LTU - CF=1 */
18544 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18545 case UNLE
: /* LEU - CF=1 | ZF=1 */
18546 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
/* All other codes swap without changing orderedness.  */
18548 return swap_condition (code
);
18552 /* Return cost of comparison CODE using the best strategy for performance.
18553 All following functions do use number of instructions as a cost metrics.
18554 In future this should be tweaked to compute bytes for optimize_size and
18555 take into account performance of various instructions on various CPUs. */
18558 ix86_fp_comparison_cost (enum rtx_code code
)
18562 /* The cost of code using bit-twiddling on %ah. */
/* NOTE(review): the switch header and most of its cases (original lines
   18563-18585) are missing from this extraction; arith_cost is the cost of
   the fnstsw + bit-twiddling fallback, one insn higher under IEEE.  */
18579 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18583 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18586 gcc_unreachable ();
/* fcomi (COMI) and fnstsw+sahf (SAHF) need an extra insn only when the
   arithmetic fallback is itself expensive (unordered-aware codes).  */
18589 switch (ix86_fp_comparison_strategy (code
))
18591 case IX86_FPCMP_COMI
:
18592 return arith_cost
> 4 ? 3 : 2;
18593 case IX86_FPCMP_SAHF
:
18594 return arith_cost
> 4 ? 4 : 3;
18600 /* Return strategy to use for floating-point. We assume that fcomi is always
18601 preferrable where available, since that is also true when looking at size
18602 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18604 enum ix86_fpcmp_strategy
18605 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18607 /* Do fcomi/sahf based test when profitable. */
/* NOTE(review): the guard for this return (original lines 18608-18609) is
   missing from this extraction — presumably a check that fcomi is
   available; confirm against the full source.  */
18610 return IX86_FPCMP_COMI
;
/* sahf-based test: requires TARGET_SAHF and either a tuning preference for
   it or optimizing for size.  */
18612 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18613 return IX86_FPCMP_SAHF
;
/* Fallback: fnstsw plus arithmetic bit-twiddling on %ah.  */
18615 return IX86_FPCMP_ARITH
;
18618 /* Swap, force into registers, or otherwise massage the two operands
18619 to a fp comparison. The operands are updated in place; the new
18620 comparison code is returned. */
18622 static enum rtx_code
18623 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18625 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18626 rtx op0
= *pop0
, op1
= *pop1
;
18627 enum machine_mode op_mode
= GET_MODE (op0
);
18628 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18630 /* All of the unordered compare instructions only work on registers.
18631 The same is true of the fcomi compare instructions. The XFmode
18632 compare instructions require registers except when comparing
18633 against zero or when converting operand 1 from fixed point to
18637 && (fpcmp_mode
== CCFPUmode
18638 || (op_mode
== XFmode
18639 && ! (standard_80387_constant_p (op0
) == 1
18640 || standard_80387_constant_p (op1
) == 1)
18641 && GET_CODE (op1
) != FLOAT
)
18642 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18644 op0
= force_reg (op_mode
, op0
);
18645 op1
= force_reg (op_mode
, op1
);
18649 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18650 things around if they appear profitable, otherwise force op0
18651 into a register. */
18653 if (standard_80387_constant_p (op0
) == 0
18655 && ! (standard_80387_constant_p (op1
) == 0
18658 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18659 if (new_code
!= UNKNOWN
)
18662 tmp
= op0
, op0
= op1
, op1
= tmp
;
18668 op0
= force_reg (op_mode
, op0
);
18670 if (CONSTANT_P (op1
))
18672 int tmp
= standard_80387_constant_p (op1
);
18674 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18678 op1
= force_reg (op_mode
, op1
);
18681 op1
= force_reg (op_mode
, op1
);
18685 /* Try to rearrange the comparison to make it cheaper. */
18686 if (ix86_fp_comparison_cost (code
)
18687 > ix86_fp_comparison_cost (swap_condition (code
))
18688 && (REG_P (op1
) || can_create_pseudo_p ()))
18691 tmp
= op0
, op0
= op1
, op1
= tmp
;
18692 code
= swap_condition (code
);
18694 op0
= force_reg (op_mode
, op0
);
18702 /* Convert comparison codes we use to represent FP comparison to integer
18703 code that will result in proper branch. Return UNKNOWN if no such code
18707 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18736 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18739 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18741 enum machine_mode fpcmp_mode
, intcmp_mode
;
18744 fpcmp_mode
= ix86_fp_compare_mode (code
);
18745 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18747 /* Do fcomi/sahf based test when profitable. */
18748 switch (ix86_fp_comparison_strategy (code
))
18750 case IX86_FPCMP_COMI
:
18751 intcmp_mode
= fpcmp_mode
;
18752 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18753 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18758 case IX86_FPCMP_SAHF
:
18759 intcmp_mode
= fpcmp_mode
;
18760 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18761 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18765 scratch
= gen_reg_rtx (HImode
);
18766 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18767 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18770 case IX86_FPCMP_ARITH
:
18771 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18772 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18773 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18775 scratch
= gen_reg_rtx (HImode
);
18776 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18778 /* In the unordered case, we have to check C2 for NaN's, which
18779 doesn't happen to work out to anything nice combination-wise.
18780 So do some bit twiddling on the value we've got in AH to come
18781 up with an appropriate set of condition codes. */
18783 intcmp_mode
= CCNOmode
;
18788 if (code
== GT
|| !TARGET_IEEE_FP
)
18790 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18795 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18796 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18797 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18798 intcmp_mode
= CCmode
;
18804 if (code
== LT
&& TARGET_IEEE_FP
)
18806 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18807 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18808 intcmp_mode
= CCmode
;
18813 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18819 if (code
== GE
|| !TARGET_IEEE_FP
)
18821 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18826 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18827 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18833 if (code
== LE
&& TARGET_IEEE_FP
)
18835 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18836 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18837 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18838 intcmp_mode
= CCmode
;
18843 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18849 if (code
== EQ
&& TARGET_IEEE_FP
)
18851 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18852 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18853 intcmp_mode
= CCmode
;
18858 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18864 if (code
== NE
&& TARGET_IEEE_FP
)
18866 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18867 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18873 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18879 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18883 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18888 gcc_unreachable ();
18896 /* Return the test that should be put into the flags user, i.e.
18897 the bcc, scc, or cmov instruction. */
18898 return gen_rtx_fmt_ee (code
, VOIDmode
,
18899 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
/* Emit a comparison of OP0 and OP1 with code CODE and return the rtx the
   flags user (bcc/scc/cmov) should test.  Dispatches on OP0's mode class:
   already-computed CC, scalar FP, or integer.  */
18904 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
/* OP0 already holds condition codes: just wrap it in the comparison.  */
18908 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18909 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
/* Scalar FP (decimal FP unsupported): use the FP compare expander.  */
18911 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18913 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18914 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
/* Everything else is an integer compare.  */
18917 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18923 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18925 enum machine_mode mode
= GET_MODE (op0
);
18937 tmp
= ix86_expand_compare (code
, op0
, op1
);
18938 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18939 gen_rtx_LABEL_REF (VOIDmode
, label
),
18941 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18948 /* Expand DImode branch into multiple compare+branch. */
18950 rtx lo
[2], hi
[2], label2
;
18951 enum rtx_code code1
, code2
, code3
;
18952 enum machine_mode submode
;
18954 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18956 tmp
= op0
, op0
= op1
, op1
= tmp
;
18957 code
= swap_condition (code
);
18960 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18961 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18963 submode
= mode
== DImode
? SImode
: DImode
;
18965 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18966 avoid two branches. This costs one extra insn, so disable when
18967 optimizing for size. */
18969 if ((code
== EQ
|| code
== NE
)
18970 && (!optimize_insn_for_size_p ()
18971 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18976 if (hi
[1] != const0_rtx
)
18977 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18978 NULL_RTX
, 0, OPTAB_WIDEN
);
18981 if (lo
[1] != const0_rtx
)
18982 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18983 NULL_RTX
, 0, OPTAB_WIDEN
);
18985 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18986 NULL_RTX
, 0, OPTAB_WIDEN
);
18988 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18992 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18993 op1 is a constant and the low word is zero, then we can just
18994 examine the high word. Similarly for low word -1 and
18995 less-or-equal-than or greater-than. */
18997 if (CONST_INT_P (hi
[1]))
19000 case LT
: case LTU
: case GE
: case GEU
:
19001 if (lo
[1] == const0_rtx
)
19003 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19007 case LE
: case LEU
: case GT
: case GTU
:
19008 if (lo
[1] == constm1_rtx
)
19010 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19018 /* Otherwise, we need two or three jumps. */
19020 label2
= gen_label_rtx ();
19023 code2
= swap_condition (code
);
19024 code3
= unsigned_condition (code
);
19028 case LT
: case GT
: case LTU
: case GTU
:
19031 case LE
: code1
= LT
; code2
= GT
; break;
19032 case GE
: code1
= GT
; code2
= LT
; break;
19033 case LEU
: code1
= LTU
; code2
= GTU
; break;
19034 case GEU
: code1
= GTU
; code2
= LTU
; break;
19036 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19037 case NE
: code2
= UNKNOWN
; break;
19040 gcc_unreachable ();
19045 * if (hi(a) < hi(b)) goto true;
19046 * if (hi(a) > hi(b)) goto false;
19047 * if (lo(a) < lo(b)) goto true;
19051 if (code1
!= UNKNOWN
)
19052 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19053 if (code2
!= UNKNOWN
)
19054 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19056 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19058 if (code2
!= UNKNOWN
)
19059 emit_label (label2
);
19064 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19069 /* Split branch based on floating point condition. */
19071 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19072 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
/* If the fall-through arm is TARGET1, reverse the condition so TARGET2
   becomes pc_rtx.  NOTE(review): the swap of target1/target2 (original
   lines 19078-19083) is missing from this extraction.  */
19077 if (target2
!= pc_rtx
)
19080 code
= reverse_condition_maybe_unordered (code
);
/* Emit the FP compare; its trailing arguments fall on missing lines.  */
19085 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19088 /* Remove pushed operand from stack. */
19090 ix86_free_from_memory (GET_MODE (pushed
));
/* Emit the conditional jump (set pc (if_then_else cond t1 t2)).  */
19092 i
= emit_jump_insn (gen_rtx_SET
19094 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19095 condition
, target1
, target2
)));
/* Propagate the branch probability recorded by the splitter, if any.  */
19096 if (split_branch_probability
>= 0)
19097 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
/* Expand a setcc: store the boolean result of comparison CODE (op0, op1)
   into the QImode register DEST.  */
19101 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19105 gcc_assert (GET_MODE (dest
) == QImode
);
/* Emit the compare and retarget the returned flags-test to QImode so it
   can be the source of the SET below.  */
19107 ret
= ix86_expand_compare (code
, op0
, op1
);
19108 PUT_MODE (ret
, QImode
);
19109 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19112 /* Expand comparison setting or clearing carry flag. Return true when
19113 successful and set pop for the operation. */
19115 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19117 enum machine_mode mode
=
19118 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19120 /* Do not handle double-mode compares that go through special path. */
19121 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19124 if (SCALAR_FLOAT_MODE_P (mode
))
19126 rtx compare_op
, compare_seq
;
19128 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19130 /* Shortcut: following common codes never translate
19131 into carry flag compares. */
19132 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19133 || code
== ORDERED
|| code
== UNORDERED
)
19136 /* These comparisons require zero flag; swap operands so they won't. */
19137 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19138 && !TARGET_IEEE_FP
)
19143 code
= swap_condition (code
);
19146 /* Try to expand the comparison and verify that we end up with
19147 carry flag based comparison. This fails to be true only when
19148 we decide to expand comparison using arithmetic that is not
19149 too common scenario. */
19151 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19152 compare_seq
= get_insns ();
19155 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19156 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19157 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19159 code
= GET_CODE (compare_op
);
19161 if (code
!= LTU
&& code
!= GEU
)
19164 emit_insn (compare_seq
);
19169 if (!INTEGRAL_MODE_P (mode
))
19178 /* Convert a==0 into (unsigned)a<1. */
19181 if (op1
!= const0_rtx
)
19184 code
= (code
== EQ
? LTU
: GEU
);
19187 /* Convert a>b into b<a or a>=b-1. */
19190 if (CONST_INT_P (op1
))
19192 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19193 /* Bail out on overflow. We still can swap operands but that
19194 would force loading of the constant into register. */
19195 if (op1
== const0_rtx
19196 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19198 code
= (code
== GTU
? GEU
: LTU
);
19205 code
= (code
== GTU
? LTU
: GEU
);
19209 /* Convert a>=0 into (unsigned)a<0x80000000. */
19212 if (mode
== DImode
|| op1
!= const0_rtx
)
19214 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19215 code
= (code
== LT
? GEU
: LTU
);
19219 if (mode
== DImode
|| op1
!= constm1_rtx
)
19221 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19222 code
= (code
== LE
? GEU
: LTU
);
19228 /* Swapping operands may cause constant to appear as first operand. */
19229 if (!nonimmediate_operand (op0
, VOIDmode
))
19231 if (!can_create_pseudo_p ())
19233 op0
= force_reg (mode
, op0
);
19235 *pop
= ix86_expand_compare (code
, op0
, op1
);
19236 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19241 ix86_expand_int_movcc (rtx operands
[])
19243 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19244 rtx compare_seq
, compare_op
;
19245 enum machine_mode mode
= GET_MODE (operands
[0]);
19246 bool sign_bit_compare_p
= false;
19247 rtx op0
= XEXP (operands
[1], 0);
19248 rtx op1
= XEXP (operands
[1], 1);
19250 if (GET_MODE (op0
) == TImode
19251 || (GET_MODE (op0
) == DImode
19256 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19257 compare_seq
= get_insns ();
19260 compare_code
= GET_CODE (compare_op
);
19262 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19263 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19264 sign_bit_compare_p
= true;
19266 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19267 HImode insns, we'd be swallowed in word prefix ops. */
19269 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19270 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19271 && CONST_INT_P (operands
[2])
19272 && CONST_INT_P (operands
[3]))
19274 rtx out
= operands
[0];
19275 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19276 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19277 HOST_WIDE_INT diff
;
19280 /* Sign bit compares are better done using shifts than we do by using
19282 if (sign_bit_compare_p
19283 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19285 /* Detect overlap between destination and compare sources. */
19288 if (!sign_bit_compare_p
)
19291 bool fpcmp
= false;
19293 compare_code
= GET_CODE (compare_op
);
19295 flags
= XEXP (compare_op
, 0);
19297 if (GET_MODE (flags
) == CCFPmode
19298 || GET_MODE (flags
) == CCFPUmode
)
19302 = ix86_fp_compare_code_to_integer (compare_code
);
19305 /* To simplify rest of code, restrict to the GEU case. */
19306 if (compare_code
== LTU
)
19308 HOST_WIDE_INT tmp
= ct
;
19311 compare_code
= reverse_condition (compare_code
);
19312 code
= reverse_condition (code
);
19317 PUT_CODE (compare_op
,
19318 reverse_condition_maybe_unordered
19319 (GET_CODE (compare_op
)));
19321 PUT_CODE (compare_op
,
19322 reverse_condition (GET_CODE (compare_op
)));
19326 if (reg_overlap_mentioned_p (out
, op0
)
19327 || reg_overlap_mentioned_p (out
, op1
))
19328 tmp
= gen_reg_rtx (mode
);
19330 if (mode
== DImode
)
19331 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19333 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19334 flags
, compare_op
));
19338 if (code
== GT
|| code
== GE
)
19339 code
= reverse_condition (code
);
19342 HOST_WIDE_INT tmp
= ct
;
19347 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19360 tmp
= expand_simple_binop (mode
, PLUS
,
19362 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19373 tmp
= expand_simple_binop (mode
, IOR
,
19375 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19377 else if (diff
== -1 && ct
)
19387 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19389 tmp
= expand_simple_binop (mode
, PLUS
,
19390 copy_rtx (tmp
), GEN_INT (cf
),
19391 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19399 * andl cf - ct, dest
19409 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19412 tmp
= expand_simple_binop (mode
, AND
,
19414 gen_int_mode (cf
- ct
, mode
),
19415 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19417 tmp
= expand_simple_binop (mode
, PLUS
,
19418 copy_rtx (tmp
), GEN_INT (ct
),
19419 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19422 if (!rtx_equal_p (tmp
, out
))
19423 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19430 enum machine_mode cmp_mode
= GET_MODE (op0
);
19433 tmp
= ct
, ct
= cf
, cf
= tmp
;
19436 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19438 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19440 /* We may be reversing unordered compare to normal compare, that
19441 is not valid in general (we may convert non-trapping condition
19442 to trapping one), however on i386 we currently emit all
19443 comparisons unordered. */
19444 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19445 code
= reverse_condition_maybe_unordered (code
);
19449 compare_code
= reverse_condition (compare_code
);
19450 code
= reverse_condition (code
);
19454 compare_code
= UNKNOWN
;
19455 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19456 && CONST_INT_P (op1
))
19458 if (op1
== const0_rtx
19459 && (code
== LT
|| code
== GE
))
19460 compare_code
= code
;
19461 else if (op1
== constm1_rtx
)
19465 else if (code
== GT
)
19470 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19471 if (compare_code
!= UNKNOWN
19472 && GET_MODE (op0
) == GET_MODE (out
)
19473 && (cf
== -1 || ct
== -1))
19475 /* If lea code below could be used, only optimize
19476 if it results in a 2 insn sequence. */
19478 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19479 || diff
== 3 || diff
== 5 || diff
== 9)
19480 || (compare_code
== LT
&& ct
== -1)
19481 || (compare_code
== GE
&& cf
== -1))
19484 * notl op1 (if necessary)
19492 code
= reverse_condition (code
);
19495 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19497 out
= expand_simple_binop (mode
, IOR
,
19499 out
, 1, OPTAB_DIRECT
);
19500 if (out
!= operands
[0])
19501 emit_move_insn (operands
[0], out
);
19508 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19509 || diff
== 3 || diff
== 5 || diff
== 9)
19510 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19512 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19518 * lea cf(dest*(ct-cf)),dest
19522 * This also catches the degenerate setcc-only case.
19528 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19531 /* On x86_64 the lea instruction operates on Pmode, so we need
19532 to get arithmetics done in proper mode to match. */
19534 tmp
= copy_rtx (out
);
19538 out1
= copy_rtx (out
);
19539 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19543 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19549 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19552 if (!rtx_equal_p (tmp
, out
))
19555 out
= force_operand (tmp
, copy_rtx (out
));
19557 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19559 if (!rtx_equal_p (out
, operands
[0]))
19560 emit_move_insn (operands
[0], copy_rtx (out
));
19566 * General case: Jumpful:
19567 * xorl dest,dest cmpl op1, op2
19568 * cmpl op1, op2 movl ct, dest
19569 * setcc dest jcc 1f
19570 * decl dest movl cf, dest
19571 * andl (cf-ct),dest 1:
19574 * Size 20. Size 14.
19576 * This is reasonably steep, but branch mispredict costs are
19577 * high on modern cpus, so consider failing only if optimizing
19581 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19582 && BRANCH_COST (optimize_insn_for_speed_p (),
19587 enum machine_mode cmp_mode
= GET_MODE (op0
);
19592 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19594 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19596 /* We may be reversing unordered compare to normal compare,
19597 that is not valid in general (we may convert non-trapping
19598 condition to trapping one), however on i386 we currently
19599 emit all comparisons unordered. */
19600 code
= reverse_condition_maybe_unordered (code
);
19604 code
= reverse_condition (code
);
19605 if (compare_code
!= UNKNOWN
)
19606 compare_code
= reverse_condition (compare_code
);
19610 if (compare_code
!= UNKNOWN
)
19612 /* notl op1 (if needed)
19617 For x < 0 (resp. x <= -1) there will be no notl,
19618 so if possible swap the constants to get rid of the
19620 True/false will be -1/0 while code below (store flag
19621 followed by decrement) is 0/-1, so the constants need
19622 to be exchanged once more. */
19624 if (compare_code
== GE
|| !cf
)
19626 code
= reverse_condition (code
);
19631 HOST_WIDE_INT tmp
= cf
;
19636 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19640 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19642 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19644 copy_rtx (out
), 1, OPTAB_DIRECT
);
19647 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19648 gen_int_mode (cf
- ct
, mode
),
19649 copy_rtx (out
), 1, OPTAB_DIRECT
);
19651 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19652 copy_rtx (out
), 1, OPTAB_DIRECT
);
19653 if (!rtx_equal_p (out
, operands
[0]))
19654 emit_move_insn (operands
[0], copy_rtx (out
));
19660 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19662 /* Try a few things more with specific constants and a variable. */
19665 rtx var
, orig_out
, out
, tmp
;
19667 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19670 /* If one of the two operands is an interesting constant, load a
19671 constant with the above and mask it in with a logical operation. */
19673 if (CONST_INT_P (operands
[2]))
19676 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19677 operands
[3] = constm1_rtx
, op
= and_optab
;
19678 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19679 operands
[3] = const0_rtx
, op
= ior_optab
;
19683 else if (CONST_INT_P (operands
[3]))
19686 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19687 operands
[2] = constm1_rtx
, op
= and_optab
;
19688 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19689 operands
[2] = const0_rtx
, op
= ior_optab
;
19696 orig_out
= operands
[0];
19697 tmp
= gen_reg_rtx (mode
);
19700 /* Recurse to get the constant loaded. */
19701 if (ix86_expand_int_movcc (operands
) == 0)
19704 /* Mask in the interesting variable. */
19705 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19707 if (!rtx_equal_p (out
, orig_out
))
19708 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19714 * For comparison with above,
19724 if (! nonimmediate_operand (operands
[2], mode
))
19725 operands
[2] = force_reg (mode
, operands
[2]);
19726 if (! nonimmediate_operand (operands
[3], mode
))
19727 operands
[3] = force_reg (mode
, operands
[3]);
19729 if (! register_operand (operands
[2], VOIDmode
)
19731 || ! register_operand (operands
[3], VOIDmode
)))
19732 operands
[2] = force_reg (mode
, operands
[2]);
19735 && ! register_operand (operands
[3], VOIDmode
))
19736 operands
[3] = force_reg (mode
, operands
[3]);
19738 emit_insn (compare_seq
);
19739 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19740 gen_rtx_IF_THEN_ELSE (mode
,
19741 compare_op
, operands
[2],
19746 /* Swap, force into registers, or otherwise massage the two operands
19747 to an sse comparison with a mask result. Thus we differ a bit from
19748 ix86_prepare_fp_compare_args which expects to produce a flags result.
19750 The DEST operand exists to help determine whether to commute commutative
19751 operators. The POP0/POP1 operands are updated in place. The new
19752 comparison code is returned, or UNKNOWN if not implementable. */
19754 static enum rtx_code
19755 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19756 rtx
*pop0
, rtx
*pop1
)
19764 /* AVX supports all the needed comparisons. */
19767 /* We have no LTGT as an operator. We could implement it with
19768 NE & ORDERED, but this requires an extra temporary. It's
19769 not clear that it's worth it. */
19776 /* These are supported directly. */
19783 /* AVX has 3 operand comparisons, no need to swap anything. */
19786 /* For commutative operators, try to canonicalize the destination
19787 operand to be first in the comparison - this helps reload to
19788 avoid extra moves. */
19789 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19797 /* These are not supported directly before AVX, and furthermore
19798 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19799 comparison operands to transform into something that is
19804 code
= swap_condition (code
);
19808 gcc_unreachable ();
19814 /* Detect conditional moves that exactly match min/max operational
19815 semantics. Note that this is IEEE safe, as long as we don't
19816 interchange the operands.
19818 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19819 and TRUE if the operation is successful and instructions are emitted. */
19822 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19823 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19825 enum machine_mode mode
;
19831 else if (code
== UNGE
)
19834 if_true
= if_false
;
19840 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19842 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19847 mode
= GET_MODE (dest
);
19849 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19850 but MODE may be a vector mode and thus not appropriate. */
19851 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19853 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19856 if_true
= force_reg (mode
, if_true
);
19857 v
= gen_rtvec (2, if_true
, if_false
);
19858 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19862 code
= is_min
? SMIN
: SMAX
;
19863 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19866 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19870 /* Expand an sse vector comparison. Return the register with the result. */
19873 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19874 rtx op_true
, rtx op_false
)
19876 enum machine_mode mode
= GET_MODE (dest
);
19877 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19880 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19881 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19882 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19885 || reg_overlap_mentioned_p (dest
, op_true
)
19886 || reg_overlap_mentioned_p (dest
, op_false
))
19887 dest
= gen_reg_rtx (mode
);
19889 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19890 if (cmp_mode
!= mode
)
19892 x
= force_reg (cmp_mode
, x
);
19893 convert_move (dest
, x
, false);
19896 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19901 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19902 operations. This is used for both scalar and vector conditional moves. */
19905 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19907 enum machine_mode mode
= GET_MODE (dest
);
19910 if (vector_all_ones_operand (op_true
, mode
)
19911 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19913 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19915 else if (op_false
== CONST0_RTX (mode
))
19917 op_true
= force_reg (mode
, op_true
);
19918 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19919 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19921 else if (op_true
== CONST0_RTX (mode
))
19923 op_false
= force_reg (mode
, op_false
);
19924 x
= gen_rtx_NOT (mode
, cmp
);
19925 x
= gen_rtx_AND (mode
, x
, op_false
);
19926 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19928 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19930 op_false
= force_reg (mode
, op_false
);
19931 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19932 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19934 else if (TARGET_XOP
)
19936 op_true
= force_reg (mode
, op_true
);
19938 if (!nonimmediate_operand (op_false
, mode
))
19939 op_false
= force_reg (mode
, op_false
);
19941 emit_insn (gen_rtx_SET (mode
, dest
,
19942 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19948 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19950 if (!nonimmediate_operand (op_true
, mode
))
19951 op_true
= force_reg (mode
, op_true
);
19953 op_false
= force_reg (mode
, op_false
);
19959 gen
= gen_sse4_1_blendvps
;
19963 gen
= gen_sse4_1_blendvpd
;
19971 gen
= gen_sse4_1_pblendvb
;
19972 dest
= gen_lowpart (V16QImode
, dest
);
19973 op_false
= gen_lowpart (V16QImode
, op_false
);
19974 op_true
= gen_lowpart (V16QImode
, op_true
);
19975 cmp
= gen_lowpart (V16QImode
, cmp
);
19980 gen
= gen_avx_blendvps256
;
19984 gen
= gen_avx_blendvpd256
;
19992 gen
= gen_avx2_pblendvb
;
19993 dest
= gen_lowpart (V32QImode
, dest
);
19994 op_false
= gen_lowpart (V32QImode
, op_false
);
19995 op_true
= gen_lowpart (V32QImode
, op_true
);
19996 cmp
= gen_lowpart (V32QImode
, cmp
);
20004 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20007 op_true
= force_reg (mode
, op_true
);
20009 t2
= gen_reg_rtx (mode
);
20011 t3
= gen_reg_rtx (mode
);
20015 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20016 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20018 x
= gen_rtx_NOT (mode
, cmp
);
20019 x
= gen_rtx_AND (mode
, x
, op_false
);
20020 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20022 x
= gen_rtx_IOR (mode
, t3
, t2
);
20023 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20028 /* Expand a floating-point conditional move. Return true if successful. */
20031 ix86_expand_fp_movcc (rtx operands
[])
20033 enum machine_mode mode
= GET_MODE (operands
[0]);
20034 enum rtx_code code
= GET_CODE (operands
[1]);
20035 rtx tmp
, compare_op
;
20036 rtx op0
= XEXP (operands
[1], 0);
20037 rtx op1
= XEXP (operands
[1], 1);
20039 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20041 enum machine_mode cmode
;
20043 /* Since we've no cmove for sse registers, don't force bad register
20044 allocation just to gain access to it. Deny movcc when the
20045 comparison mode doesn't match the move mode. */
20046 cmode
= GET_MODE (op0
);
20047 if (cmode
== VOIDmode
)
20048 cmode
= GET_MODE (op1
);
20052 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20053 if (code
== UNKNOWN
)
20056 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20057 operands
[2], operands
[3]))
20060 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20061 operands
[2], operands
[3]);
20062 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20066 if (GET_MODE (op0
) == TImode
20067 || (GET_MODE (op0
) == DImode
20071 /* The floating point conditional move instructions don't directly
20072 support conditions resulting from a signed integer comparison. */
20074 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20075 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20077 tmp
= gen_reg_rtx (QImode
);
20078 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20080 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20083 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20084 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20085 operands
[2], operands
[3])));
20090 /* Expand a floating-point vector conditional move; a vcond operation
20091 rather than a movcc operation. */
20094 ix86_expand_fp_vcond (rtx operands
[])
20096 enum rtx_code code
= GET_CODE (operands
[3]);
20099 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20100 &operands
[4], &operands
[5]);
20101 if (code
== UNKNOWN
)
20104 switch (GET_CODE (operands
[3]))
20107 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20108 operands
[5], operands
[0], operands
[0]);
20109 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20110 operands
[5], operands
[1], operands
[2]);
20114 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20115 operands
[5], operands
[0], operands
[0]);
20116 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20117 operands
[5], operands
[1], operands
[2]);
20121 gcc_unreachable ();
20123 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20125 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20129 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20130 operands
[5], operands
[1], operands
[2]))
20133 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20134 operands
[1], operands
[2]);
20135 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20139 /* Expand a signed/unsigned integral vector conditional move. */
20142 ix86_expand_int_vcond (rtx operands
[])
20144 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20145 enum machine_mode mode
= GET_MODE (operands
[4]);
20146 enum rtx_code code
= GET_CODE (operands
[3]);
20147 bool negate
= false;
20150 cop0
= operands
[4];
20151 cop1
= operands
[5];
20153 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20154 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20155 if ((code
== LT
|| code
== GE
)
20156 && data_mode
== mode
20157 && cop1
== CONST0_RTX (mode
)
20158 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20159 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20160 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20161 && (GET_MODE_SIZE (data_mode
) == 16
20162 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20164 rtx negop
= operands
[2 - (code
== LT
)];
20165 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20166 if (negop
== CONST1_RTX (data_mode
))
20168 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20169 operands
[0], 1, OPTAB_DIRECT
);
20170 if (res
!= operands
[0])
20171 emit_move_insn (operands
[0], res
);
20174 else if (GET_MODE_INNER (data_mode
) != DImode
20175 && vector_all_ones_operand (negop
, data_mode
))
20177 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20178 operands
[0], 0, OPTAB_DIRECT
);
20179 if (res
!= operands
[0])
20180 emit_move_insn (operands
[0], res
);
20185 if (!nonimmediate_operand (cop1
, mode
))
20186 cop1
= force_reg (mode
, cop1
);
20187 if (!general_operand (operands
[1], data_mode
))
20188 operands
[1] = force_reg (data_mode
, operands
[1]);
20189 if (!general_operand (operands
[2], data_mode
))
20190 operands
[2] = force_reg (data_mode
, operands
[2]);
20192 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20194 && (mode
== V16QImode
|| mode
== V8HImode
20195 || mode
== V4SImode
|| mode
== V2DImode
))
20199 /* Canonicalize the comparison to EQ, GT, GTU. */
20210 code
= reverse_condition (code
);
20216 code
= reverse_condition (code
);
20222 code
= swap_condition (code
);
20223 x
= cop0
, cop0
= cop1
, cop1
= x
;
20227 gcc_unreachable ();
20230 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20231 if (mode
== V2DImode
)
20236 /* SSE4.1 supports EQ. */
20237 if (!TARGET_SSE4_1
)
20243 /* SSE4.2 supports GT/GTU. */
20244 if (!TARGET_SSE4_2
)
20249 gcc_unreachable ();
20253 /* Unsigned parallel compare is not supported by the hardware.
20254 Play some tricks to turn this into a signed comparison
20258 cop0
= force_reg (mode
, cop0
);
20268 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20272 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20273 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20274 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20275 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20277 gcc_unreachable ();
20279 /* Subtract (-(INT MAX) - 1) from both operands to make
20281 mask
= ix86_build_signbit_mask (mode
, true, false);
20282 t1
= gen_reg_rtx (mode
);
20283 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20285 t2
= gen_reg_rtx (mode
);
20286 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20298 /* Perform a parallel unsigned saturating subtraction. */
20299 x
= gen_reg_rtx (mode
);
20300 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20301 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20304 cop1
= CONST0_RTX (mode
);
20310 gcc_unreachable ();
20315 /* Allow the comparison to be done in one mode, but the movcc to
20316 happen in another mode. */
20317 if (data_mode
== mode
)
20319 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20320 operands
[1+negate
], operands
[2-negate
]);
20324 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20325 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20327 operands
[1+negate
], operands
[2-negate
]);
20328 x
= gen_lowpart (data_mode
, x
);
20331 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20332 operands
[2-negate
]);
20336 /* Expand a variable vector permutation. */
20339 ix86_expand_vec_perm (rtx operands
[])
20341 rtx target
= operands
[0];
20342 rtx op0
= operands
[1];
20343 rtx op1
= operands
[2];
20344 rtx mask
= operands
[3];
20345 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20346 enum machine_mode mode
= GET_MODE (op0
);
20347 enum machine_mode maskmode
= GET_MODE (mask
);
20349 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20351 /* Number of elements in the vector. */
20352 w
= GET_MODE_NUNITS (mode
);
20353 e
= GET_MODE_UNIT_SIZE (mode
);
20354 gcc_assert (w
<= 32);
20358 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20360 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20361 an constant shuffle operand. With a tiny bit of effort we can
20362 use VPERMD instead. A re-interpretation stall for V4DFmode is
20363 unfortunate but there's no avoiding it.
20364 Similarly for V16HImode we don't have instructions for variable
20365 shuffling, while for V32QImode we can use after preparing suitable
20366 masks vpshufb; vpshufb; vpermq; vpor. */
20368 if (mode
== V16HImode
)
20370 maskmode
= mode
= V32QImode
;
20376 maskmode
= mode
= V8SImode
;
20380 t1
= gen_reg_rtx (maskmode
);
20382 /* Replicate the low bits of the V4DImode mask into V8SImode:
20384 t1 = { A A B B C C D D }. */
20385 for (i
= 0; i
< w
/ 2; ++i
)
20386 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20387 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20388 vt
= force_reg (maskmode
, vt
);
20389 mask
= gen_lowpart (maskmode
, mask
);
20390 if (maskmode
== V8SImode
)
20391 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20393 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20395 /* Multiply the shuffle indicies by two. */
20396 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20399 /* Add one to the odd shuffle indicies:
20400 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20401 for (i
= 0; i
< w
/ 2; ++i
)
20403 vec
[i
* 2] = const0_rtx
;
20404 vec
[i
* 2 + 1] = const1_rtx
;
20406 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20407 vt
= force_const_mem (maskmode
, vt
);
20408 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20411 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20412 operands
[3] = mask
= t1
;
20413 target
= gen_lowpart (mode
, target
);
20414 op0
= gen_lowpart (mode
, op0
);
20415 op1
= gen_lowpart (mode
, op1
);
20421 /* The VPERMD and VPERMPS instructions already properly ignore
20422 the high bits of the shuffle elements. No need for us to
20423 perform an AND ourselves. */
20424 if (one_operand_shuffle
)
20425 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20428 t1
= gen_reg_rtx (V8SImode
);
20429 t2
= gen_reg_rtx (V8SImode
);
20430 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20431 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20437 mask
= gen_lowpart (V8SFmode
, mask
);
20438 if (one_operand_shuffle
)
20439 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20442 t1
= gen_reg_rtx (V8SFmode
);
20443 t2
= gen_reg_rtx (V8SFmode
);
20444 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20445 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20451 /* By combining the two 128-bit input vectors into one 256-bit
20452 input vector, we can use VPERMD and VPERMPS for the full
20453 two-operand shuffle. */
20454 t1
= gen_reg_rtx (V8SImode
);
20455 t2
= gen_reg_rtx (V8SImode
);
20456 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20457 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20458 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20459 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20463 t1
= gen_reg_rtx (V8SFmode
);
20464 t2
= gen_reg_rtx (V8SImode
);
20465 mask
= gen_lowpart (V4SImode
, mask
);
20466 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20467 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20468 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20469 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20473 t1
= gen_reg_rtx (V32QImode
);
20474 t2
= gen_reg_rtx (V32QImode
);
20475 t3
= gen_reg_rtx (V32QImode
);
20476 vt2
= GEN_INT (128);
20477 for (i
= 0; i
< 32; i
++)
20479 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20480 vt
= force_reg (V32QImode
, vt
);
20481 for (i
= 0; i
< 32; i
++)
20482 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20483 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20484 vt2
= force_reg (V32QImode
, vt2
);
20485 /* From mask create two adjusted masks, which contain the same
20486 bits as mask in the low 7 bits of each vector element.
20487 The first mask will have the most significant bit clear
20488 if it requests element from the same 128-bit lane
20489 and MSB set if it requests element from the other 128-bit lane.
20490 The second mask will have the opposite values of the MSB,
20491 and additionally will have its 128-bit lanes swapped.
20492 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20493 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20494 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20495 stands for other 12 bytes. */
20496 /* The bit whether element is from the same lane or the other
20497 lane is bit 4, so shift it up by 3 to the MSB position. */
20498 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20499 gen_lowpart (V4DImode
, mask
),
20501 /* Clear MSB bits from the mask just in case it had them set. */
20502 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20503 /* After this t1 will have MSB set for elements from other lane. */
20504 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20505 /* Clear bits other than MSB. */
20506 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20507 /* Or in the lower bits from mask into t3. */
20508 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20509 /* And invert MSB bits in t1, so MSB is set for elements from the same
20511 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20512 /* Swap 128-bit lanes in t3. */
20513 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20514 gen_lowpart (V4DImode
, t3
),
20515 const2_rtx
, GEN_INT (3),
20516 const0_rtx
, const1_rtx
));
20517 /* And or in the lower bits from mask into t1. */
20518 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20519 if (one_operand_shuffle
)
20521 /* Each of these shuffles will put 0s in places where
20522 element from the other 128-bit lane is needed, otherwise
20523 will shuffle in the requested value. */
20524 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20525 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20526 /* For t3 the 128-bit lanes are swapped again. */
20527 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20528 gen_lowpart (V4DImode
, t3
),
20529 const2_rtx
, GEN_INT (3),
20530 const0_rtx
, const1_rtx
));
20531 /* And oring both together leads to the result. */
20532 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20536 t4
= gen_reg_rtx (V32QImode
);
20537 /* Similarly to the above one_operand_shuffle code,
20538 just for repeated twice for each operand. merge_two:
20539 code will merge the two results together. */
20540 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20541 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20542 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20543 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20544 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20545 gen_lowpart (V4DImode
, t4
),
20546 const2_rtx
, GEN_INT (3),
20547 const0_rtx
, const1_rtx
));
20548 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20549 gen_lowpart (V4DImode
, t3
),
20550 const2_rtx
, GEN_INT (3),
20551 const0_rtx
, const1_rtx
));
20552 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20553 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20559 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20566 /* The XOP VPPERM insn supports three inputs. By ignoring the
20567 one_operand_shuffle special case, we avoid creating another
20568 set of constant vectors in memory. */
20569 one_operand_shuffle
= false;
20571 /* mask = mask & {2*w-1, ...} */
20572 vt
= GEN_INT (2*w
- 1);
20576 /* mask = mask & {w-1, ...} */
20577 vt
= GEN_INT (w
- 1);
20580 for (i
= 0; i
< w
; i
++)
20582 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20583 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20584 NULL_RTX
, 0, OPTAB_DIRECT
);
20586 /* For non-QImode operations, convert the word permutation control
20587 into a byte permutation control. */
20588 if (mode
!= V16QImode
)
20590 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20591 GEN_INT (exact_log2 (e
)),
20592 NULL_RTX
, 0, OPTAB_DIRECT
);
20594 /* Convert mask to vector of chars. */
20595 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20597 /* Replicate each of the input bytes into byte positions:
20598 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20599 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20600 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20601 for (i
= 0; i
< 16; ++i
)
20602 vec
[i
] = GEN_INT (i
/e
* e
);
20603 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20604 vt
= force_const_mem (V16QImode
, vt
);
20606 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20608 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20610 /* Convert it into the byte positions by doing
20611 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20612 for (i
= 0; i
< 16; ++i
)
20613 vec
[i
] = GEN_INT (i
% e
);
20614 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20615 vt
= force_const_mem (V16QImode
, vt
);
20616 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20619 /* The actual shuffle operations all operate on V16QImode. */
20620 op0
= gen_lowpart (V16QImode
, op0
);
20621 op1
= gen_lowpart (V16QImode
, op1
);
20622 target
= gen_lowpart (V16QImode
, target
);
20626 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20628 else if (one_operand_shuffle
)
20630 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20637 /* Shuffle the two input vectors independently. */
20638 t1
= gen_reg_rtx (V16QImode
);
20639 t2
= gen_reg_rtx (V16QImode
);
20640 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20641 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20644 /* Then merge them together. The key is whether any given control
20645 element contained a bit set that indicates the second word. */
20646 mask
= operands
[3];
20648 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20650 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20651 more shuffle to convert the V2DI input mask into a V4SI
20652 input mask. At which point the masking that expand_int_vcond
20653 will work as desired. */
20654 rtx t3
= gen_reg_rtx (V4SImode
);
20655 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20656 const0_rtx
, const0_rtx
,
20657 const2_rtx
, const2_rtx
));
20659 maskmode
= V4SImode
;
20663 for (i
= 0; i
< w
; i
++)
20665 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20666 vt
= force_reg (maskmode
, vt
);
20667 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20668 NULL_RTX
, 0, OPTAB_DIRECT
);
20670 xops
[0] = gen_lowpart (mode
, operands
[0]);
20671 xops
[1] = gen_lowpart (mode
, t2
);
20672 xops
[2] = gen_lowpart (mode
, t1
);
20673 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20676 ok
= ix86_expand_int_vcond (xops
);
20681 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20682 true if we should do zero extension, else sign extension. HIGH_P is
20683 true if we want the N/2 high elements, else the low elements. */
20686 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20688 enum machine_mode imode
= GET_MODE (src
);
20693 rtx (*unpack
)(rtx
, rtx
);
20694 rtx (*extract
)(rtx
, rtx
) = NULL
;
20695 enum machine_mode halfmode
= BLKmode
;
20701 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20703 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20704 halfmode
= V16QImode
;
20706 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20710 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20712 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20713 halfmode
= V8HImode
;
20715 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20719 unpack
= gen_avx2_zero_extendv4siv4di2
;
20721 unpack
= gen_avx2_sign_extendv4siv4di2
;
20722 halfmode
= V4SImode
;
20724 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20728 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20730 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20734 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20736 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20740 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20742 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20745 gcc_unreachable ();
20748 if (GET_MODE_SIZE (imode
) == 32)
20750 tmp
= gen_reg_rtx (halfmode
);
20751 emit_insn (extract (tmp
, src
));
20755 /* Shift higher 8 bytes to lower 8 bytes. */
20756 tmp
= gen_reg_rtx (imode
);
20757 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20758 gen_lowpart (V1TImode
, src
),
20764 emit_insn (unpack (dest
, tmp
));
20768 rtx (*unpack
)(rtx
, rtx
, rtx
);
20774 unpack
= gen_vec_interleave_highv16qi
;
20776 unpack
= gen_vec_interleave_lowv16qi
;
20780 unpack
= gen_vec_interleave_highv8hi
;
20782 unpack
= gen_vec_interleave_lowv8hi
;
20786 unpack
= gen_vec_interleave_highv4si
;
20788 unpack
= gen_vec_interleave_lowv4si
;
20791 gcc_unreachable ();
20795 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20797 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20798 src
, pc_rtx
, pc_rtx
);
20800 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20804 /* Expand conditional increment or decrement using adb/sbb instructions.
20805 The default case using setcc followed by the conditional move can be
20806 done by generic code. */
20808 ix86_expand_int_addcc (rtx operands
[])
20810 enum rtx_code code
= GET_CODE (operands
[1]);
20812 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20814 rtx val
= const0_rtx
;
20815 bool fpcmp
= false;
20816 enum machine_mode mode
;
20817 rtx op0
= XEXP (operands
[1], 0);
20818 rtx op1
= XEXP (operands
[1], 1);
20820 if (operands
[3] != const1_rtx
20821 && operands
[3] != constm1_rtx
)
20823 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20825 code
= GET_CODE (compare_op
);
20827 flags
= XEXP (compare_op
, 0);
20829 if (GET_MODE (flags
) == CCFPmode
20830 || GET_MODE (flags
) == CCFPUmode
)
20833 code
= ix86_fp_compare_code_to_integer (code
);
20840 PUT_CODE (compare_op
,
20841 reverse_condition_maybe_unordered
20842 (GET_CODE (compare_op
)));
20844 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20847 mode
= GET_MODE (operands
[0]);
20849 /* Construct either adc or sbb insn. */
20850 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20855 insn
= gen_subqi3_carry
;
20858 insn
= gen_subhi3_carry
;
20861 insn
= gen_subsi3_carry
;
20864 insn
= gen_subdi3_carry
;
20867 gcc_unreachable ();
20875 insn
= gen_addqi3_carry
;
20878 insn
= gen_addhi3_carry
;
20881 insn
= gen_addsi3_carry
;
20884 insn
= gen_adddi3_carry
;
20887 gcc_unreachable ();
20890 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20896 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20897 but works for floating pointer parameters and nonoffsetable memories.
20898 For pushes, it returns just stack offsets; the values will be saved
20899 in the right order. Maximally three parts are generated. */
20902 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20907 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20909 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20911 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20912 gcc_assert (size
>= 2 && size
<= 4);
20914 /* Optimize constant pool reference to immediates. This is used by fp
20915 moves, that force all constants to memory to allow combining. */
20916 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20918 rtx tmp
= maybe_get_pool_constant (operand
);
20923 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20925 /* The only non-offsetable memories we handle are pushes. */
20926 int ok
= push_operand (operand
, VOIDmode
);
20930 operand
= copy_rtx (operand
);
20931 PUT_MODE (operand
, word_mode
);
20932 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20936 if (GET_CODE (operand
) == CONST_VECTOR
)
20938 enum machine_mode imode
= int_mode_for_mode (mode
);
20939 /* Caution: if we looked through a constant pool memory above,
20940 the operand may actually have a different mode now. That's
20941 ok, since we want to pun this all the way back to an integer. */
20942 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20943 gcc_assert (operand
!= NULL
);
20949 if (mode
== DImode
)
20950 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20955 if (REG_P (operand
))
20957 gcc_assert (reload_completed
);
20958 for (i
= 0; i
< size
; i
++)
20959 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20961 else if (offsettable_memref_p (operand
))
20963 operand
= adjust_address (operand
, SImode
, 0);
20964 parts
[0] = operand
;
20965 for (i
= 1; i
< size
; i
++)
20966 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20968 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20973 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20977 real_to_target (l
, &r
, mode
);
20978 parts
[3] = gen_int_mode (l
[3], SImode
);
20979 parts
[2] = gen_int_mode (l
[2], SImode
);
20982 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
20983 long double may not be 80-bit. */
20984 real_to_target (l
, &r
, mode
);
20985 parts
[2] = gen_int_mode (l
[2], SImode
);
20988 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20991 gcc_unreachable ();
20993 parts
[1] = gen_int_mode (l
[1], SImode
);
20994 parts
[0] = gen_int_mode (l
[0], SImode
);
20997 gcc_unreachable ();
21002 if (mode
== TImode
)
21003 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21004 if (mode
== XFmode
|| mode
== TFmode
)
21006 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21007 if (REG_P (operand
))
21009 gcc_assert (reload_completed
);
21010 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21011 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21013 else if (offsettable_memref_p (operand
))
21015 operand
= adjust_address (operand
, DImode
, 0);
21016 parts
[0] = operand
;
21017 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21019 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21024 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21025 real_to_target (l
, &r
, mode
);
21027 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21028 if (HOST_BITS_PER_WIDE_INT
>= 64)
21031 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21032 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21035 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21037 if (upper_mode
== SImode
)
21038 parts
[1] = gen_int_mode (l
[2], SImode
);
21039 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21042 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21043 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21046 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21049 gcc_unreachable ();
21056 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21057 Return false when normal moves are needed; true when all required
21058 insns have been emitted. Operands 2-4 contain the input values
21059 int the correct order; operands 5-7 contain the output values. */
21062 ix86_split_long_move (rtx operands
[])
21067 int collisions
= 0;
21068 enum machine_mode mode
= GET_MODE (operands
[0]);
21069 bool collisionparts
[4];
21071 /* The DFmode expanders may ask us to move double.
21072 For 64bit target this is single move. By hiding the fact
21073 here we simplify i386.md splitters. */
21074 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21076 /* Optimize constant pool reference to immediates. This is used by
21077 fp moves, that force all constants to memory to allow combining. */
21079 if (MEM_P (operands
[1])
21080 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21081 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21082 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21083 if (push_operand (operands
[0], VOIDmode
))
21085 operands
[0] = copy_rtx (operands
[0]);
21086 PUT_MODE (operands
[0], word_mode
);
21089 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21090 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21091 emit_move_insn (operands
[0], operands
[1]);
21095 /* The only non-offsettable memory we handle is push. */
21096 if (push_operand (operands
[0], VOIDmode
))
21099 gcc_assert (!MEM_P (operands
[0])
21100 || offsettable_memref_p (operands
[0]));
21102 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21103 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21105 /* When emitting push, take care for source operands on the stack. */
21106 if (push
&& MEM_P (operands
[1])
21107 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21109 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21111 /* Compensate for the stack decrement by 4. */
21112 if (!TARGET_64BIT
&& nparts
== 3
21113 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21114 src_base
= plus_constant (Pmode
, src_base
, 4);
21116 /* src_base refers to the stack pointer and is
21117 automatically decreased by emitted push. */
21118 for (i
= 0; i
< nparts
; i
++)
21119 part
[1][i
] = change_address (part
[1][i
],
21120 GET_MODE (part
[1][i
]), src_base
);
21123 /* We need to do copy in the right order in case an address register
21124 of the source overlaps the destination. */
21125 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21129 for (i
= 0; i
< nparts
; i
++)
21132 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21133 if (collisionparts
[i
])
21137 /* Collision in the middle part can be handled by reordering. */
21138 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21140 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21141 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21143 else if (collisions
== 1
21145 && (collisionparts
[1] || collisionparts
[2]))
21147 if (collisionparts
[1])
21149 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21150 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21154 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21155 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21159 /* If there are more collisions, we can't handle it by reordering.
21160 Do an lea to the last part and use only one colliding move. */
21161 else if (collisions
> 1)
21167 base
= part
[0][nparts
- 1];
21169 /* Handle the case when the last part isn't valid for lea.
21170 Happens in 64-bit mode storing the 12-byte XFmode. */
21171 if (GET_MODE (base
) != Pmode
)
21172 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21174 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21175 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21176 for (i
= 1; i
< nparts
; i
++)
21178 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21179 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21190 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21191 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21192 stack_pointer_rtx
, GEN_INT (-4)));
21193 emit_move_insn (part
[0][2], part
[1][2]);
21195 else if (nparts
== 4)
21197 emit_move_insn (part
[0][3], part
[1][3]);
21198 emit_move_insn (part
[0][2], part
[1][2]);
21203 /* In 64bit mode we don't have 32bit push available. In case this is
21204 register, it is OK - we will just use larger counterpart. We also
21205 retype memory - these comes from attempt to avoid REX prefix on
21206 moving of second half of TFmode value. */
21207 if (GET_MODE (part
[1][1]) == SImode
)
21209 switch (GET_CODE (part
[1][1]))
21212 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21216 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21220 gcc_unreachable ();
21223 if (GET_MODE (part
[1][0]) == SImode
)
21224 part
[1][0] = part
[1][1];
21227 emit_move_insn (part
[0][1], part
[1][1]);
21228 emit_move_insn (part
[0][0], part
[1][0]);
21232 /* Choose correct order to not overwrite the source before it is copied. */
21233 if ((REG_P (part
[0][0])
21234 && REG_P (part
[1][1])
21235 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21237 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21239 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21241 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21243 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21245 operands
[2 + i
] = part
[0][j
];
21246 operands
[6 + i
] = part
[1][j
];
21251 for (i
= 0; i
< nparts
; i
++)
21253 operands
[2 + i
] = part
[0][i
];
21254 operands
[6 + i
] = part
[1][i
];
21258 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21259 if (optimize_insn_for_size_p ())
21261 for (j
= 0; j
< nparts
- 1; j
++)
21262 if (CONST_INT_P (operands
[6 + j
])
21263 && operands
[6 + j
] != const0_rtx
21264 && REG_P (operands
[2 + j
]))
21265 for (i
= j
; i
< nparts
- 1; i
++)
21266 if (CONST_INT_P (operands
[7 + i
])
21267 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21268 operands
[7 + i
] = operands
[2 + j
];
21271 for (i
= 0; i
< nparts
; i
++)
21272 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21277 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21278 left shift by a constant, either using a single shift or
21279 a sequence of add instructions. */
21282 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21284 rtx (*insn
)(rtx
, rtx
, rtx
);
21287 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21288 && !optimize_insn_for_size_p ()))
21290 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21291 while (count
-- > 0)
21292 emit_insn (insn (operand
, operand
, operand
));
21296 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21297 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21302 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21304 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21305 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21306 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21308 rtx low
[2], high
[2];
21311 if (CONST_INT_P (operands
[2]))
21313 split_double_mode (mode
, operands
, 2, low
, high
);
21314 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21316 if (count
>= half_width
)
21318 emit_move_insn (high
[0], low
[1]);
21319 emit_move_insn (low
[0], const0_rtx
);
21321 if (count
> half_width
)
21322 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21326 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21328 if (!rtx_equal_p (operands
[0], operands
[1]))
21329 emit_move_insn (operands
[0], operands
[1]);
21331 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21332 ix86_expand_ashl_const (low
[0], count
, mode
);
21337 split_double_mode (mode
, operands
, 1, low
, high
);
21339 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21341 if (operands
[1] == const1_rtx
)
21343 /* Assuming we've chosen a QImode capable registers, then 1 << N
21344 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21345 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21347 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21349 ix86_expand_clear (low
[0]);
21350 ix86_expand_clear (high
[0]);
21351 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21353 d
= gen_lowpart (QImode
, low
[0]);
21354 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21355 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21356 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21358 d
= gen_lowpart (QImode
, high
[0]);
21359 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21360 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21361 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21364 /* Otherwise, we can get the same results by manually performing
21365 a bit extract operation on bit 5/6, and then performing the two
21366 shifts. The two methods of getting 0/1 into low/high are exactly
21367 the same size. Avoiding the shift in the bit extract case helps
21368 pentium4 a bit; no one else seems to care much either way. */
21371 enum machine_mode half_mode
;
21372 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21373 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21374 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21375 HOST_WIDE_INT bits
;
21378 if (mode
== DImode
)
21380 half_mode
= SImode
;
21381 gen_lshr3
= gen_lshrsi3
;
21382 gen_and3
= gen_andsi3
;
21383 gen_xor3
= gen_xorsi3
;
21388 half_mode
= DImode
;
21389 gen_lshr3
= gen_lshrdi3
;
21390 gen_and3
= gen_anddi3
;
21391 gen_xor3
= gen_xordi3
;
21395 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21396 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21398 x
= gen_lowpart (half_mode
, operands
[2]);
21399 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21401 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21402 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21403 emit_move_insn (low
[0], high
[0]);
21404 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21407 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21408 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21412 if (operands
[1] == constm1_rtx
)
21414 /* For -1 << N, we can avoid the shld instruction, because we
21415 know that we're shifting 0...31/63 ones into a -1. */
21416 emit_move_insn (low
[0], constm1_rtx
);
21417 if (optimize_insn_for_size_p ())
21418 emit_move_insn (high
[0], low
[0]);
21420 emit_move_insn (high
[0], constm1_rtx
);
21424 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21426 if (!rtx_equal_p (operands
[0], operands
[1]))
21427 emit_move_insn (operands
[0], operands
[1]);
21429 split_double_mode (mode
, operands
, 1, low
, high
);
21430 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21433 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21435 if (TARGET_CMOVE
&& scratch
)
21437 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21438 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21440 ix86_expand_clear (scratch
);
21441 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21445 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21446 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21448 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21453 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21455 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21456 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21457 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21458 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21460 rtx low
[2], high
[2];
21463 if (CONST_INT_P (operands
[2]))
21465 split_double_mode (mode
, operands
, 2, low
, high
);
21466 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21468 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21470 emit_move_insn (high
[0], high
[1]);
21471 emit_insn (gen_ashr3 (high
[0], high
[0],
21472 GEN_INT (half_width
- 1)));
21473 emit_move_insn (low
[0], high
[0]);
21476 else if (count
>= half_width
)
21478 emit_move_insn (low
[0], high
[1]);
21479 emit_move_insn (high
[0], low
[0]);
21480 emit_insn (gen_ashr3 (high
[0], high
[0],
21481 GEN_INT (half_width
- 1)));
21483 if (count
> half_width
)
21484 emit_insn (gen_ashr3 (low
[0], low
[0],
21485 GEN_INT (count
- half_width
)));
21489 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21491 if (!rtx_equal_p (operands
[0], operands
[1]))
21492 emit_move_insn (operands
[0], operands
[1]);
21494 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21495 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21500 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21502 if (!rtx_equal_p (operands
[0], operands
[1]))
21503 emit_move_insn (operands
[0], operands
[1]);
21505 split_double_mode (mode
, operands
, 1, low
, high
);
21507 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21508 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21510 if (TARGET_CMOVE
&& scratch
)
21512 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21513 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21515 emit_move_insn (scratch
, high
[0]);
21516 emit_insn (gen_ashr3 (scratch
, scratch
,
21517 GEN_INT (half_width
- 1)));
21518 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21523 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21524 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21526 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21532 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21534 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21535 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21536 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21537 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21539 rtx low
[2], high
[2];
21542 if (CONST_INT_P (operands
[2]))
21544 split_double_mode (mode
, operands
, 2, low
, high
);
21545 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21547 if (count
>= half_width
)
21549 emit_move_insn (low
[0], high
[1]);
21550 ix86_expand_clear (high
[0]);
21552 if (count
> half_width
)
21553 emit_insn (gen_lshr3 (low
[0], low
[0],
21554 GEN_INT (count
- half_width
)));
21558 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21560 if (!rtx_equal_p (operands
[0], operands
[1]))
21561 emit_move_insn (operands
[0], operands
[1]);
21563 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21564 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21569 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21571 if (!rtx_equal_p (operands
[0], operands
[1]))
21572 emit_move_insn (operands
[0], operands
[1]);
21574 split_double_mode (mode
, operands
, 1, low
, high
);
21576 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21577 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21579 if (TARGET_CMOVE
&& scratch
)
21581 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21582 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21584 ix86_expand_clear (scratch
);
21585 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21590 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21591 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21593 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21598 /* Predict just emitted jump instruction to be taken with probability PROB. */
21600 predict_jump (int prob
)
21602 rtx insn
= get_last_insn ();
21603 gcc_assert (JUMP_P (insn
));
21604 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21607 /* Helper function for the string operations below. Dest VARIABLE whether
21608 it is aligned to VALUE bytes. If true, jump to the label. */
21610 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21612 rtx label
= gen_label_rtx ();
21613 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21614 if (GET_MODE (variable
) == DImode
)
21615 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21617 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21618 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21621 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21623 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21627 /* Adjust COUNTER by the VALUE. */
21629 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21631 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21632 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21634 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21637 /* Zero extend possibly SImode EXP to Pmode register. */
21639 ix86_zero_extend_to_Pmode (rtx exp
)
21641 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
21644 /* Divide COUNTREG by SCALE. */
21646 scale_counter (rtx countreg
, int scale
)
21652 if (CONST_INT_P (countreg
))
21653 return GEN_INT (INTVAL (countreg
) / scale
);
21654 gcc_assert (REG_P (countreg
));
21656 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21657 GEN_INT (exact_log2 (scale
)),
21658 NULL
, 1, OPTAB_DIRECT
);
21662 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21663 DImode for constant loop counts. */
21665 static enum machine_mode
21666 counter_mode (rtx count_exp
)
21668 if (GET_MODE (count_exp
) != VOIDmode
)
21669 return GET_MODE (count_exp
);
21670 if (!CONST_INT_P (count_exp
))
21672 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21677 /* When SRCPTR is non-NULL, output simple loop to move memory
21678 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21679 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21680 equivalent loop to set memory by VALUE (supposed to be in MODE).
21682 The size is rounded down to whole number of chunk size moved at once.
21683 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  /* Labels delimiting the copy/store loop; ITER is the running byte
     offset, TMP a scratch for intermediate RTL.  */
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  /* Bytes handled per loop iteration: one MODE-sized chunk per unroll
     copy.  */
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  /* Mask rounding COUNT down to a whole number of iterations.  */
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  /* SIZE = COUNT rounded down to a multiple of the piece size.  */
  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      /* Single-byte pieces: skip the loop entirely when SIZE is zero.  */
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  /* Compute the destination address for this iteration:
     DESTPTR + ITER.  */
  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      /* Copy (memcpy) case: also form the source address.  */
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  /* Deliberately disabled (&& 0): single-temporary variant that
	     interleaves loads and stores.  */
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  /* Load all UNROLL chunks into temporaries first, then store
	     them, so loads and stores do not alias-interfere.  */
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		{
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    /* Store (memset) case: write VALUE into each chunk.  */
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  /* Advance the iteration counter by one piece and loop while
     ITER < SIZE.  */
  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      /* Derive a branch probability from the expected iteration count.  */
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  /* Finally, advance the user-visible pointers past the copied region
     so the epilogue sees the correct addresses.  */
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  COUNT bytes are
   copied from SRCMEM to DESTMEM in MODE-sized chunks; any remainder below
   GET_MODE_SIZE (MODE) is left for the epilogue.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  /* Normalize both MEMs to BLKmode based at the pointer registers the
     rep instruction will actually use.  */
  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  /* Chunk count for ECX: byte count scaled down by the chunk size.  */
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      /* Final pointer values: ptr + (countreg << log2 (chunk size)).  */
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      /* Byte copies advance the pointers by exactly COUNTREG.  */
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      /* Known count: record the exact number of bytes this insn touches
	 (rounded down to whole chunks) for alias analysis.  */
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      /* Unknown count: any previously recorded size is no longer valid.  */
      if (MEM_SIZE_KNOWN_P (destmem))
	clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  VALUE is the byte
   pattern (widened to MODE) stored into COUNT bytes at DESTMEM; ORIG_VALUE
   is the original value, used to detect a known all-zero store.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* Normalize the MEM to BLKmode based at the pointer register the
     rep instruction will actually use.  */
  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  /* EAX must hold the store pattern in MODE.  */
  value = force_reg (mode, gen_lowpart (mode, value));
  /* Chunk count for ECX: byte count scaled down by the chunk size.  */
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      /* Final pointer value: ptr + (countreg << log2 (chunk size)).  */
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  /* Only a known zero store with known count keeps precise size info;
     other patterns may have been widened imprecisely.  */
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
/* Emit a single MODE-sized movs-style copy from SRCMEM+OFFSET to
   DESTMEM+OFFSET, advancing DESTPTR and SRCPTR as a side effect of the
   strmov pattern.  */
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
   With a constant COUNT this emits a straight-line sequence of at most a few
   moves; otherwise it emits an alignment-test jump tree (or falls back to a
   byte loop for large MAX_SIZE).  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      /* Constant count: emit exactly the moves needed for the low bits
	 of COUNT, largest chunks first.  */
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
	    }
	  else
	    /* 32-bit epilogues are never asked for 16-byte tails.  */
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      /* Too many possible tail sizes for a jump tree; mask COUNT down and
	 run a byte-copy loop.  */
      count = expand_simple_binop (GET_MODE (count), AND, count,
				   GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      /* Keep a running OFFSET register instead of bumping the pointers.  */
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  /* Last possible byte: no need to update OFFSET afterwards.  */
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.
   Masks COUNT down to the tail size and delegates to the generic byte-store
   loop.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  /* NULL srcmem/srcptr selects the memset form of the loop expander.  */
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.
   With a constant COUNT this emits a straight-line sequence of stores;
   otherwise an alignment-test jump tree, or the loop fallback for large
   MAX_SIZE.  VALUE is assumed already widened so its DImode/SImode lowparts
   repeat the byte pattern.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
			int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      /* Constant count: emit exactly the stores needed for the low bits
	 of COUNT, largest chunks first.  */
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    /* 32-bit epilogues are never asked for 16-byte tails.  */
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      /* Too many possible tail sizes for a jump tree; use the loop.  */
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
				       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
   DESIRED_ALIGNMENT.  Each alignment step is guarded by a runtime test of the
   destination pointer's low bits and decrements COUNT by the bytes copied.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      /* Copy one byte if DESTPTR is odd.  */
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      /* Copy two bytes if DESTPTR is 2-misaligned.  */
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      /* Copy four bytes if DESTPTR is 4-misaligned.  */
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  /* Only alignments up to 8 are handled by the steps above.  */
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.
   Unlike expand_movmem_prologue, the byte count is known at compile time,
   so the copies are unconditional and alias/alignment info is maintained
   precisely.  Returns the adjusted DST; *SRCP is updated in place.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  /* Distance (in bytes) of SRC from the desired alignment, or negative if
     unknown.  Used below to propagate whatever alignment SRC happens to
     gain as DST is aligned.  */
  int src_align_bytes = get_mem_align_offset (src,
					      desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      /* SRC is 2-aligned here only if its misalignment matches DST's.  */
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  /* The alignment SRC gains depends on how its misalignment
	     relates to DST's.  */
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  /* Re-express both MEMs as BLKmode at the final offset and record the
     alignment now guaranteed for the main copy loop.  */
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  /* ALIGN_BYTES of each block have been consumed; shrink the recorded
     sizes accordingly.  */
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough from DEST to align DEST known to by aligned by ALIGN to
   DESIRED_ALIGNMENT.  Each alignment step is guarded by a runtime test of
   the destination pointer's low bits and decrements COUNT by the bytes
   stored.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      /* Store one byte if DESTPTR is odd.  */
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      /* Store two bytes if DESTPTR is 2-misaligned.  */
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      /* Store four bytes if DESTPTR is 4-misaligned.  */
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  /* Only alignments up to 8 are handled by the steps above.  */
  gcc_assert (desired_alignment <= 8);
}
/* Set enough from DST to align DST known to by aligned by ALIGN to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.
   The byte count is known at compile time, so the stores are unconditional
   and alias/alignment info is maintained precisely.  Returns the adjusted
   DST.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  /* Re-express DST as BLKmode at the final offset and record the alignment
     now guaranteed for the main store loop.  */
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  /* ALIGN_BYTES of the block have been consumed; shrink the recorded
     size accordingly.  */
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   MEMSET selects the memset cost table instead of memcpy.  On return
   *DYNAMIC_CHECK is -1, or the block-size threshold above which a runtime
   libcall fallback should be emitted; *NOALIGN is set from the chosen cost
   table entry.  May recurse once (with a halved expected size) when forced
   to inline a block the tables would hand to a libcall.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check, bool *noalign)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte      \
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  /* A -mstringop-strategy override wins when it is usable at all.  */
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
   */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      /* Walk the per-CPU size table and take the entry covering
	 EXPECTED_SIZE.  */
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    {
		      *noalign = algs->size[i].noalign;
		      return alg;
		    }
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		{
		  *noalign = algs->size[i].noalign;
		  return candidate;
		}
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      /* Recurse once with half of the largest hand-inlinable size as the
	 expected size; the recursion must settle on a concrete inline
	 algorithm.  */
      alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).
   Returns the alignment the prologue should establish for ALG, never less
   than ALIGN; tiny expected sizes skip alignment work entirely.  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
	gcc_unreachable ();
      case loop:
      case unrolled_loop:
	/* Word-sized loop bodies want word alignment.  */
	desired_align = GET_MODE_SIZE (Pmode);
	break;
      case rep_prefix_8_byte:
	desired_align = 8;
	break;
      case rep_prefix_4_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks.
	   copying whole cacheline at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 4;
	break;
      case rep_prefix_1_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks.
	   copying whole cacheline at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 1;
	break;
      case loop_1_byte:
	desired_align = 1;
	break;
      case libcall:
	/* The library call handles alignment itself.  */
	return 0;
    }

  if (desired_align < align)
    desired_align = align;
  /* Blocks expected to be very small are not worth aligning.  */
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.
   NOTE(review): body reconstructed; for VAL = 0 this returns 1, and VAL is
   assumed non-negative — confirm against callers (it is used to round up
   epilogue tail sizes).  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
22587 /* Expand string move (memcpy) operation. Use i386 string operations
22588 when profitable. expand_setmem contains similar code. The code
22589 depends upon architecture, block size and alignment, but always has
22590 the same overall structure:
22592 1) Prologue guard: Conditional that jumps up to epilogues for small
22593 blocks that can be handled by epilogue alone. This is faster
22594 but also needed for correctness, since prologue assume the block
22595 is larger than the desired alignment.
22597 Optional dynamic check for size and libcall for large
22598 blocks is emitted here too, with -minline-stringops-dynamically.
22600 2) Prologue: copy first few bytes in order to get destination
22601 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22602 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22603 copied. We emit either a jump tree on power of two sized
22604 blocks, or a byte loop.
22606 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22607 with specified algorithm.
22609 4) Epilogue: code copying tail of the block that is too small to be
22610 handled by main body (or up to size guarded by prologue guard). */
22613 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22614 rtx expected_align_exp
, rtx expected_size_exp
)
22620 rtx jump_around_label
= NULL
;
22621 HOST_WIDE_INT align
= 1;
22622 unsigned HOST_WIDE_INT count
= 0;
22623 HOST_WIDE_INT expected_size
= -1;
22624 int size_needed
= 0, epilogue_size_needed
;
22625 int desired_align
= 0, align_bytes
= 0;
22626 enum stringop_alg alg
;
22628 bool need_zero_guard
= false;
22631 if (CONST_INT_P (align_exp
))
22632 align
= INTVAL (align_exp
);
22633 /* i386 can do misaligned access on reasonably increased cost. */
22634 if (CONST_INT_P (expected_align_exp
)
22635 && INTVAL (expected_align_exp
) > align
)
22636 align
= INTVAL (expected_align_exp
);
22637 /* ALIGN is the minimum of destination and source alignment, but we care here
22638 just about destination alignment. */
22639 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22640 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22642 if (CONST_INT_P (count_exp
))
22643 count
= expected_size
= INTVAL (count_exp
);
22644 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22645 expected_size
= INTVAL (expected_size_exp
);
22647 /* Make sure we don't need to care about overflow later on. */
22648 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22651 /* Step 0: Decide on preferred algorithm, desired alignment and
22652 size of chunks to be copied by main loop. */
22654 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
22655 desired_align
= decide_alignment (align
, alg
, expected_size
);
22657 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
22658 align
= desired_align
;
22660 if (alg
== libcall
)
22662 gcc_assert (alg
!= no_stringop
);
22664 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22665 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22666 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22671 gcc_unreachable ();
22673 need_zero_guard
= true;
22674 size_needed
= GET_MODE_SIZE (word_mode
);
22676 case unrolled_loop
:
22677 need_zero_guard
= true;
22678 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22680 case rep_prefix_8_byte
:
22683 case rep_prefix_4_byte
:
22686 case rep_prefix_1_byte
:
22690 need_zero_guard
= true;
22695 epilogue_size_needed
= size_needed
;
22697 /* Step 1: Prologue guard. */
22699 /* Alignment code needs count to be in register. */
22700 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22702 if (INTVAL (count_exp
) > desired_align
22703 && INTVAL (count_exp
) > size_needed
)
22706 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22707 if (align_bytes
<= 0)
22710 align_bytes
= desired_align
- align_bytes
;
22712 if (align_bytes
== 0)
22713 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22715 gcc_assert (desired_align
>= 1 && align
>= 1);
22717 /* Ensure that alignment prologue won't copy past end of block. */
22718 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22720 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22721 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22722 Make sure it is power of 2. */
22723 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22727 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22729 /* If main algorithm works on QImode, no epilogue is needed.
22730 For small sizes just don't align anything. */
22731 if (size_needed
== 1)
22732 desired_align
= align
;
22739 label
= gen_label_rtx ();
22740 emit_cmp_and_jump_insns (count_exp
,
22741 GEN_INT (epilogue_size_needed
),
22742 LTU
, 0, counter_mode (count_exp
), 1, label
);
22743 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22744 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22746 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22750 /* Emit code to decide on runtime whether library call or inline should be
22752 if (dynamic_check
!= -1)
22754 if (CONST_INT_P (count_exp
))
22756 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22758 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22759 count_exp
= const0_rtx
;
22765 rtx hot_label
= gen_label_rtx ();
22766 jump_around_label
= gen_label_rtx ();
22767 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22768 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22769 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22770 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22771 emit_jump (jump_around_label
);
22772 emit_label (hot_label
);
22776 /* Step 2: Alignment prologue. */
22778 if (desired_align
> align
)
22780 if (align_bytes
== 0)
22782 /* Except for the first move in epilogue, we no longer know
22783 constant offset in aliasing info. It don't seems to worth
22784 the pain to maintain it for the first move, so throw away
22786 src
= change_address (src
, BLKmode
, srcreg
);
22787 dst
= change_address (dst
, BLKmode
, destreg
);
22788 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22793 /* If we know how many bytes need to be stored before dst is
22794 sufficiently aligned, maintain aliasing info accurately. */
22795 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22796 desired_align
, align_bytes
);
22797 count_exp
= plus_constant (counter_mode (count_exp
),
22798 count_exp
, -align_bytes
);
22799 count
-= align_bytes
;
22801 if (need_zero_guard
22802 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22803 || (align_bytes
== 0
22804 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22805 + desired_align
- align
))))
22807 /* It is possible that we copied enough so the main loop will not
22809 gcc_assert (size_needed
> 1);
22810 if (label
== NULL_RTX
)
22811 label
= gen_label_rtx ();
22812 emit_cmp_and_jump_insns (count_exp
,
22813 GEN_INT (size_needed
),
22814 LTU
, 0, counter_mode (count_exp
), 1, label
);
22815 if (expected_size
== -1
22816 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22817 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22819 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22822 if (label
&& size_needed
== 1)
22824 emit_label (label
);
22825 LABEL_NUSES (label
) = 1;
22827 epilogue_size_needed
= 1;
22829 else if (label
== NULL_RTX
)
22830 epilogue_size_needed
= size_needed
;
22832 /* Step 3: Main loop. */
22838 gcc_unreachable ();
22840 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22841 count_exp
, QImode
, 1, expected_size
);
22844 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22845 count_exp
, word_mode
, 1, expected_size
);
22847 case unrolled_loop
:
22848 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22849 registers for 4 temporaries anyway. */
22850 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22851 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22854 case rep_prefix_8_byte
:
22855 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22858 case rep_prefix_4_byte
:
22859 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22862 case rep_prefix_1_byte
:
22863 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22867 /* Adjust properly the offset of src and dest memory for aliasing. */
22868 if (CONST_INT_P (count_exp
))
22870 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22871 (count
/ size_needed
) * size_needed
);
22872 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22873 (count
/ size_needed
) * size_needed
);
22877 src
= change_address (src
, BLKmode
, srcreg
);
22878 dst
= change_address (dst
, BLKmode
, destreg
);
22881 /* Step 4: Epilogue to copy the remaining bytes. */
22885 /* When the main loop is done, COUNT_EXP might hold original count,
22886 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22887 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22888 bytes. Compensate if needed. */
22890 if (size_needed
< epilogue_size_needed
)
22893 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22894 GEN_INT (size_needed
- 1), count_exp
, 1,
22896 if (tmp
!= count_exp
)
22897 emit_move_insn (count_exp
, tmp
);
22899 emit_label (label
);
22900 LABEL_NUSES (label
) = 1;
22903 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22904 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22905 epilogue_size_needed
);
22906 if (jump_around_label
)
22907 emit_label (jump_around_label
);
22911 /* Helper function for memcpy. For QImode value 0xXY produce
22912 0xXYXYXYXY of wide specified by MODE. This is essentially
22913 a * 0x10101010, but we can do slightly better than
22914 synth_mult by unwinding the sequence by hand on CPUs with
22917 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22919 enum machine_mode valmode
= GET_MODE (val
);
22921 int nops
= mode
== DImode
? 3 : 2;
22923 gcc_assert (mode
== SImode
|| mode
== DImode
);
22924 if (val
== const0_rtx
)
22925 return copy_to_mode_reg (mode
, const0_rtx
);
22926 if (CONST_INT_P (val
))
22928 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22932 if (mode
== DImode
)
22933 v
|= (v
<< 16) << 16;
22934 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22937 if (valmode
== VOIDmode
)
22939 if (valmode
!= QImode
)
22940 val
= gen_lowpart (QImode
, val
);
22941 if (mode
== QImode
)
22943 if (!TARGET_PARTIAL_REG_STALL
)
22945 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22946 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22947 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22948 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22950 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22951 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22952 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22957 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22959 if (!TARGET_PARTIAL_REG_STALL
)
22960 if (mode
== SImode
)
22961 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22963 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22966 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22967 NULL
, 1, OPTAB_DIRECT
);
22969 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22971 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22972 NULL
, 1, OPTAB_DIRECT
);
22973 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22974 if (mode
== SImode
)
22976 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22977 NULL
, 1, OPTAB_DIRECT
);
22978 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22983 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22984 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22985 alignment from ALIGN to DESIRED_ALIGN. */
22987 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22992 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22993 promoted_val
= promote_duplicated_reg (DImode
, val
);
22994 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22995 promoted_val
= promote_duplicated_reg (SImode
, val
);
22996 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22997 promoted_val
= promote_duplicated_reg (HImode
, val
);
22999 promoted_val
= val
;
23001 return promoted_val
;
23004 /* Expand string clear operation (bzero). Use i386 string operations when
23005 profitable. See expand_movmem comment for explanation of individual
23006 steps performed. */
23008 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23009 rtx expected_align_exp
, rtx expected_size_exp
)
23014 rtx jump_around_label
= NULL
;
23015 HOST_WIDE_INT align
= 1;
23016 unsigned HOST_WIDE_INT count
= 0;
23017 HOST_WIDE_INT expected_size
= -1;
23018 int size_needed
= 0, epilogue_size_needed
;
23019 int desired_align
= 0, align_bytes
= 0;
23020 enum stringop_alg alg
;
23021 rtx promoted_val
= NULL
;
23022 bool force_loopy_epilogue
= false;
23024 bool need_zero_guard
= false;
23027 if (CONST_INT_P (align_exp
))
23028 align
= INTVAL (align_exp
);
23029 /* i386 can do misaligned access on reasonably increased cost. */
23030 if (CONST_INT_P (expected_align_exp
)
23031 && INTVAL (expected_align_exp
) > align
)
23032 align
= INTVAL (expected_align_exp
);
23033 if (CONST_INT_P (count_exp
))
23034 count
= expected_size
= INTVAL (count_exp
);
23035 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23036 expected_size
= INTVAL (expected_size_exp
);
23038 /* Make sure we don't need to care about overflow later on. */
23039 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23042 /* Step 0: Decide on preferred algorithm, desired alignment and
23043 size of chunks to be copied by main loop. */
23045 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23046 desired_align
= decide_alignment (align
, alg
, expected_size
);
23048 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23049 align
= desired_align
;
23051 if (alg
== libcall
)
23053 gcc_assert (alg
!= no_stringop
);
23055 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23056 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23061 gcc_unreachable ();
23063 need_zero_guard
= true;
23064 size_needed
= GET_MODE_SIZE (word_mode
);
23066 case unrolled_loop
:
23067 need_zero_guard
= true;
23068 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
23070 case rep_prefix_8_byte
:
23073 case rep_prefix_4_byte
:
23076 case rep_prefix_1_byte
:
23080 need_zero_guard
= true;
23084 epilogue_size_needed
= size_needed
;
23086 /* Step 1: Prologue guard. */
23088 /* Alignment code needs count to be in register. */
23089 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23091 if (INTVAL (count_exp
) > desired_align
23092 && INTVAL (count_exp
) > size_needed
)
23095 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23096 if (align_bytes
<= 0)
23099 align_bytes
= desired_align
- align_bytes
;
23101 if (align_bytes
== 0)
23103 enum machine_mode mode
= SImode
;
23104 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23106 count_exp
= force_reg (mode
, count_exp
);
23109 /* Do the cheap promotion to allow better CSE across the
23110 main loop and epilogue (ie one load of the big constant in the
23111 front of all code. */
23112 if (CONST_INT_P (val_exp
))
23113 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23114 desired_align
, align
);
23115 /* Ensure that alignment prologue won't copy past end of block. */
23116 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23118 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23119 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23120 Make sure it is power of 2. */
23121 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23123 /* To improve performance of small blocks, we jump around the VAL
23124 promoting mode. This mean that if the promoted VAL is not constant,
23125 we might not use it in the epilogue and have to use byte
23127 if (epilogue_size_needed
> 2 && !promoted_val
)
23128 force_loopy_epilogue
= true;
23131 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23133 /* If main algorithm works on QImode, no epilogue is needed.
23134 For small sizes just don't align anything. */
23135 if (size_needed
== 1)
23136 desired_align
= align
;
23143 label
= gen_label_rtx ();
23144 emit_cmp_and_jump_insns (count_exp
,
23145 GEN_INT (epilogue_size_needed
),
23146 LTU
, 0, counter_mode (count_exp
), 1, label
);
23147 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23148 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23150 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23153 if (dynamic_check
!= -1)
23155 rtx hot_label
= gen_label_rtx ();
23156 jump_around_label
= gen_label_rtx ();
23157 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23158 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23159 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23160 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23161 emit_jump (jump_around_label
);
23162 emit_label (hot_label
);
23165 /* Step 2: Alignment prologue. */
23167 /* Do the expensive promotion once we branched off the small blocks. */
23169 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23170 desired_align
, align
);
23171 gcc_assert (desired_align
>= 1 && align
>= 1);
23173 if (desired_align
> align
)
23175 if (align_bytes
== 0)
23177 /* Except for the first move in epilogue, we no longer know
23178 constant offset in aliasing info. It don't seems to worth
23179 the pain to maintain it for the first move, so throw away
23181 dst
= change_address (dst
, BLKmode
, destreg
);
23182 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23187 /* If we know how many bytes need to be stored before dst is
23188 sufficiently aligned, maintain aliasing info accurately. */
23189 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23190 desired_align
, align_bytes
);
23191 count_exp
= plus_constant (counter_mode (count_exp
),
23192 count_exp
, -align_bytes
);
23193 count
-= align_bytes
;
23195 if (need_zero_guard
23196 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23197 || (align_bytes
== 0
23198 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23199 + desired_align
- align
))))
23201 /* It is possible that we copied enough so the main loop will not
23203 gcc_assert (size_needed
> 1);
23204 if (label
== NULL_RTX
)
23205 label
= gen_label_rtx ();
23206 emit_cmp_and_jump_insns (count_exp
,
23207 GEN_INT (size_needed
),
23208 LTU
, 0, counter_mode (count_exp
), 1, label
);
23209 if (expected_size
== -1
23210 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23211 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23213 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23216 if (label
&& size_needed
== 1)
23218 emit_label (label
);
23219 LABEL_NUSES (label
) = 1;
23221 promoted_val
= val_exp
;
23222 epilogue_size_needed
= 1;
23224 else if (label
== NULL_RTX
)
23225 epilogue_size_needed
= size_needed
;
23227 /* Step 3: Main loop. */
23233 gcc_unreachable ();
23235 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23236 count_exp
, QImode
, 1, expected_size
);
23239 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23240 count_exp
, word_mode
, 1, expected_size
);
23242 case unrolled_loop
:
23243 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23244 count_exp
, word_mode
, 4, expected_size
);
23246 case rep_prefix_8_byte
:
23247 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23250 case rep_prefix_4_byte
:
23251 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23254 case rep_prefix_1_byte
:
23255 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23259 /* Adjust properly the offset of src and dest memory for aliasing. */
23260 if (CONST_INT_P (count_exp
))
23261 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23262 (count
/ size_needed
) * size_needed
);
23264 dst
= change_address (dst
, BLKmode
, destreg
);
23266 /* Step 4: Epilogue to copy the remaining bytes. */
23270 /* When the main loop is done, COUNT_EXP might hold original count,
23271 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23272 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23273 bytes. Compensate if needed. */
23275 if (size_needed
< epilogue_size_needed
)
23278 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23279 GEN_INT (size_needed
- 1), count_exp
, 1,
23281 if (tmp
!= count_exp
)
23282 emit_move_insn (count_exp
, tmp
);
23284 emit_label (label
);
23285 LABEL_NUSES (label
) = 1;
23288 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23290 if (force_loopy_epilogue
)
23291 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23292 epilogue_size_needed
);
23294 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23295 epilogue_size_needed
);
23297 if (jump_around_label
)
23298 emit_label (jump_around_label
);
23302 /* Expand the appropriate insns for doing strlen if not just doing
23305 out = result, initialized with the start address
23306 align_rtx = alignment of the address.
23307 scratch = scratch register, initialized with the startaddress when
23308 not aligned, otherwise undefined
23310 This is just the body. It needs the initializations mentioned above and
23311 some address computing at the end. These things are done in i386.md. */
23314 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23318 rtx align_2_label
= NULL_RTX
;
23319 rtx align_3_label
= NULL_RTX
;
23320 rtx align_4_label
= gen_label_rtx ();
23321 rtx end_0_label
= gen_label_rtx ();
23323 rtx tmpreg
= gen_reg_rtx (SImode
);
23324 rtx scratch
= gen_reg_rtx (SImode
);
23328 if (CONST_INT_P (align_rtx
))
23329 align
= INTVAL (align_rtx
);
23331 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23333 /* Is there a known alignment and is it less than 4? */
23336 rtx scratch1
= gen_reg_rtx (Pmode
);
23337 emit_move_insn (scratch1
, out
);
23338 /* Is there a known alignment and is it not 2? */
23341 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23342 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23344 /* Leave just the 3 lower bits. */
23345 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23346 NULL_RTX
, 0, OPTAB_WIDEN
);
23348 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23349 Pmode
, 1, align_4_label
);
23350 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23351 Pmode
, 1, align_2_label
);
23352 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23353 Pmode
, 1, align_3_label
);
23357 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23358 check if is aligned to 4 - byte. */
23360 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23361 NULL_RTX
, 0, OPTAB_WIDEN
);
23363 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23364 Pmode
, 1, align_4_label
);
23367 mem
= change_address (src
, QImode
, out
);
23369 /* Now compare the bytes. */
23371 /* Compare the first n unaligned byte on a byte per byte basis. */
23372 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23373 QImode
, 1, end_0_label
);
23375 /* Increment the address. */
23376 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23378 /* Not needed with an alignment of 2 */
23381 emit_label (align_2_label
);
23383 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23386 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23388 emit_label (align_3_label
);
23391 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23394 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23397 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23398 align this loop. It gives only huge programs, but does not help to
23400 emit_label (align_4_label
);
23402 mem
= change_address (src
, SImode
, out
);
23403 emit_move_insn (scratch
, mem
);
23404 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23406 /* This formula yields a nonzero result iff one of the bytes is zero.
23407 This saves three branches inside loop and many cycles. */
23409 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23410 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23411 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23412 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23413 gen_int_mode (0x80808080, SImode
)));
23414 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23419 rtx reg
= gen_reg_rtx (SImode
);
23420 rtx reg2
= gen_reg_rtx (Pmode
);
23421 emit_move_insn (reg
, tmpreg
);
23422 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23424 /* If zero is not in the first two bytes, move two bytes forward. */
23425 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23426 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23427 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23428 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23429 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23432 /* Emit lea manually to avoid clobbering of flags. */
23433 emit_insn (gen_rtx_SET (SImode
, reg2
,
23434 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23436 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23437 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23438 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23439 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23445 rtx end_2_label
= gen_label_rtx ();
23446 /* Is zero in the first two bytes? */
23448 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23449 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23450 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23451 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23452 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23454 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23455 JUMP_LABEL (tmp
) = end_2_label
;
23457 /* Not in the first two. Move two bytes forward. */
23458 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23459 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23461 emit_label (end_2_label
);
23465 /* Avoid branch in fixing the byte. */
23466 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23467 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23468 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23469 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23470 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23472 emit_label (end_0_label
);
23475 /* Expand strlen. */
23478 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23480 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23482 /* The generic case of strlen expander is long. Avoid it's
23483 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23485 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23486 && !TARGET_INLINE_ALL_STRINGOPS
23487 && !optimize_insn_for_size_p ()
23488 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23491 addr
= force_reg (Pmode
, XEXP (src
, 0));
23492 scratch1
= gen_reg_rtx (Pmode
);
23494 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23495 && !optimize_insn_for_size_p ())
23497 /* Well it seems that some optimizer does not combine a call like
23498 foo(strlen(bar), strlen(bar));
23499 when the move and the subtraction is done here. It does calculate
23500 the length just once when these instructions are done inside of
23501 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23502 often used and I use one fewer register for the lifetime of
23503 output_strlen_unroll() this is better. */
23505 emit_move_insn (out
, addr
);
23507 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23509 /* strlensi_unroll_1 returns the address of the zero at the end of
23510 the string, like memchr(), so compute the length by subtracting
23511 the start address. */
23512 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23518 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23519 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23522 scratch2
= gen_reg_rtx (Pmode
);
23523 scratch3
= gen_reg_rtx (Pmode
);
23524 scratch4
= force_reg (Pmode
, constm1_rtx
);
23526 emit_move_insn (scratch3
, addr
);
23527 eoschar
= force_reg (QImode
, eoschar
);
23529 src
= replace_equiv_address_nv (src
, scratch3
);
23531 /* If .md starts supporting :P, this can be done in .md. */
23532 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23533 scratch4
), UNSPEC_SCAS
);
23534 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23535 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23536 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23541 /* For given symbol (function) construct code to compute address of it's PLT
23542 entry in large x86-64 PIC model. */
23544 construct_plt_address (rtx symbol
)
23548 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23549 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23550 gcc_assert (Pmode
== DImode
);
23552 tmp
= gen_reg_rtx (Pmode
);
23553 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23555 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23556 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23561 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23563 rtx pop
, bool sibcall
)
23565 /* We need to represent that SI and DI registers are clobbered
23567 static int clobbered_registers
[] = {
23568 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23569 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23570 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23571 XMM15_REG
, SI_REG
, DI_REG
23573 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23574 rtx use
= NULL
, call
;
23575 unsigned int vec_len
;
23577 if (pop
== const0_rtx
)
23579 gcc_assert (!TARGET_64BIT
|| !pop
);
23581 if (TARGET_MACHO
&& !TARGET_64BIT
)
23584 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23585 fnaddr
= machopic_indirect_call_target (fnaddr
);
23590 /* Static functions and indirect calls don't need the pic register. */
23591 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23592 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23593 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23594 use_reg (&use
, pic_offset_table_rtx
);
23597 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23599 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23600 emit_move_insn (al
, callarg2
);
23601 use_reg (&use
, al
);
23604 if (ix86_cmodel
== CM_LARGE_PIC
23606 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23607 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23608 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23610 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23611 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23613 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
23614 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23618 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23620 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23621 vec
[vec_len
++] = call
;
23625 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23626 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23627 vec
[vec_len
++] = pop
;
23630 if (TARGET_64BIT_MS_ABI
23631 && (!callarg2
|| INTVAL (callarg2
) != -2))
23635 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23636 UNSPEC_MS_TO_SYSV_CALL
);
23638 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23640 = gen_rtx_CLOBBER (VOIDmode
,
23641 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23643 clobbered_registers
[i
]));
23647 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23648 call
= emit_call_insn (call
);
23650 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23655 /* Output the assembly for a call instruction. */
23658 ix86_output_call_insn (rtx insn
, rtx call_op
)
23660 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23661 bool seh_nop_p
= false;
23664 if (SIBLING_CALL_P (insn
))
23668 /* SEH epilogue detection requires the indirect branch case
23669 to include REX.W. */
23670 else if (TARGET_SEH
)
23671 xasm
= "rex.W jmp %A0";
23675 output_asm_insn (xasm
, &call_op
);
23679 /* SEH unwinding can require an extra nop to be emitted in several
23680 circumstances. Determine if we have one of those. */
23685 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23687 /* If we get to another real insn, we don't need the nop. */
23691 /* If we get to the epilogue note, prevent a catch region from
23692 being adjacent to the standard epilogue sequence. If non-
23693 call-exceptions, we'll have done this during epilogue emission. */
23694 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23695 && !flag_non_call_exceptions
23696 && !can_throw_internal (insn
))
23703 /* If we didn't find a real insn following the call, prevent the
23704 unwinder from looking into the next function. */
23710 xasm
= "call\t%P0";
23712 xasm
= "call\t%A0";
23714 output_asm_insn (xasm
, &call_op
);
23722 /* Clear stack slot assignments remembered from previous functions.
23723 This is called from INIT_EXPANDERS once before RTL is emitted for each
23726 static struct machine_function
*
23727 ix86_init_machine_status (void)
23729 struct machine_function
*f
;
23731 f
= ggc_alloc_cleared_machine_function ();
23732 f
->use_fast_prologue_epilogue_nregs
= -1;
23733 f
->call_abi
= ix86_abi
;
23738 /* Return a MEM corresponding to a stack slot with mode MODE.
23739 Allocate a new slot if necessary.
23741 The RTL for a function can have several slots available: N is
23742 which slot to use. */
23745 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23747 struct stack_local_entry
*s
;
23749 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23751 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23752 if (s
->mode
== mode
&& s
->n
== n
)
23753 return validize_mem (copy_rtx (s
->rtl
));
23755 s
= ggc_alloc_stack_local_entry ();
23758 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23760 s
->next
= ix86_stack_locals
;
23761 ix86_stack_locals
= s
;
23762 return validize_mem (s
->rtl
);
23766 ix86_instantiate_decls (void)
23768 struct stack_local_entry
*s
;
23770 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23771 if (s
->rtl
!= NULL_RTX
)
23772 instantiate_decl_rtl (s
->rtl
);
23775 /* Calculate the length of the memory address in the instruction encoding.
23776 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23777 or other prefixes. We never generate addr32 prefix for LEA insn. */
23780 memory_address_length (rtx addr
, bool lea
)
23782 struct ix86_address parts
;
23783 rtx base
, index
, disp
;
23787 if (GET_CODE (addr
) == PRE_DEC
23788 || GET_CODE (addr
) == POST_INC
23789 || GET_CODE (addr
) == PRE_MODIFY
23790 || GET_CODE (addr
) == POST_MODIFY
)
23793 ok
= ix86_decompose_address (addr
, &parts
);
23796 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23798 /* If this is not LEA instruction, add the length of addr32 prefix. */
23799 if (TARGET_64BIT
&& !lea
23800 && (SImode_address_operand (addr
, VOIDmode
)
23801 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23802 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23806 index
= parts
.index
;
23809 if (base
&& GET_CODE (base
) == SUBREG
)
23810 base
= SUBREG_REG (base
);
23811 if (index
&& GET_CODE (index
) == SUBREG
)
23812 index
= SUBREG_REG (index
);
23814 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23815 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23818 - esp as the base always wants an index,
23819 - ebp as the base always wants a displacement,
23820 - r12 as the base always wants an index,
23821 - r13 as the base always wants a displacement. */
23823 /* Register Indirect. */
23824 if (base
&& !index
&& !disp
)
23826 /* esp (for its index) and ebp (for its displacement) need
23827 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23829 if (base
== arg_pointer_rtx
23830 || base
== frame_pointer_rtx
23831 || REGNO (base
) == SP_REG
23832 || REGNO (base
) == BP_REG
23833 || REGNO (base
) == R12_REG
23834 || REGNO (base
) == R13_REG
)
23838 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23839 is not disp32, but disp32(%rip), so for disp32
23840 SIB byte is needed, unless print_operand_address
23841 optimizes it into disp32(%rip) or (%rip) is implied
23843 else if (disp
&& !base
&& !index
)
23850 if (GET_CODE (disp
) == CONST
)
23851 symbol
= XEXP (disp
, 0);
23852 if (GET_CODE (symbol
) == PLUS
23853 && CONST_INT_P (XEXP (symbol
, 1)))
23854 symbol
= XEXP (symbol
, 0);
23856 if (GET_CODE (symbol
) != LABEL_REF
23857 && (GET_CODE (symbol
) != SYMBOL_REF
23858 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23859 && (GET_CODE (symbol
) != UNSPEC
23860 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23861 && XINT (symbol
, 1) != UNSPEC_PCREL
23862 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23868 /* Find the length of the displacement constant. */
23871 if (base
&& satisfies_constraint_K (disp
))
23876 /* ebp always wants a displacement. Similarly r13. */
23877 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23880 /* An index requires the two-byte modrm form.... */
23882 /* ...like esp (or r12), which always wants an index. */
23883 || base
== arg_pointer_rtx
23884 || base
== frame_pointer_rtx
23885 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23892 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23893 is set, expect that insn have 8bit immediate alternative. */
23895 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23899 extract_insn_cached (insn
);
23900 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23901 if (CONSTANT_P (recog_data
.operand
[i
]))
23903 enum attr_mode mode
= get_attr_mode (insn
);
23906 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23908 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23915 ival
= trunc_int_for_mode (ival
, HImode
);
23918 ival
= trunc_int_for_mode (ival
, SImode
);
23923 if (IN_RANGE (ival
, -128, 127))
23940 /* Immediates for DImode instructions are encoded
23941 as 32bit sign extended values. */
23946 fatal_insn ("unknown insn mode", insn
);
23952 /* Compute default value for "length_address" attribute. */
23954 ix86_attr_length_address_default (rtx insn
)
23958 if (get_attr_type (insn
) == TYPE_LEA
)
23960 rtx set
= PATTERN (insn
), addr
;
23962 if (GET_CODE (set
) == PARALLEL
)
23963 set
= XVECEXP (set
, 0, 0);
23965 gcc_assert (GET_CODE (set
) == SET
);
23967 addr
= SET_SRC (set
);
23969 return memory_address_length (addr
, true);
23972 extract_insn_cached (insn
);
23973 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23974 if (MEM_P (recog_data
.operand
[i
]))
23976 constrain_operands_cached (reload_completed
);
23977 if (which_alternative
!= -1)
23979 const char *constraints
= recog_data
.constraints
[i
];
23980 int alt
= which_alternative
;
23982 while (*constraints
== '=' || *constraints
== '+')
23985 while (*constraints
++ != ',')
23987 /* Skip ignored operands. */
23988 if (*constraints
== 'X')
23991 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
23996 /* Compute default value for "length_vex" attribute. It includes
23997 2 or 3 byte VEX prefix and 1 opcode byte. */
24000 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24004 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24005 byte VEX prefix. */
24006 if (!has_0f_opcode
|| has_vex_w
)
24009 /* We can always use 2 byte VEX prefix in 32bit. */
24013 extract_insn_cached (insn
);
24015 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24016 if (REG_P (recog_data
.operand
[i
]))
24018 /* REX.W bit uses 3 byte VEX prefix. */
24019 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24020 && GENERAL_REG_P (recog_data
.operand
[i
]))
24025 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24026 if (MEM_P (recog_data
.operand
[i
])
24027 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24034 /* Return the maximum number of instructions a cpu can issue. */
24037 ix86_issue_rate (void)
24041 case PROCESSOR_PENTIUM
:
24042 case PROCESSOR_ATOM
:
24044 case PROCESSOR_BTVER2
:
24047 case PROCESSOR_PENTIUMPRO
:
24048 case PROCESSOR_PENTIUM4
:
24049 case PROCESSOR_CORE2
:
24050 case PROCESSOR_COREI7
:
24051 case PROCESSOR_ATHLON
:
24053 case PROCESSOR_AMDFAM10
:
24054 case PROCESSOR_NOCONA
:
24055 case PROCESSOR_GENERIC32
:
24056 case PROCESSOR_GENERIC64
:
24057 case PROCESSOR_BDVER1
:
24058 case PROCESSOR_BDVER2
:
24059 case PROCESSOR_BDVER3
:
24060 case PROCESSOR_BTVER1
:
24068 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24069 by DEP_INSN and nothing set by DEP_INSN. */
24072 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24076 /* Simplify the test for uninteresting insns. */
24077 if (insn_type
!= TYPE_SETCC
24078 && insn_type
!= TYPE_ICMOV
24079 && insn_type
!= TYPE_FCMOV
24080 && insn_type
!= TYPE_IBR
)
24083 if ((set
= single_set (dep_insn
)) != 0)
24085 set
= SET_DEST (set
);
24088 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24089 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24090 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24091 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24093 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24094 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24099 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24102 /* This test is true if the dependent insn reads the flags but
24103 not any other potentially set register. */
24104 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24107 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24113 /* Return true iff USE_INSN has a memory address with operands set by
24117 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24120 extract_insn_cached (use_insn
);
24121 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24122 if (MEM_P (recog_data
.operand
[i
]))
24124 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24125 return modified_in_p (addr
, set_insn
) != 0;
24131 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24133 enum attr_type insn_type
, dep_insn_type
;
24134 enum attr_memory memory
;
24136 int dep_insn_code_number
;
24138 /* Anti and output dependencies have zero cost on all CPUs. */
24139 if (REG_NOTE_KIND (link
) != 0)
24142 dep_insn_code_number
= recog_memoized (dep_insn
);
24144 /* If we can't recognize the insns, we can't really do anything. */
24145 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24148 insn_type
= get_attr_type (insn
);
24149 dep_insn_type
= get_attr_type (dep_insn
);
24153 case PROCESSOR_PENTIUM
:
24154 /* Address Generation Interlock adds a cycle of latency. */
24155 if (insn_type
== TYPE_LEA
)
24157 rtx addr
= PATTERN (insn
);
24159 if (GET_CODE (addr
) == PARALLEL
)
24160 addr
= XVECEXP (addr
, 0, 0);
24162 gcc_assert (GET_CODE (addr
) == SET
);
24164 addr
= SET_SRC (addr
);
24165 if (modified_in_p (addr
, dep_insn
))
24168 else if (ix86_agi_dependent (dep_insn
, insn
))
24171 /* ??? Compares pair with jump/setcc. */
24172 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24175 /* Floating point stores require value to be ready one cycle earlier. */
24176 if (insn_type
== TYPE_FMOV
24177 && get_attr_memory (insn
) == MEMORY_STORE
24178 && !ix86_agi_dependent (dep_insn
, insn
))
24182 case PROCESSOR_PENTIUMPRO
:
24183 memory
= get_attr_memory (insn
);
24185 /* INT->FP conversion is expensive. */
24186 if (get_attr_fp_int_src (dep_insn
))
24189 /* There is one cycle extra latency between an FP op and a store. */
24190 if (insn_type
== TYPE_FMOV
24191 && (set
= single_set (dep_insn
)) != NULL_RTX
24192 && (set2
= single_set (insn
)) != NULL_RTX
24193 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24194 && MEM_P (SET_DEST (set2
)))
24197 /* Show ability of reorder buffer to hide latency of load by executing
24198 in parallel with previous instruction in case
24199 previous instruction is not needed to compute the address. */
24200 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24201 && !ix86_agi_dependent (dep_insn
, insn
))
24203 /* Claim moves to take one cycle, as core can issue one load
24204 at time and the next load can start cycle later. */
24205 if (dep_insn_type
== TYPE_IMOV
24206 || dep_insn_type
== TYPE_FMOV
)
24214 memory
= get_attr_memory (insn
);
24216 /* The esp dependency is resolved before the instruction is really
24218 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24219 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24222 /* INT->FP conversion is expensive. */
24223 if (get_attr_fp_int_src (dep_insn
))
24226 /* Show ability of reorder buffer to hide latency of load by executing
24227 in parallel with previous instruction in case
24228 previous instruction is not needed to compute the address. */
24229 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24230 && !ix86_agi_dependent (dep_insn
, insn
))
24232 /* Claim moves to take one cycle, as core can issue one load
24233 at time and the next load can start cycle later. */
24234 if (dep_insn_type
== TYPE_IMOV
24235 || dep_insn_type
== TYPE_FMOV
)
24244 case PROCESSOR_ATHLON
:
24246 case PROCESSOR_AMDFAM10
:
24247 case PROCESSOR_BDVER1
:
24248 case PROCESSOR_BDVER2
:
24249 case PROCESSOR_BDVER3
:
24250 case PROCESSOR_BTVER1
:
24251 case PROCESSOR_BTVER2
:
24252 case PROCESSOR_ATOM
:
24253 case PROCESSOR_GENERIC32
:
24254 case PROCESSOR_GENERIC64
:
24255 memory
= get_attr_memory (insn
);
24257 /* Show ability of reorder buffer to hide latency of load by executing
24258 in parallel with previous instruction in case
24259 previous instruction is not needed to compute the address. */
24260 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24261 && !ix86_agi_dependent (dep_insn
, insn
))
24263 enum attr_unit unit
= get_attr_unit (insn
);
24266 /* Because of the difference between the length of integer and
24267 floating unit pipeline preparation stages, the memory operands
24268 for floating point are cheaper.
24270 ??? For Athlon it the difference is most probably 2. */
24271 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24274 loadcost
= TARGET_ATHLON
? 2 : 0;
24276 if (cost
>= loadcost
)
24289 /* How many alternative schedules to try. This should be as wide as the
24290 scheduling freedom in the DFA, but no wider. Making this value too
24291 large results extra work for the scheduler. */
24294 ia32_multipass_dfa_lookahead (void)
24298 case PROCESSOR_PENTIUM
:
24301 case PROCESSOR_PENTIUMPRO
:
24305 case PROCESSOR_CORE2
:
24306 case PROCESSOR_COREI7
:
24307 case PROCESSOR_ATOM
:
24308 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24309 as many instructions can be executed on a cycle, i.e.,
24310 issue_rate. I wonder why tuning for many CPUs does not do this. */
24311 if (reload_completed
)
24312 return ix86_issue_rate ();
24313 /* Don't use lookahead for pre-reload schedule to save compile time. */
24321 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24322 execution. It is applied if
24323 (1) IMUL instruction is on the top of list;
24324 (2) There exists the only producer of independent IMUL instruction in
24326 (3) Put found producer on the top of ready list.
24327 Returns issue rate. */
24330 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24331 int clock_var ATTRIBUTE_UNUSED
)
24333 static int issue_rate
= -1;
24334 int n_ready
= *pn_ready
;
24335 rtx insn
, insn1
, insn2
;
24337 sd_iterator_def sd_it
;
24341 /* Set up issue rate. */
24342 issue_rate
= ix86_issue_rate();
24344 /* Do reodering for Atom only. */
24345 if (ix86_tune
!= PROCESSOR_ATOM
)
24347 /* Do not perform ready list reodering for pre-reload schedule pass. */
24348 if (!reload_completed
)
24350 /* Nothing to do if ready list contains only 1 instruction. */
24354 /* Check that IMUL instruction is on the top of ready list. */
24355 insn
= ready
[n_ready
- 1];
24356 if (!NONDEBUG_INSN_P (insn
))
24358 insn
= PATTERN (insn
);
24359 if (GET_CODE (insn
) == PARALLEL
)
24360 insn
= XVECEXP (insn
, 0, 0);
24361 if (GET_CODE (insn
) != SET
)
24363 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24364 && GET_MODE (SET_SRC (insn
)) == SImode
))
24367 /* Search for producer of independent IMUL instruction. */
24368 for (i
= n_ready
- 2; i
>= 0; i
--)
24371 if (!NONDEBUG_INSN_P (insn
))
24373 /* Skip IMUL instruction. */
24374 insn2
= PATTERN (insn
);
24375 if (GET_CODE (insn2
) == PARALLEL
)
24376 insn2
= XVECEXP (insn2
, 0, 0);
24377 if (GET_CODE (insn2
) == SET
24378 && GET_CODE (SET_SRC (insn2
)) == MULT
24379 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24382 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24385 con
= DEP_CON (dep
);
24386 if (!NONDEBUG_INSN_P (con
))
24388 insn1
= PATTERN (con
);
24389 if (GET_CODE (insn1
) == PARALLEL
)
24390 insn1
= XVECEXP (insn1
, 0, 0);
24392 if (GET_CODE (insn1
) == SET
24393 && GET_CODE (SET_SRC (insn1
)) == MULT
24394 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24396 sd_iterator_def sd_it1
;
24398 /* Check if there is no other dependee for IMUL. */
24400 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24403 pro
= DEP_PRO (dep1
);
24404 if (!NONDEBUG_INSN_P (pro
))
24417 return issue_rate
; /* Didn't find IMUL producer. */
24419 if (sched_verbose
> 1)
24420 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24421 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24423 /* Put IMUL producer (ready[index]) at the top of ready list. */
24424 insn1
= ready
[index
];
24425 for (i
= index
; i
< n_ready
- 1; i
++)
24426 ready
[i
] = ready
[i
+ 1];
24427 ready
[n_ready
- 1] = insn1
;
24433 ix86_class_likely_spilled_p (reg_class_t
);
24435 /* Returns true if lhs of insn is HW function argument register and set up
24436 is_spilled to true if it is likely spilled HW register. */
24438 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24442 if (!NONDEBUG_INSN_P (insn
))
24444 /* Call instructions are not movable, ignore it. */
24447 insn
= PATTERN (insn
);
24448 if (GET_CODE (insn
) == PARALLEL
)
24449 insn
= XVECEXP (insn
, 0, 0);
24450 if (GET_CODE (insn
) != SET
)
24452 dst
= SET_DEST (insn
);
24453 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24454 && ix86_function_arg_regno_p (REGNO (dst
)))
24456 /* Is it likely spilled HW register? */
24457 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24458 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24459 *is_spilled
= true;
24465 /* Add output dependencies for chain of function adjacent arguments if only
24466 there is a move to likely spilled HW register. Return first argument
24467 if at least one dependence was added or NULL otherwise. */
24469 add_parameter_dependencies (rtx call
, rtx head
)
24473 rtx first_arg
= NULL
;
24474 bool is_spilled
= false;
24476 head
= PREV_INSN (head
);
24478 /* Find nearest to call argument passing instruction. */
24481 last
= PREV_INSN (last
);
24484 if (!NONDEBUG_INSN_P (last
))
24486 if (insn_is_function_arg (last
, &is_spilled
))
24494 insn
= PREV_INSN (last
);
24495 if (!INSN_P (insn
))
24499 if (!NONDEBUG_INSN_P (insn
))
24504 if (insn_is_function_arg (insn
, &is_spilled
))
24506 /* Add output depdendence between two function arguments if chain
24507 of output arguments contains likely spilled HW registers. */
24509 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24510 first_arg
= last
= insn
;
24520 /* Add output or anti dependency from insn to first_arg to restrict its code
24523 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24528 set
= single_set (insn
);
24531 tmp
= SET_DEST (set
);
24534 /* Add output dependency to the first function argument. */
24535 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24538 /* Add anti dependency. */
24539 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24542 /* Avoid cross block motion of function argument through adding dependency
24543 from the first non-jump instruction in bb. */
24545 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24547 rtx insn
= BB_END (bb
);
24551 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24553 rtx set
= single_set (insn
);
24556 avoid_func_arg_motion (arg
, insn
);
24560 if (insn
== BB_HEAD (bb
))
24562 insn
= PREV_INSN (insn
);
24566 /* Hook for pre-reload schedule - avoid motion of function arguments
24567 passed in likely spilled HW registers. */
24569 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24572 rtx first_arg
= NULL
;
24573 if (reload_completed
)
24575 while (head
!= tail
&& DEBUG_INSN_P (head
))
24576 head
= NEXT_INSN (head
);
24577 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24578 if (INSN_P (insn
) && CALL_P (insn
))
24580 first_arg
= add_parameter_dependencies (insn
, head
);
24583 /* Add dependee for first argument to predecessors if only
24584 region contains more than one block. */
24585 basic_block bb
= BLOCK_FOR_INSN (insn
);
24586 int rgn
= CONTAINING_RGN (bb
->index
);
24587 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24588 /* Skip trivial regions and region head blocks that can have
24589 predecessors outside of region. */
24590 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24594 /* Assume that region is SCC, i.e. all immediate predecessors
24595 of non-head block are in the same region. */
24596 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24598 /* Avoid creating of loop-carried dependencies through
24599 using topological odering in region. */
24600 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24601 add_dependee_for_func_arg (first_arg
, e
->src
);
24609 else if (first_arg
)
24610 avoid_func_arg_motion (first_arg
, insn
);
24613 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24614 HW registers to maximum, to schedule them at soon as possible. These are
24615 moves from function argument registers at the top of the function entry
24616 and moves from function return value registers after call. */
24618 ix86_adjust_priority (rtx insn
, int priority
)
24622 if (reload_completed
)
24625 if (!NONDEBUG_INSN_P (insn
))
24628 set
= single_set (insn
);
24631 rtx tmp
= SET_SRC (set
);
24633 && HARD_REGISTER_P (tmp
)
24634 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24635 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24636 return current_sched_info
->sched_max_insns_priority
;
24642 /* Model decoder of Core 2/i7.
24643 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24644 track the instruction fetch block boundaries and make sure that long
24645 (9+ bytes) instructions are assigned to D0. */
24647 /* Maximum length of an insn that can be handled by
24648 a secondary decoder unit. '8' for Core 2/i7. */
24649 static int core2i7_secondary_decoder_max_insn_size
;
24651 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24652 '16' for Core 2/i7. */
24653 static int core2i7_ifetch_block_size
;
24655 /* Maximum number of instructions decoder can handle per cycle.
24656 '6' for Core 2/i7. */
24657 static int core2i7_ifetch_block_max_insns
;
24659 typedef struct ix86_first_cycle_multipass_data_
*
24660 ix86_first_cycle_multipass_data_t
;
24661 typedef const struct ix86_first_cycle_multipass_data_
*
24662 const_ix86_first_cycle_multipass_data_t
;
24664 /* A variable to store target state across calls to max_issue within
24666 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24667 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24669 /* Initialize DATA. */
24671 core2i7_first_cycle_multipass_init (void *_data
)
24673 ix86_first_cycle_multipass_data_t data
24674 = (ix86_first_cycle_multipass_data_t
) _data
;
24676 data
->ifetch_block_len
= 0;
24677 data
->ifetch_block_n_insns
= 0;
24678 data
->ready_try_change
= NULL
;
24679 data
->ready_try_change_size
= 0;
24682 /* Advancing the cycle; reset ifetch block counts. */
24684 core2i7_dfa_post_advance_cycle (void)
24686 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24688 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24690 data
->ifetch_block_len
= 0;
24691 data
->ifetch_block_n_insns
= 0;
24694 static int min_insn_size (rtx
);
24696 /* Filter out insns from ready_try that the core will not be able to issue
24697 on current cycle due to decoder. */
24699 core2i7_first_cycle_multipass_filter_ready_try
24700 (const_ix86_first_cycle_multipass_data_t data
,
24701 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24708 if (ready_try
[n_ready
])
24711 insn
= get_ready_element (n_ready
);
24712 insn_size
= min_insn_size (insn
);
24714 if (/* If this is a too long an insn for a secondary decoder ... */
24715 (!first_cycle_insn_p
24716 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24717 /* ... or it would not fit into the ifetch block ... */
24718 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24719 /* ... or the decoder is full already ... */
24720 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24721 /* ... mask the insn out. */
24723 ready_try
[n_ready
] = 1;
24725 if (data
->ready_try_change
)
24726 bitmap_set_bit (data
->ready_try_change
, n_ready
);
24731 /* Prepare for a new round of multipass lookahead scheduling. */
24733 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24734 bool first_cycle_insn_p
)
24736 ix86_first_cycle_multipass_data_t data
24737 = (ix86_first_cycle_multipass_data_t
) _data
;
24738 const_ix86_first_cycle_multipass_data_t prev_data
24739 = ix86_first_cycle_multipass_data
;
24741 /* Restore the state from the end of the previous round. */
24742 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24743 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24745 /* Filter instructions that cannot be issued on current cycle due to
24746 decoder restrictions. */
24747 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24748 first_cycle_insn_p
);
24751 /* INSN is being issued in current solution. Account for its impact on
24752 the decoder model. */
24754 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24755 rtx insn
, const void *_prev_data
)
24757 ix86_first_cycle_multipass_data_t data
24758 = (ix86_first_cycle_multipass_data_t
) _data
;
24759 const_ix86_first_cycle_multipass_data_t prev_data
24760 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24762 int insn_size
= min_insn_size (insn
);
24764 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24765 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24766 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24767 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24769 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24770 if (!data
->ready_try_change
)
24772 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24773 data
->ready_try_change_size
= n_ready
;
24775 else if (data
->ready_try_change_size
< n_ready
)
24777 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24779 data
->ready_try_change_size
= n_ready
;
24781 bitmap_clear (data
->ready_try_change
);
24783 /* Filter out insns from ready_try that the core will not be able to issue
24784 on current cycle due to decoder. */
24785 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24789 /* Revert the effect on ready_try. */
24791 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24793 int n_ready ATTRIBUTE_UNUSED
)
24795 const_ix86_first_cycle_multipass_data_t data
24796 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24797 unsigned int i
= 0;
24798 sbitmap_iterator sbi
;
24800 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24801 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
24807 /* Save the result of multipass lookahead scheduling for the next round. */
24809 core2i7_first_cycle_multipass_end (const void *_data
)
24811 const_ix86_first_cycle_multipass_data_t data
24812 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24813 ix86_first_cycle_multipass_data_t next_data
24814 = ix86_first_cycle_multipass_data
;
24818 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24819 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24823 /* Deallocate target data. */
24825 core2i7_first_cycle_multipass_fini (void *_data
)
24827 ix86_first_cycle_multipass_data_t data
24828 = (ix86_first_cycle_multipass_data_t
) _data
;
24830 if (data
->ready_try_change
)
24832 sbitmap_free (data
->ready_try_change
);
24833 data
->ready_try_change
= NULL
;
24834 data
->ready_try_change_size
= 0;
24838 /* Prepare for scheduling pass. */
24840 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24841 int verbose ATTRIBUTE_UNUSED
,
24842 int max_uid ATTRIBUTE_UNUSED
)
24844 /* Install scheduling hooks for current CPU. Some of these hooks are used
24845 in time-critical parts of the scheduler, so we only set them up when
24846 they are actually used. */
24849 case PROCESSOR_CORE2
:
24850 case PROCESSOR_COREI7
:
24851 /* Do not perform multipass scheduling for pre-reload schedule
24852 to save compile time. */
24853 if (reload_completed
)
24855 targetm
.sched
.dfa_post_advance_cycle
24856 = core2i7_dfa_post_advance_cycle
;
24857 targetm
.sched
.first_cycle_multipass_init
24858 = core2i7_first_cycle_multipass_init
;
24859 targetm
.sched
.first_cycle_multipass_begin
24860 = core2i7_first_cycle_multipass_begin
;
24861 targetm
.sched
.first_cycle_multipass_issue
24862 = core2i7_first_cycle_multipass_issue
;
24863 targetm
.sched
.first_cycle_multipass_backtrack
24864 = core2i7_first_cycle_multipass_backtrack
;
24865 targetm
.sched
.first_cycle_multipass_end
24866 = core2i7_first_cycle_multipass_end
;
24867 targetm
.sched
.first_cycle_multipass_fini
24868 = core2i7_first_cycle_multipass_fini
;
24870 /* Set decoder parameters. */
24871 core2i7_secondary_decoder_max_insn_size
= 8;
24872 core2i7_ifetch_block_size
= 16;
24873 core2i7_ifetch_block_max_insns
= 6;
24876 /* ... Fall through ... */
24878 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24879 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24880 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24881 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24882 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24883 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24884 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24890 /* Compute the alignment given to a constant that is being placed in memory.
24891 EXP is the constant and ALIGN is the alignment that the object would
24893 The value of this function is used instead of that alignment to align
24897 ix86_constant_alignment (tree exp
, int align
)
24899 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24900 || TREE_CODE (exp
) == INTEGER_CST
)
24902 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24904 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24907 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24908 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24909 return BITS_PER_WORD
;
24914 /* Compute the alignment for a static variable.
24915 TYPE is the data type, and ALIGN is the alignment that
24916 the object would ordinarily have. The value of this function is used
24917 instead of that alignment to align the object. */
24920 ix86_data_alignment (tree type
, int align
)
24922 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24924 if (AGGREGATE_TYPE_P (type
)
24925 && TYPE_SIZE (type
)
24926 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24927 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24928 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24929 && align
< max_align
)
24932 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24933 to 16byte boundary. */
24936 if (AGGREGATE_TYPE_P (type
)
24937 && TYPE_SIZE (type
)
24938 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24939 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24940 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24944 if (TREE_CODE (type
) == ARRAY_TYPE
)
24946 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24948 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24951 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24954 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24956 if ((TYPE_MODE (type
) == XCmode
24957 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24960 else if ((TREE_CODE (type
) == RECORD_TYPE
24961 || TREE_CODE (type
) == UNION_TYPE
24962 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24963 && TYPE_FIELDS (type
))
24965 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24967 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24970 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24971 || TREE_CODE (type
) == INTEGER_TYPE
)
24973 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24975 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24982 /* Compute the alignment for a local variable or a stack slot. EXP is
24983 the data type or decl itself, MODE is the widest mode available and
24984 ALIGN is the alignment that the object would ordinarily have. The
24985 value of this macro is used instead of that alignment to align the
24989 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24990 unsigned int align
)
24994 if (exp
&& DECL_P (exp
))
24996 type
= TREE_TYPE (exp
);
25005 /* Don't do dynamic stack realignment for long long objects with
25006 -mpreferred-stack-boundary=2. */
25009 && ix86_preferred_stack_boundary
< 64
25010 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25011 && (!type
|| !TYPE_USER_ALIGN (type
))
25012 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25015 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25016 register in MODE. We will return the largest alignment of XF
25020 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25021 align
= GET_MODE_ALIGNMENT (DFmode
);
25025 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25026 to 16byte boundary. Exact wording is:
25028 An array uses the same alignment as its elements, except that a local or
25029 global array variable of length at least 16 bytes or
25030 a C99 variable-length array variable always has alignment of at least 16 bytes.
25032 This was added to allow use of aligned SSE instructions at arrays. This
25033 rule is meant for static storage (where compiler can not do the analysis
25034 by itself). We follow it for automatic variables only when convenient.
25035 We fully control everything in the function compiled and functions from
25036 other unit can not rely on the alignment.
25038 Exclude va_list type. It is the common case of local array where
25039 we can not benefit from the alignment. */
25040 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25043 if (AGGREGATE_TYPE_P (type
)
25044 && (va_list_type_node
== NULL_TREE
25045 || (TYPE_MAIN_VARIANT (type
)
25046 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25047 && TYPE_SIZE (type
)
25048 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25049 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25050 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25053 if (TREE_CODE (type
) == ARRAY_TYPE
)
25055 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25057 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25060 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25062 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25064 if ((TYPE_MODE (type
) == XCmode
25065 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25068 else if ((TREE_CODE (type
) == RECORD_TYPE
25069 || TREE_CODE (type
) == UNION_TYPE
25070 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25071 && TYPE_FIELDS (type
))
25073 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25075 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25078 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25079 || TREE_CODE (type
) == INTEGER_TYPE
)
25082 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25084 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25090 /* Compute the minimum required alignment for dynamic stack realignment
25091 purposes for a local variable, parameter or a stack slot. EXP is
25092 the data type or decl itself, MODE is its mode and ALIGN is the
25093 alignment that the object would ordinarily have. */
25096 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25097 unsigned int align
)
25101 if (exp
&& DECL_P (exp
))
25103 type
= TREE_TYPE (exp
);
25112 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25115 /* Don't do dynamic stack realignment for long long objects with
25116 -mpreferred-stack-boundary=2. */
25117 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25118 && (!type
|| !TYPE_USER_ALIGN (type
))
25119 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25125 /* Find a location for the static chain incoming to a nested function.
25126 This is a register, unless all free registers are used by arguments. */
25129 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25133 if (!DECL_STATIC_CHAIN (fndecl
))
25138 /* We always use R10 in 64-bit mode. */
25146 /* By default in 32-bit mode we use ECX to pass the static chain. */
25149 fntype
= TREE_TYPE (fndecl
);
25150 ccvt
= ix86_get_callcvt (fntype
);
25151 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25153 /* Fastcall functions use ecx/edx for arguments, which leaves
25154 us with EAX for the static chain.
25155 Thiscall functions use ecx for arguments, which also
25156 leaves us with EAX for the static chain. */
25159 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25161 /* Thiscall functions use ecx for arguments, which leaves
25162 us with EAX and EDX for the static chain.
25163 We are using for abi-compatibility EAX. */
25166 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25168 /* For regparm 3, we have no free call-clobbered registers in
25169 which to store the static chain. In order to implement this,
25170 we have the trampoline push the static chain to the stack.
25171 However, we can't push a value below the return address when
25172 we call the nested function directly, so we have to use an
25173 alternate entry point. For this we use ESI, and have the
25174 alternate entry point push ESI, so that things appear the
25175 same once we're executing the nested function. */
25178 if (fndecl
== current_function_decl
)
25179 ix86_static_chain_on_stack
= true;
25180 return gen_frame_mem (SImode
,
25181 plus_constant (Pmode
,
25182 arg_pointer_rtx
, -8));
25188 return gen_rtx_REG (Pmode
, regno
);
25191 /* Emit RTL insns to initialize the variable parts of a trampoline.
25192 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25193 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25194 to be passed to the target function. */
25197 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25203 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25209 /* Load the function address to r11. Try to load address using
25210 the shorter movl instead of movabs. We may want to support
25211 movq for kernel mode, but kernel does not use trampolines at
25212 the moment. FNADDR is a 32bit address and may not be in
25213 DImode when ptr_mode == SImode. Always use movl in this
25215 if (ptr_mode
== SImode
25216 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25218 fnaddr
= copy_addr_to_reg (fnaddr
);
25220 mem
= adjust_address (m_tramp
, HImode
, offset
);
25221 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25223 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25224 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25229 mem
= adjust_address (m_tramp
, HImode
, offset
);
25230 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25232 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25233 emit_move_insn (mem
, fnaddr
);
25237 /* Load static chain using movabs to r10. Use the shorter movl
25238 instead of movabs when ptr_mode == SImode. */
25239 if (ptr_mode
== SImode
)
25250 mem
= adjust_address (m_tramp
, HImode
, offset
);
25251 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25253 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25254 emit_move_insn (mem
, chain_value
);
25257 /* Jump to r11; the last (unused) byte is a nop, only there to
25258 pad the write out to a single 32-bit store. */
25259 mem
= adjust_address (m_tramp
, SImode
, offset
);
25260 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25267 /* Depending on the static chain location, either load a register
25268 with a constant, or push the constant to the stack. All of the
25269 instructions are the same size. */
25270 chain
= ix86_static_chain (fndecl
, true);
25273 switch (REGNO (chain
))
25276 opcode
= 0xb8; break;
25278 opcode
= 0xb9; break;
25280 gcc_unreachable ();
25286 mem
= adjust_address (m_tramp
, QImode
, offset
);
25287 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25289 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25290 emit_move_insn (mem
, chain_value
);
25293 mem
= adjust_address (m_tramp
, QImode
, offset
);
25294 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25296 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25298 /* Compute offset from the end of the jmp to the target function.
25299 In the case in which the trampoline stores the static chain on
25300 the stack, we need to skip the first insn which pushes the
25301 (call-saved) register static chain; this push is 1 byte. */
25303 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25304 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25305 offset
- (MEM_P (chain
) ? 1 : 0)),
25306 NULL_RTX
, 1, OPTAB_DIRECT
);
25307 emit_move_insn (mem
, disp
);
25310 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25312 #ifdef HAVE_ENABLE_EXECUTE_STACK
25313 #ifdef CHECK_EXECUTE_STACK_ENABLED
25314 if (CHECK_EXECUTE_STACK_ENABLED
)
25316 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25317 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25321 /* The following file contains several enumerations and data structures
25322 built from the definitions in i386-builtin-types.def. */
25324 #include "i386-builtin-types.inc"
25326 /* Table for the ix86 builtin non-function types. */
25327 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25329 /* Retrieve an element from the above table, building some of
25330 the types lazily. */
25333 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25335 unsigned int index
;
25338 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25340 type
= ix86_builtin_type_tab
[(int) tcode
];
25344 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25345 if (tcode
<= IX86_BT_LAST_VECT
)
25347 enum machine_mode mode
;
25349 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25350 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25351 mode
= ix86_builtin_type_vect_mode
[index
];
25353 type
= build_vector_type_for_mode (itype
, mode
);
25359 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25360 if (tcode
<= IX86_BT_LAST_PTR
)
25361 quals
= TYPE_UNQUALIFIED
;
25363 quals
= TYPE_QUAL_CONST
;
25365 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25366 if (quals
!= TYPE_UNQUALIFIED
)
25367 itype
= build_qualified_type (itype
, quals
);
25369 type
= build_pointer_type (itype
);
25372 ix86_builtin_type_tab
[(int) tcode
] = type
;
25376 /* Table for the ix86 builtin function types. */
25377 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25379 /* Retrieve an element from the above table, building some of
25380 the types lazily. */
25383 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25387 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25389 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25393 if (tcode
<= IX86_BT_LAST_FUNC
)
25395 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25396 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25397 tree rtype
, atype
, args
= void_list_node
;
25400 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25401 for (i
= after
- 1; i
> start
; --i
)
25403 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25404 args
= tree_cons (NULL
, atype
, args
);
25407 type
= build_function_type (rtype
, args
);
25411 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25412 enum ix86_builtin_func_type icode
;
25414 icode
= ix86_builtin_func_alias_base
[index
];
25415 type
= ix86_get_builtin_func_type (icode
);
25418 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25423 /* Codes for all the SSE/MMX builtins. */
25426 IX86_BUILTIN_ADDPS
,
25427 IX86_BUILTIN_ADDSS
,
25428 IX86_BUILTIN_DIVPS
,
25429 IX86_BUILTIN_DIVSS
,
25430 IX86_BUILTIN_MULPS
,
25431 IX86_BUILTIN_MULSS
,
25432 IX86_BUILTIN_SUBPS
,
25433 IX86_BUILTIN_SUBSS
,
25435 IX86_BUILTIN_CMPEQPS
,
25436 IX86_BUILTIN_CMPLTPS
,
25437 IX86_BUILTIN_CMPLEPS
,
25438 IX86_BUILTIN_CMPGTPS
,
25439 IX86_BUILTIN_CMPGEPS
,
25440 IX86_BUILTIN_CMPNEQPS
,
25441 IX86_BUILTIN_CMPNLTPS
,
25442 IX86_BUILTIN_CMPNLEPS
,
25443 IX86_BUILTIN_CMPNGTPS
,
25444 IX86_BUILTIN_CMPNGEPS
,
25445 IX86_BUILTIN_CMPORDPS
,
25446 IX86_BUILTIN_CMPUNORDPS
,
25447 IX86_BUILTIN_CMPEQSS
,
25448 IX86_BUILTIN_CMPLTSS
,
25449 IX86_BUILTIN_CMPLESS
,
25450 IX86_BUILTIN_CMPNEQSS
,
25451 IX86_BUILTIN_CMPNLTSS
,
25452 IX86_BUILTIN_CMPNLESS
,
25453 IX86_BUILTIN_CMPNGTSS
,
25454 IX86_BUILTIN_CMPNGESS
,
25455 IX86_BUILTIN_CMPORDSS
,
25456 IX86_BUILTIN_CMPUNORDSS
,
25458 IX86_BUILTIN_COMIEQSS
,
25459 IX86_BUILTIN_COMILTSS
,
25460 IX86_BUILTIN_COMILESS
,
25461 IX86_BUILTIN_COMIGTSS
,
25462 IX86_BUILTIN_COMIGESS
,
25463 IX86_BUILTIN_COMINEQSS
,
25464 IX86_BUILTIN_UCOMIEQSS
,
25465 IX86_BUILTIN_UCOMILTSS
,
25466 IX86_BUILTIN_UCOMILESS
,
25467 IX86_BUILTIN_UCOMIGTSS
,
25468 IX86_BUILTIN_UCOMIGESS
,
25469 IX86_BUILTIN_UCOMINEQSS
,
25471 IX86_BUILTIN_CVTPI2PS
,
25472 IX86_BUILTIN_CVTPS2PI
,
25473 IX86_BUILTIN_CVTSI2SS
,
25474 IX86_BUILTIN_CVTSI642SS
,
25475 IX86_BUILTIN_CVTSS2SI
,
25476 IX86_BUILTIN_CVTSS2SI64
,
25477 IX86_BUILTIN_CVTTPS2PI
,
25478 IX86_BUILTIN_CVTTSS2SI
,
25479 IX86_BUILTIN_CVTTSS2SI64
,
25481 IX86_BUILTIN_MAXPS
,
25482 IX86_BUILTIN_MAXSS
,
25483 IX86_BUILTIN_MINPS
,
25484 IX86_BUILTIN_MINSS
,
25486 IX86_BUILTIN_LOADUPS
,
25487 IX86_BUILTIN_STOREUPS
,
25488 IX86_BUILTIN_MOVSS
,
25490 IX86_BUILTIN_MOVHLPS
,
25491 IX86_BUILTIN_MOVLHPS
,
25492 IX86_BUILTIN_LOADHPS
,
25493 IX86_BUILTIN_LOADLPS
,
25494 IX86_BUILTIN_STOREHPS
,
25495 IX86_BUILTIN_STORELPS
,
25497 IX86_BUILTIN_MASKMOVQ
,
25498 IX86_BUILTIN_MOVMSKPS
,
25499 IX86_BUILTIN_PMOVMSKB
,
25501 IX86_BUILTIN_MOVNTPS
,
25502 IX86_BUILTIN_MOVNTQ
,
25504 IX86_BUILTIN_LOADDQU
,
25505 IX86_BUILTIN_STOREDQU
,
25507 IX86_BUILTIN_PACKSSWB
,
25508 IX86_BUILTIN_PACKSSDW
,
25509 IX86_BUILTIN_PACKUSWB
,
25511 IX86_BUILTIN_PADDB
,
25512 IX86_BUILTIN_PADDW
,
25513 IX86_BUILTIN_PADDD
,
25514 IX86_BUILTIN_PADDQ
,
25515 IX86_BUILTIN_PADDSB
,
25516 IX86_BUILTIN_PADDSW
,
25517 IX86_BUILTIN_PADDUSB
,
25518 IX86_BUILTIN_PADDUSW
,
25519 IX86_BUILTIN_PSUBB
,
25520 IX86_BUILTIN_PSUBW
,
25521 IX86_BUILTIN_PSUBD
,
25522 IX86_BUILTIN_PSUBQ
,
25523 IX86_BUILTIN_PSUBSB
,
25524 IX86_BUILTIN_PSUBSW
,
25525 IX86_BUILTIN_PSUBUSB
,
25526 IX86_BUILTIN_PSUBUSW
,
25529 IX86_BUILTIN_PANDN
,
25533 IX86_BUILTIN_PAVGB
,
25534 IX86_BUILTIN_PAVGW
,
25536 IX86_BUILTIN_PCMPEQB
,
25537 IX86_BUILTIN_PCMPEQW
,
25538 IX86_BUILTIN_PCMPEQD
,
25539 IX86_BUILTIN_PCMPGTB
,
25540 IX86_BUILTIN_PCMPGTW
,
25541 IX86_BUILTIN_PCMPGTD
,
25543 IX86_BUILTIN_PMADDWD
,
25545 IX86_BUILTIN_PMAXSW
,
25546 IX86_BUILTIN_PMAXUB
,
25547 IX86_BUILTIN_PMINSW
,
25548 IX86_BUILTIN_PMINUB
,
25550 IX86_BUILTIN_PMULHUW
,
25551 IX86_BUILTIN_PMULHW
,
25552 IX86_BUILTIN_PMULLW
,
25554 IX86_BUILTIN_PSADBW
,
25555 IX86_BUILTIN_PSHUFW
,
25557 IX86_BUILTIN_PSLLW
,
25558 IX86_BUILTIN_PSLLD
,
25559 IX86_BUILTIN_PSLLQ
,
25560 IX86_BUILTIN_PSRAW
,
25561 IX86_BUILTIN_PSRAD
,
25562 IX86_BUILTIN_PSRLW
,
25563 IX86_BUILTIN_PSRLD
,
25564 IX86_BUILTIN_PSRLQ
,
25565 IX86_BUILTIN_PSLLWI
,
25566 IX86_BUILTIN_PSLLDI
,
25567 IX86_BUILTIN_PSLLQI
,
25568 IX86_BUILTIN_PSRAWI
,
25569 IX86_BUILTIN_PSRADI
,
25570 IX86_BUILTIN_PSRLWI
,
25571 IX86_BUILTIN_PSRLDI
,
25572 IX86_BUILTIN_PSRLQI
,
25574 IX86_BUILTIN_PUNPCKHBW
,
25575 IX86_BUILTIN_PUNPCKHWD
,
25576 IX86_BUILTIN_PUNPCKHDQ
,
25577 IX86_BUILTIN_PUNPCKLBW
,
25578 IX86_BUILTIN_PUNPCKLWD
,
25579 IX86_BUILTIN_PUNPCKLDQ
,
25581 IX86_BUILTIN_SHUFPS
,
25583 IX86_BUILTIN_RCPPS
,
25584 IX86_BUILTIN_RCPSS
,
25585 IX86_BUILTIN_RSQRTPS
,
25586 IX86_BUILTIN_RSQRTPS_NR
,
25587 IX86_BUILTIN_RSQRTSS
,
25588 IX86_BUILTIN_RSQRTF
,
25589 IX86_BUILTIN_SQRTPS
,
25590 IX86_BUILTIN_SQRTPS_NR
,
25591 IX86_BUILTIN_SQRTSS
,
25593 IX86_BUILTIN_UNPCKHPS
,
25594 IX86_BUILTIN_UNPCKLPS
,
25596 IX86_BUILTIN_ANDPS
,
25597 IX86_BUILTIN_ANDNPS
,
25599 IX86_BUILTIN_XORPS
,
25602 IX86_BUILTIN_LDMXCSR
,
25603 IX86_BUILTIN_STMXCSR
,
25604 IX86_BUILTIN_SFENCE
,
25606 IX86_BUILTIN_FXSAVE
,
25607 IX86_BUILTIN_FXRSTOR
,
25608 IX86_BUILTIN_FXSAVE64
,
25609 IX86_BUILTIN_FXRSTOR64
,
25611 IX86_BUILTIN_XSAVE
,
25612 IX86_BUILTIN_XRSTOR
,
25613 IX86_BUILTIN_XSAVE64
,
25614 IX86_BUILTIN_XRSTOR64
,
25616 IX86_BUILTIN_XSAVEOPT
,
25617 IX86_BUILTIN_XSAVEOPT64
,
25619 /* 3DNow! Original */
25620 IX86_BUILTIN_FEMMS
,
25621 IX86_BUILTIN_PAVGUSB
,
25622 IX86_BUILTIN_PF2ID
,
25623 IX86_BUILTIN_PFACC
,
25624 IX86_BUILTIN_PFADD
,
25625 IX86_BUILTIN_PFCMPEQ
,
25626 IX86_BUILTIN_PFCMPGE
,
25627 IX86_BUILTIN_PFCMPGT
,
25628 IX86_BUILTIN_PFMAX
,
25629 IX86_BUILTIN_PFMIN
,
25630 IX86_BUILTIN_PFMUL
,
25631 IX86_BUILTIN_PFRCP
,
25632 IX86_BUILTIN_PFRCPIT1
,
25633 IX86_BUILTIN_PFRCPIT2
,
25634 IX86_BUILTIN_PFRSQIT1
,
25635 IX86_BUILTIN_PFRSQRT
,
25636 IX86_BUILTIN_PFSUB
,
25637 IX86_BUILTIN_PFSUBR
,
25638 IX86_BUILTIN_PI2FD
,
25639 IX86_BUILTIN_PMULHRW
,
25641 /* 3DNow! Athlon Extensions */
25642 IX86_BUILTIN_PF2IW
,
25643 IX86_BUILTIN_PFNACC
,
25644 IX86_BUILTIN_PFPNACC
,
25645 IX86_BUILTIN_PI2FW
,
25646 IX86_BUILTIN_PSWAPDSI
,
25647 IX86_BUILTIN_PSWAPDSF
,
25650 IX86_BUILTIN_ADDPD
,
25651 IX86_BUILTIN_ADDSD
,
25652 IX86_BUILTIN_DIVPD
,
25653 IX86_BUILTIN_DIVSD
,
25654 IX86_BUILTIN_MULPD
,
25655 IX86_BUILTIN_MULSD
,
25656 IX86_BUILTIN_SUBPD
,
25657 IX86_BUILTIN_SUBSD
,
25659 IX86_BUILTIN_CMPEQPD
,
25660 IX86_BUILTIN_CMPLTPD
,
25661 IX86_BUILTIN_CMPLEPD
,
25662 IX86_BUILTIN_CMPGTPD
,
25663 IX86_BUILTIN_CMPGEPD
,
25664 IX86_BUILTIN_CMPNEQPD
,
25665 IX86_BUILTIN_CMPNLTPD
,
25666 IX86_BUILTIN_CMPNLEPD
,
25667 IX86_BUILTIN_CMPNGTPD
,
25668 IX86_BUILTIN_CMPNGEPD
,
25669 IX86_BUILTIN_CMPORDPD
,
25670 IX86_BUILTIN_CMPUNORDPD
,
25671 IX86_BUILTIN_CMPEQSD
,
25672 IX86_BUILTIN_CMPLTSD
,
25673 IX86_BUILTIN_CMPLESD
,
25674 IX86_BUILTIN_CMPNEQSD
,
25675 IX86_BUILTIN_CMPNLTSD
,
25676 IX86_BUILTIN_CMPNLESD
,
25677 IX86_BUILTIN_CMPORDSD
,
25678 IX86_BUILTIN_CMPUNORDSD
,
25680 IX86_BUILTIN_COMIEQSD
,
25681 IX86_BUILTIN_COMILTSD
,
25682 IX86_BUILTIN_COMILESD
,
25683 IX86_BUILTIN_COMIGTSD
,
25684 IX86_BUILTIN_COMIGESD
,
25685 IX86_BUILTIN_COMINEQSD
,
25686 IX86_BUILTIN_UCOMIEQSD
,
25687 IX86_BUILTIN_UCOMILTSD
,
25688 IX86_BUILTIN_UCOMILESD
,
25689 IX86_BUILTIN_UCOMIGTSD
,
25690 IX86_BUILTIN_UCOMIGESD
,
25691 IX86_BUILTIN_UCOMINEQSD
,
25693 IX86_BUILTIN_MAXPD
,
25694 IX86_BUILTIN_MAXSD
,
25695 IX86_BUILTIN_MINPD
,
25696 IX86_BUILTIN_MINSD
,
25698 IX86_BUILTIN_ANDPD
,
25699 IX86_BUILTIN_ANDNPD
,
25701 IX86_BUILTIN_XORPD
,
25703 IX86_BUILTIN_SQRTPD
,
25704 IX86_BUILTIN_SQRTSD
,
25706 IX86_BUILTIN_UNPCKHPD
,
25707 IX86_BUILTIN_UNPCKLPD
,
25709 IX86_BUILTIN_SHUFPD
,
25711 IX86_BUILTIN_LOADUPD
,
25712 IX86_BUILTIN_STOREUPD
,
25713 IX86_BUILTIN_MOVSD
,
25715 IX86_BUILTIN_LOADHPD
,
25716 IX86_BUILTIN_LOADLPD
,
25718 IX86_BUILTIN_CVTDQ2PD
,
25719 IX86_BUILTIN_CVTDQ2PS
,
25721 IX86_BUILTIN_CVTPD2DQ
,
25722 IX86_BUILTIN_CVTPD2PI
,
25723 IX86_BUILTIN_CVTPD2PS
,
25724 IX86_BUILTIN_CVTTPD2DQ
,
25725 IX86_BUILTIN_CVTTPD2PI
,
25727 IX86_BUILTIN_CVTPI2PD
,
25728 IX86_BUILTIN_CVTSI2SD
,
25729 IX86_BUILTIN_CVTSI642SD
,
25731 IX86_BUILTIN_CVTSD2SI
,
25732 IX86_BUILTIN_CVTSD2SI64
,
25733 IX86_BUILTIN_CVTSD2SS
,
25734 IX86_BUILTIN_CVTSS2SD
,
25735 IX86_BUILTIN_CVTTSD2SI
,
25736 IX86_BUILTIN_CVTTSD2SI64
,
25738 IX86_BUILTIN_CVTPS2DQ
,
25739 IX86_BUILTIN_CVTPS2PD
,
25740 IX86_BUILTIN_CVTTPS2DQ
,
25742 IX86_BUILTIN_MOVNTI
,
25743 IX86_BUILTIN_MOVNTI64
,
25744 IX86_BUILTIN_MOVNTPD
,
25745 IX86_BUILTIN_MOVNTDQ
,
25747 IX86_BUILTIN_MOVQ128
,
25750 IX86_BUILTIN_MASKMOVDQU
,
25751 IX86_BUILTIN_MOVMSKPD
,
25752 IX86_BUILTIN_PMOVMSKB128
,
25754 IX86_BUILTIN_PACKSSWB128
,
25755 IX86_BUILTIN_PACKSSDW128
,
25756 IX86_BUILTIN_PACKUSWB128
,
25758 IX86_BUILTIN_PADDB128
,
25759 IX86_BUILTIN_PADDW128
,
25760 IX86_BUILTIN_PADDD128
,
25761 IX86_BUILTIN_PADDQ128
,
25762 IX86_BUILTIN_PADDSB128
,
25763 IX86_BUILTIN_PADDSW128
,
25764 IX86_BUILTIN_PADDUSB128
,
25765 IX86_BUILTIN_PADDUSW128
,
25766 IX86_BUILTIN_PSUBB128
,
25767 IX86_BUILTIN_PSUBW128
,
25768 IX86_BUILTIN_PSUBD128
,
25769 IX86_BUILTIN_PSUBQ128
,
25770 IX86_BUILTIN_PSUBSB128
,
25771 IX86_BUILTIN_PSUBSW128
,
25772 IX86_BUILTIN_PSUBUSB128
,
25773 IX86_BUILTIN_PSUBUSW128
,
25775 IX86_BUILTIN_PAND128
,
25776 IX86_BUILTIN_PANDN128
,
25777 IX86_BUILTIN_POR128
,
25778 IX86_BUILTIN_PXOR128
,
25780 IX86_BUILTIN_PAVGB128
,
25781 IX86_BUILTIN_PAVGW128
,
25783 IX86_BUILTIN_PCMPEQB128
,
25784 IX86_BUILTIN_PCMPEQW128
,
25785 IX86_BUILTIN_PCMPEQD128
,
25786 IX86_BUILTIN_PCMPGTB128
,
25787 IX86_BUILTIN_PCMPGTW128
,
25788 IX86_BUILTIN_PCMPGTD128
,
25790 IX86_BUILTIN_PMADDWD128
,
25792 IX86_BUILTIN_PMAXSW128
,
25793 IX86_BUILTIN_PMAXUB128
,
25794 IX86_BUILTIN_PMINSW128
,
25795 IX86_BUILTIN_PMINUB128
,
25797 IX86_BUILTIN_PMULUDQ
,
25798 IX86_BUILTIN_PMULUDQ128
,
25799 IX86_BUILTIN_PMULHUW128
,
25800 IX86_BUILTIN_PMULHW128
,
25801 IX86_BUILTIN_PMULLW128
,
25803 IX86_BUILTIN_PSADBW128
,
25804 IX86_BUILTIN_PSHUFHW
,
25805 IX86_BUILTIN_PSHUFLW
,
25806 IX86_BUILTIN_PSHUFD
,
25808 IX86_BUILTIN_PSLLDQI128
,
25809 IX86_BUILTIN_PSLLWI128
,
25810 IX86_BUILTIN_PSLLDI128
,
25811 IX86_BUILTIN_PSLLQI128
,
25812 IX86_BUILTIN_PSRAWI128
,
25813 IX86_BUILTIN_PSRADI128
,
25814 IX86_BUILTIN_PSRLDQI128
,
25815 IX86_BUILTIN_PSRLWI128
,
25816 IX86_BUILTIN_PSRLDI128
,
25817 IX86_BUILTIN_PSRLQI128
,
25819 IX86_BUILTIN_PSLLDQ128
,
25820 IX86_BUILTIN_PSLLW128
,
25821 IX86_BUILTIN_PSLLD128
,
25822 IX86_BUILTIN_PSLLQ128
,
25823 IX86_BUILTIN_PSRAW128
,
25824 IX86_BUILTIN_PSRAD128
,
25825 IX86_BUILTIN_PSRLW128
,
25826 IX86_BUILTIN_PSRLD128
,
25827 IX86_BUILTIN_PSRLQ128
,
25829 IX86_BUILTIN_PUNPCKHBW128
,
25830 IX86_BUILTIN_PUNPCKHWD128
,
25831 IX86_BUILTIN_PUNPCKHDQ128
,
25832 IX86_BUILTIN_PUNPCKHQDQ128
,
25833 IX86_BUILTIN_PUNPCKLBW128
,
25834 IX86_BUILTIN_PUNPCKLWD128
,
25835 IX86_BUILTIN_PUNPCKLDQ128
,
25836 IX86_BUILTIN_PUNPCKLQDQ128
,
25838 IX86_BUILTIN_CLFLUSH
,
25839 IX86_BUILTIN_MFENCE
,
25840 IX86_BUILTIN_LFENCE
,
25841 IX86_BUILTIN_PAUSE
,
25843 IX86_BUILTIN_BSRSI
,
25844 IX86_BUILTIN_BSRDI
,
25845 IX86_BUILTIN_RDPMC
,
25846 IX86_BUILTIN_RDTSC
,
25847 IX86_BUILTIN_RDTSCP
,
25848 IX86_BUILTIN_ROLQI
,
25849 IX86_BUILTIN_ROLHI
,
25850 IX86_BUILTIN_RORQI
,
25851 IX86_BUILTIN_RORHI
,
25854 IX86_BUILTIN_ADDSUBPS
,
25855 IX86_BUILTIN_HADDPS
,
25856 IX86_BUILTIN_HSUBPS
,
25857 IX86_BUILTIN_MOVSHDUP
,
25858 IX86_BUILTIN_MOVSLDUP
,
25859 IX86_BUILTIN_ADDSUBPD
,
25860 IX86_BUILTIN_HADDPD
,
25861 IX86_BUILTIN_HSUBPD
,
25862 IX86_BUILTIN_LDDQU
,
25864 IX86_BUILTIN_MONITOR
,
25865 IX86_BUILTIN_MWAIT
,
25868 IX86_BUILTIN_PHADDW
,
25869 IX86_BUILTIN_PHADDD
,
25870 IX86_BUILTIN_PHADDSW
,
25871 IX86_BUILTIN_PHSUBW
,
25872 IX86_BUILTIN_PHSUBD
,
25873 IX86_BUILTIN_PHSUBSW
,
25874 IX86_BUILTIN_PMADDUBSW
,
25875 IX86_BUILTIN_PMULHRSW
,
25876 IX86_BUILTIN_PSHUFB
,
25877 IX86_BUILTIN_PSIGNB
,
25878 IX86_BUILTIN_PSIGNW
,
25879 IX86_BUILTIN_PSIGND
,
25880 IX86_BUILTIN_PALIGNR
,
25881 IX86_BUILTIN_PABSB
,
25882 IX86_BUILTIN_PABSW
,
25883 IX86_BUILTIN_PABSD
,
25885 IX86_BUILTIN_PHADDW128
,
25886 IX86_BUILTIN_PHADDD128
,
25887 IX86_BUILTIN_PHADDSW128
,
25888 IX86_BUILTIN_PHSUBW128
,
25889 IX86_BUILTIN_PHSUBD128
,
25890 IX86_BUILTIN_PHSUBSW128
,
25891 IX86_BUILTIN_PMADDUBSW128
,
25892 IX86_BUILTIN_PMULHRSW128
,
25893 IX86_BUILTIN_PSHUFB128
,
25894 IX86_BUILTIN_PSIGNB128
,
25895 IX86_BUILTIN_PSIGNW128
,
25896 IX86_BUILTIN_PSIGND128
,
25897 IX86_BUILTIN_PALIGNR128
,
25898 IX86_BUILTIN_PABSB128
,
25899 IX86_BUILTIN_PABSW128
,
25900 IX86_BUILTIN_PABSD128
,
25902 /* AMDFAM10 - SSE4A New Instructions. */
25903 IX86_BUILTIN_MOVNTSD
,
25904 IX86_BUILTIN_MOVNTSS
,
25905 IX86_BUILTIN_EXTRQI
,
25906 IX86_BUILTIN_EXTRQ
,
25907 IX86_BUILTIN_INSERTQI
,
25908 IX86_BUILTIN_INSERTQ
,
25911 IX86_BUILTIN_BLENDPD
,
25912 IX86_BUILTIN_BLENDPS
,
25913 IX86_BUILTIN_BLENDVPD
,
25914 IX86_BUILTIN_BLENDVPS
,
25915 IX86_BUILTIN_PBLENDVB128
,
25916 IX86_BUILTIN_PBLENDW128
,
25921 IX86_BUILTIN_INSERTPS128
,
25923 IX86_BUILTIN_MOVNTDQA
,
25924 IX86_BUILTIN_MPSADBW128
,
25925 IX86_BUILTIN_PACKUSDW128
,
25926 IX86_BUILTIN_PCMPEQQ
,
25927 IX86_BUILTIN_PHMINPOSUW128
,
25929 IX86_BUILTIN_PMAXSB128
,
25930 IX86_BUILTIN_PMAXSD128
,
25931 IX86_BUILTIN_PMAXUD128
,
25932 IX86_BUILTIN_PMAXUW128
,
25934 IX86_BUILTIN_PMINSB128
,
25935 IX86_BUILTIN_PMINSD128
,
25936 IX86_BUILTIN_PMINUD128
,
25937 IX86_BUILTIN_PMINUW128
,
25939 IX86_BUILTIN_PMOVSXBW128
,
25940 IX86_BUILTIN_PMOVSXBD128
,
25941 IX86_BUILTIN_PMOVSXBQ128
,
25942 IX86_BUILTIN_PMOVSXWD128
,
25943 IX86_BUILTIN_PMOVSXWQ128
,
25944 IX86_BUILTIN_PMOVSXDQ128
,
25946 IX86_BUILTIN_PMOVZXBW128
,
25947 IX86_BUILTIN_PMOVZXBD128
,
25948 IX86_BUILTIN_PMOVZXBQ128
,
25949 IX86_BUILTIN_PMOVZXWD128
,
25950 IX86_BUILTIN_PMOVZXWQ128
,
25951 IX86_BUILTIN_PMOVZXDQ128
,
25953 IX86_BUILTIN_PMULDQ128
,
25954 IX86_BUILTIN_PMULLD128
,
25956 IX86_BUILTIN_ROUNDSD
,
25957 IX86_BUILTIN_ROUNDSS
,
25959 IX86_BUILTIN_ROUNDPD
,
25960 IX86_BUILTIN_ROUNDPS
,
25962 IX86_BUILTIN_FLOORPD
,
25963 IX86_BUILTIN_CEILPD
,
25964 IX86_BUILTIN_TRUNCPD
,
25965 IX86_BUILTIN_RINTPD
,
25966 IX86_BUILTIN_ROUNDPD_AZ
,
25968 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25969 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25970 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25972 IX86_BUILTIN_FLOORPS
,
25973 IX86_BUILTIN_CEILPS
,
25974 IX86_BUILTIN_TRUNCPS
,
25975 IX86_BUILTIN_RINTPS
,
25976 IX86_BUILTIN_ROUNDPS_AZ
,
25978 IX86_BUILTIN_FLOORPS_SFIX
,
25979 IX86_BUILTIN_CEILPS_SFIX
,
25980 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25982 IX86_BUILTIN_PTESTZ
,
25983 IX86_BUILTIN_PTESTC
,
25984 IX86_BUILTIN_PTESTNZC
,
25986 IX86_BUILTIN_VEC_INIT_V2SI
,
25987 IX86_BUILTIN_VEC_INIT_V4HI
,
25988 IX86_BUILTIN_VEC_INIT_V8QI
,
25989 IX86_BUILTIN_VEC_EXT_V2DF
,
25990 IX86_BUILTIN_VEC_EXT_V2DI
,
25991 IX86_BUILTIN_VEC_EXT_V4SF
,
25992 IX86_BUILTIN_VEC_EXT_V4SI
,
25993 IX86_BUILTIN_VEC_EXT_V8HI
,
25994 IX86_BUILTIN_VEC_EXT_V2SI
,
25995 IX86_BUILTIN_VEC_EXT_V4HI
,
25996 IX86_BUILTIN_VEC_EXT_V16QI
,
25997 IX86_BUILTIN_VEC_SET_V2DI
,
25998 IX86_BUILTIN_VEC_SET_V4SF
,
25999 IX86_BUILTIN_VEC_SET_V4SI
,
26000 IX86_BUILTIN_VEC_SET_V8HI
,
26001 IX86_BUILTIN_VEC_SET_V4HI
,
26002 IX86_BUILTIN_VEC_SET_V16QI
,
26004 IX86_BUILTIN_VEC_PACK_SFIX
,
26005 IX86_BUILTIN_VEC_PACK_SFIX256
,
26008 IX86_BUILTIN_CRC32QI
,
26009 IX86_BUILTIN_CRC32HI
,
26010 IX86_BUILTIN_CRC32SI
,
26011 IX86_BUILTIN_CRC32DI
,
26013 IX86_BUILTIN_PCMPESTRI128
,
26014 IX86_BUILTIN_PCMPESTRM128
,
26015 IX86_BUILTIN_PCMPESTRA128
,
26016 IX86_BUILTIN_PCMPESTRC128
,
26017 IX86_BUILTIN_PCMPESTRO128
,
26018 IX86_BUILTIN_PCMPESTRS128
,
26019 IX86_BUILTIN_PCMPESTRZ128
,
26020 IX86_BUILTIN_PCMPISTRI128
,
26021 IX86_BUILTIN_PCMPISTRM128
,
26022 IX86_BUILTIN_PCMPISTRA128
,
26023 IX86_BUILTIN_PCMPISTRC128
,
26024 IX86_BUILTIN_PCMPISTRO128
,
26025 IX86_BUILTIN_PCMPISTRS128
,
26026 IX86_BUILTIN_PCMPISTRZ128
,
26028 IX86_BUILTIN_PCMPGTQ
,
26030 /* AES instructions */
26031 IX86_BUILTIN_AESENC128
,
26032 IX86_BUILTIN_AESENCLAST128
,
26033 IX86_BUILTIN_AESDEC128
,
26034 IX86_BUILTIN_AESDECLAST128
,
26035 IX86_BUILTIN_AESIMC128
,
26036 IX86_BUILTIN_AESKEYGENASSIST128
,
26038 /* PCLMUL instruction */
26039 IX86_BUILTIN_PCLMULQDQ128
,
26042 IX86_BUILTIN_ADDPD256
,
26043 IX86_BUILTIN_ADDPS256
,
26044 IX86_BUILTIN_ADDSUBPD256
,
26045 IX86_BUILTIN_ADDSUBPS256
,
26046 IX86_BUILTIN_ANDPD256
,
26047 IX86_BUILTIN_ANDPS256
,
26048 IX86_BUILTIN_ANDNPD256
,
26049 IX86_BUILTIN_ANDNPS256
,
26050 IX86_BUILTIN_BLENDPD256
,
26051 IX86_BUILTIN_BLENDPS256
,
26052 IX86_BUILTIN_BLENDVPD256
,
26053 IX86_BUILTIN_BLENDVPS256
,
26054 IX86_BUILTIN_DIVPD256
,
26055 IX86_BUILTIN_DIVPS256
,
26056 IX86_BUILTIN_DPPS256
,
26057 IX86_BUILTIN_HADDPD256
,
26058 IX86_BUILTIN_HADDPS256
,
26059 IX86_BUILTIN_HSUBPD256
,
26060 IX86_BUILTIN_HSUBPS256
,
26061 IX86_BUILTIN_MAXPD256
,
26062 IX86_BUILTIN_MAXPS256
,
26063 IX86_BUILTIN_MINPD256
,
26064 IX86_BUILTIN_MINPS256
,
26065 IX86_BUILTIN_MULPD256
,
26066 IX86_BUILTIN_MULPS256
,
26067 IX86_BUILTIN_ORPD256
,
26068 IX86_BUILTIN_ORPS256
,
26069 IX86_BUILTIN_SHUFPD256
,
26070 IX86_BUILTIN_SHUFPS256
,
26071 IX86_BUILTIN_SUBPD256
,
26072 IX86_BUILTIN_SUBPS256
,
26073 IX86_BUILTIN_XORPD256
,
26074 IX86_BUILTIN_XORPS256
,
26075 IX86_BUILTIN_CMPSD
,
26076 IX86_BUILTIN_CMPSS
,
26077 IX86_BUILTIN_CMPPD
,
26078 IX86_BUILTIN_CMPPS
,
26079 IX86_BUILTIN_CMPPD256
,
26080 IX86_BUILTIN_CMPPS256
,
26081 IX86_BUILTIN_CVTDQ2PD256
,
26082 IX86_BUILTIN_CVTDQ2PS256
,
26083 IX86_BUILTIN_CVTPD2PS256
,
26084 IX86_BUILTIN_CVTPS2DQ256
,
26085 IX86_BUILTIN_CVTPS2PD256
,
26086 IX86_BUILTIN_CVTTPD2DQ256
,
26087 IX86_BUILTIN_CVTPD2DQ256
,
26088 IX86_BUILTIN_CVTTPS2DQ256
,
26089 IX86_BUILTIN_EXTRACTF128PD256
,
26090 IX86_BUILTIN_EXTRACTF128PS256
,
26091 IX86_BUILTIN_EXTRACTF128SI256
,
26092 IX86_BUILTIN_VZEROALL
,
26093 IX86_BUILTIN_VZEROUPPER
,
26094 IX86_BUILTIN_VPERMILVARPD
,
26095 IX86_BUILTIN_VPERMILVARPS
,
26096 IX86_BUILTIN_VPERMILVARPD256
,
26097 IX86_BUILTIN_VPERMILVARPS256
,
26098 IX86_BUILTIN_VPERMILPD
,
26099 IX86_BUILTIN_VPERMILPS
,
26100 IX86_BUILTIN_VPERMILPD256
,
26101 IX86_BUILTIN_VPERMILPS256
,
26102 IX86_BUILTIN_VPERMIL2PD
,
26103 IX86_BUILTIN_VPERMIL2PS
,
26104 IX86_BUILTIN_VPERMIL2PD256
,
26105 IX86_BUILTIN_VPERMIL2PS256
,
26106 IX86_BUILTIN_VPERM2F128PD256
,
26107 IX86_BUILTIN_VPERM2F128PS256
,
26108 IX86_BUILTIN_VPERM2F128SI256
,
26109 IX86_BUILTIN_VBROADCASTSS
,
26110 IX86_BUILTIN_VBROADCASTSD256
,
26111 IX86_BUILTIN_VBROADCASTSS256
,
26112 IX86_BUILTIN_VBROADCASTPD256
,
26113 IX86_BUILTIN_VBROADCASTPS256
,
26114 IX86_BUILTIN_VINSERTF128PD256
,
26115 IX86_BUILTIN_VINSERTF128PS256
,
26116 IX86_BUILTIN_VINSERTF128SI256
,
26117 IX86_BUILTIN_LOADUPD256
,
26118 IX86_BUILTIN_LOADUPS256
,
26119 IX86_BUILTIN_STOREUPD256
,
26120 IX86_BUILTIN_STOREUPS256
,
26121 IX86_BUILTIN_LDDQU256
,
26122 IX86_BUILTIN_MOVNTDQ256
,
26123 IX86_BUILTIN_MOVNTPD256
,
26124 IX86_BUILTIN_MOVNTPS256
,
26125 IX86_BUILTIN_LOADDQU256
,
26126 IX86_BUILTIN_STOREDQU256
,
26127 IX86_BUILTIN_MASKLOADPD
,
26128 IX86_BUILTIN_MASKLOADPS
,
26129 IX86_BUILTIN_MASKSTOREPD
,
26130 IX86_BUILTIN_MASKSTOREPS
,
26131 IX86_BUILTIN_MASKLOADPD256
,
26132 IX86_BUILTIN_MASKLOADPS256
,
26133 IX86_BUILTIN_MASKSTOREPD256
,
26134 IX86_BUILTIN_MASKSTOREPS256
,
26135 IX86_BUILTIN_MOVSHDUP256
,
26136 IX86_BUILTIN_MOVSLDUP256
,
26137 IX86_BUILTIN_MOVDDUP256
,
26139 IX86_BUILTIN_SQRTPD256
,
26140 IX86_BUILTIN_SQRTPS256
,
26141 IX86_BUILTIN_SQRTPS_NR256
,
26142 IX86_BUILTIN_RSQRTPS256
,
26143 IX86_BUILTIN_RSQRTPS_NR256
,
26145 IX86_BUILTIN_RCPPS256
,
26147 IX86_BUILTIN_ROUNDPD256
,
26148 IX86_BUILTIN_ROUNDPS256
,
26150 IX86_BUILTIN_FLOORPD256
,
26151 IX86_BUILTIN_CEILPD256
,
26152 IX86_BUILTIN_TRUNCPD256
,
26153 IX86_BUILTIN_RINTPD256
,
26154 IX86_BUILTIN_ROUNDPD_AZ256
,
26156 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26157 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26158 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26160 IX86_BUILTIN_FLOORPS256
,
26161 IX86_BUILTIN_CEILPS256
,
26162 IX86_BUILTIN_TRUNCPS256
,
26163 IX86_BUILTIN_RINTPS256
,
26164 IX86_BUILTIN_ROUNDPS_AZ256
,
26166 IX86_BUILTIN_FLOORPS_SFIX256
,
26167 IX86_BUILTIN_CEILPS_SFIX256
,
26168 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26170 IX86_BUILTIN_UNPCKHPD256
,
26171 IX86_BUILTIN_UNPCKLPD256
,
26172 IX86_BUILTIN_UNPCKHPS256
,
26173 IX86_BUILTIN_UNPCKLPS256
,
26175 IX86_BUILTIN_SI256_SI
,
26176 IX86_BUILTIN_PS256_PS
,
26177 IX86_BUILTIN_PD256_PD
,
26178 IX86_BUILTIN_SI_SI256
,
26179 IX86_BUILTIN_PS_PS256
,
26180 IX86_BUILTIN_PD_PD256
,
26182 IX86_BUILTIN_VTESTZPD
,
26183 IX86_BUILTIN_VTESTCPD
,
26184 IX86_BUILTIN_VTESTNZCPD
,
26185 IX86_BUILTIN_VTESTZPS
,
26186 IX86_BUILTIN_VTESTCPS
,
26187 IX86_BUILTIN_VTESTNZCPS
,
26188 IX86_BUILTIN_VTESTZPD256
,
26189 IX86_BUILTIN_VTESTCPD256
,
26190 IX86_BUILTIN_VTESTNZCPD256
,
26191 IX86_BUILTIN_VTESTZPS256
,
26192 IX86_BUILTIN_VTESTCPS256
,
26193 IX86_BUILTIN_VTESTNZCPS256
,
26194 IX86_BUILTIN_PTESTZ256
,
26195 IX86_BUILTIN_PTESTC256
,
26196 IX86_BUILTIN_PTESTNZC256
,
26198 IX86_BUILTIN_MOVMSKPD256
,
26199 IX86_BUILTIN_MOVMSKPS256
,
26202 IX86_BUILTIN_MPSADBW256
,
26203 IX86_BUILTIN_PABSB256
,
26204 IX86_BUILTIN_PABSW256
,
26205 IX86_BUILTIN_PABSD256
,
26206 IX86_BUILTIN_PACKSSDW256
,
26207 IX86_BUILTIN_PACKSSWB256
,
26208 IX86_BUILTIN_PACKUSDW256
,
26209 IX86_BUILTIN_PACKUSWB256
,
26210 IX86_BUILTIN_PADDB256
,
26211 IX86_BUILTIN_PADDW256
,
26212 IX86_BUILTIN_PADDD256
,
26213 IX86_BUILTIN_PADDQ256
,
26214 IX86_BUILTIN_PADDSB256
,
26215 IX86_BUILTIN_PADDSW256
,
26216 IX86_BUILTIN_PADDUSB256
,
26217 IX86_BUILTIN_PADDUSW256
,
26218 IX86_BUILTIN_PALIGNR256
,
26219 IX86_BUILTIN_AND256I
,
26220 IX86_BUILTIN_ANDNOT256I
,
26221 IX86_BUILTIN_PAVGB256
,
26222 IX86_BUILTIN_PAVGW256
,
26223 IX86_BUILTIN_PBLENDVB256
,
26224 IX86_BUILTIN_PBLENDVW256
,
26225 IX86_BUILTIN_PCMPEQB256
,
26226 IX86_BUILTIN_PCMPEQW256
,
26227 IX86_BUILTIN_PCMPEQD256
,
26228 IX86_BUILTIN_PCMPEQQ256
,
26229 IX86_BUILTIN_PCMPGTB256
,
26230 IX86_BUILTIN_PCMPGTW256
,
26231 IX86_BUILTIN_PCMPGTD256
,
26232 IX86_BUILTIN_PCMPGTQ256
,
26233 IX86_BUILTIN_PHADDW256
,
26234 IX86_BUILTIN_PHADDD256
,
26235 IX86_BUILTIN_PHADDSW256
,
26236 IX86_BUILTIN_PHSUBW256
,
26237 IX86_BUILTIN_PHSUBD256
,
26238 IX86_BUILTIN_PHSUBSW256
,
26239 IX86_BUILTIN_PMADDUBSW256
,
26240 IX86_BUILTIN_PMADDWD256
,
26241 IX86_BUILTIN_PMAXSB256
,
26242 IX86_BUILTIN_PMAXSW256
,
26243 IX86_BUILTIN_PMAXSD256
,
26244 IX86_BUILTIN_PMAXUB256
,
26245 IX86_BUILTIN_PMAXUW256
,
26246 IX86_BUILTIN_PMAXUD256
,
26247 IX86_BUILTIN_PMINSB256
,
26248 IX86_BUILTIN_PMINSW256
,
26249 IX86_BUILTIN_PMINSD256
,
26250 IX86_BUILTIN_PMINUB256
,
26251 IX86_BUILTIN_PMINUW256
,
26252 IX86_BUILTIN_PMINUD256
,
26253 IX86_BUILTIN_PMOVMSKB256
,
26254 IX86_BUILTIN_PMOVSXBW256
,
26255 IX86_BUILTIN_PMOVSXBD256
,
26256 IX86_BUILTIN_PMOVSXBQ256
,
26257 IX86_BUILTIN_PMOVSXWD256
,
26258 IX86_BUILTIN_PMOVSXWQ256
,
26259 IX86_BUILTIN_PMOVSXDQ256
,
26260 IX86_BUILTIN_PMOVZXBW256
,
26261 IX86_BUILTIN_PMOVZXBD256
,
26262 IX86_BUILTIN_PMOVZXBQ256
,
26263 IX86_BUILTIN_PMOVZXWD256
,
26264 IX86_BUILTIN_PMOVZXWQ256
,
26265 IX86_BUILTIN_PMOVZXDQ256
,
26266 IX86_BUILTIN_PMULDQ256
,
26267 IX86_BUILTIN_PMULHRSW256
,
26268 IX86_BUILTIN_PMULHUW256
,
26269 IX86_BUILTIN_PMULHW256
,
26270 IX86_BUILTIN_PMULLW256
,
26271 IX86_BUILTIN_PMULLD256
,
26272 IX86_BUILTIN_PMULUDQ256
,
26273 IX86_BUILTIN_POR256
,
26274 IX86_BUILTIN_PSADBW256
,
26275 IX86_BUILTIN_PSHUFB256
,
26276 IX86_BUILTIN_PSHUFD256
,
26277 IX86_BUILTIN_PSHUFHW256
,
26278 IX86_BUILTIN_PSHUFLW256
,
26279 IX86_BUILTIN_PSIGNB256
,
26280 IX86_BUILTIN_PSIGNW256
,
26281 IX86_BUILTIN_PSIGND256
,
26282 IX86_BUILTIN_PSLLDQI256
,
26283 IX86_BUILTIN_PSLLWI256
,
26284 IX86_BUILTIN_PSLLW256
,
26285 IX86_BUILTIN_PSLLDI256
,
26286 IX86_BUILTIN_PSLLD256
,
26287 IX86_BUILTIN_PSLLQI256
,
26288 IX86_BUILTIN_PSLLQ256
,
26289 IX86_BUILTIN_PSRAWI256
,
26290 IX86_BUILTIN_PSRAW256
,
26291 IX86_BUILTIN_PSRADI256
,
26292 IX86_BUILTIN_PSRAD256
,
26293 IX86_BUILTIN_PSRLDQI256
,
26294 IX86_BUILTIN_PSRLWI256
,
26295 IX86_BUILTIN_PSRLW256
,
26296 IX86_BUILTIN_PSRLDI256
,
26297 IX86_BUILTIN_PSRLD256
,
26298 IX86_BUILTIN_PSRLQI256
,
26299 IX86_BUILTIN_PSRLQ256
,
26300 IX86_BUILTIN_PSUBB256
,
26301 IX86_BUILTIN_PSUBW256
,
26302 IX86_BUILTIN_PSUBD256
,
26303 IX86_BUILTIN_PSUBQ256
,
26304 IX86_BUILTIN_PSUBSB256
,
26305 IX86_BUILTIN_PSUBSW256
,
26306 IX86_BUILTIN_PSUBUSB256
,
26307 IX86_BUILTIN_PSUBUSW256
,
26308 IX86_BUILTIN_PUNPCKHBW256
,
26309 IX86_BUILTIN_PUNPCKHWD256
,
26310 IX86_BUILTIN_PUNPCKHDQ256
,
26311 IX86_BUILTIN_PUNPCKHQDQ256
,
26312 IX86_BUILTIN_PUNPCKLBW256
,
26313 IX86_BUILTIN_PUNPCKLWD256
,
26314 IX86_BUILTIN_PUNPCKLDQ256
,
26315 IX86_BUILTIN_PUNPCKLQDQ256
,
26316 IX86_BUILTIN_PXOR256
,
26317 IX86_BUILTIN_MOVNTDQA256
,
26318 IX86_BUILTIN_VBROADCASTSS_PS
,
26319 IX86_BUILTIN_VBROADCASTSS_PS256
,
26320 IX86_BUILTIN_VBROADCASTSD_PD256
,
26321 IX86_BUILTIN_VBROADCASTSI256
,
26322 IX86_BUILTIN_PBLENDD256
,
26323 IX86_BUILTIN_PBLENDD128
,
26324 IX86_BUILTIN_PBROADCASTB256
,
26325 IX86_BUILTIN_PBROADCASTW256
,
26326 IX86_BUILTIN_PBROADCASTD256
,
26327 IX86_BUILTIN_PBROADCASTQ256
,
26328 IX86_BUILTIN_PBROADCASTB128
,
26329 IX86_BUILTIN_PBROADCASTW128
,
26330 IX86_BUILTIN_PBROADCASTD128
,
26331 IX86_BUILTIN_PBROADCASTQ128
,
26332 IX86_BUILTIN_VPERMVARSI256
,
26333 IX86_BUILTIN_VPERMDF256
,
26334 IX86_BUILTIN_VPERMVARSF256
,
26335 IX86_BUILTIN_VPERMDI256
,
26336 IX86_BUILTIN_VPERMTI256
,
26337 IX86_BUILTIN_VEXTRACT128I256
,
26338 IX86_BUILTIN_VINSERT128I256
,
26339 IX86_BUILTIN_MASKLOADD
,
26340 IX86_BUILTIN_MASKLOADQ
,
26341 IX86_BUILTIN_MASKLOADD256
,
26342 IX86_BUILTIN_MASKLOADQ256
,
26343 IX86_BUILTIN_MASKSTORED
,
26344 IX86_BUILTIN_MASKSTOREQ
,
26345 IX86_BUILTIN_MASKSTORED256
,
26346 IX86_BUILTIN_MASKSTOREQ256
,
26347 IX86_BUILTIN_PSLLVV4DI
,
26348 IX86_BUILTIN_PSLLVV2DI
,
26349 IX86_BUILTIN_PSLLVV8SI
,
26350 IX86_BUILTIN_PSLLVV4SI
,
26351 IX86_BUILTIN_PSRAVV8SI
,
26352 IX86_BUILTIN_PSRAVV4SI
,
26353 IX86_BUILTIN_PSRLVV4DI
,
26354 IX86_BUILTIN_PSRLVV2DI
,
26355 IX86_BUILTIN_PSRLVV8SI
,
26356 IX86_BUILTIN_PSRLVV4SI
,
26358 IX86_BUILTIN_GATHERSIV2DF
,
26359 IX86_BUILTIN_GATHERSIV4DF
,
26360 IX86_BUILTIN_GATHERDIV2DF
,
26361 IX86_BUILTIN_GATHERDIV4DF
,
26362 IX86_BUILTIN_GATHERSIV4SF
,
26363 IX86_BUILTIN_GATHERSIV8SF
,
26364 IX86_BUILTIN_GATHERDIV4SF
,
26365 IX86_BUILTIN_GATHERDIV8SF
,
26366 IX86_BUILTIN_GATHERSIV2DI
,
26367 IX86_BUILTIN_GATHERSIV4DI
,
26368 IX86_BUILTIN_GATHERDIV2DI
,
26369 IX86_BUILTIN_GATHERDIV4DI
,
26370 IX86_BUILTIN_GATHERSIV4SI
,
26371 IX86_BUILTIN_GATHERSIV8SI
,
26372 IX86_BUILTIN_GATHERDIV4SI
,
26373 IX86_BUILTIN_GATHERDIV8SI
,
26375 /* Alternate 4 element gather for the vectorizer where
26376 all operands are 32-byte wide. */
26377 IX86_BUILTIN_GATHERALTSIV4DF
,
26378 IX86_BUILTIN_GATHERALTDIV8SF
,
26379 IX86_BUILTIN_GATHERALTSIV4DI
,
26380 IX86_BUILTIN_GATHERALTDIV8SI
,
26382 /* TFmode support builtins. */
26384 IX86_BUILTIN_HUGE_VALQ
,
26385 IX86_BUILTIN_FABSQ
,
26386 IX86_BUILTIN_COPYSIGNQ
,
26388 /* Vectorizer support builtins. */
26389 IX86_BUILTIN_CPYSGNPS
,
26390 IX86_BUILTIN_CPYSGNPD
,
26391 IX86_BUILTIN_CPYSGNPS256
,
26392 IX86_BUILTIN_CPYSGNPD256
,
26394 /* FMA4 instructions. */
26395 IX86_BUILTIN_VFMADDSS
,
26396 IX86_BUILTIN_VFMADDSD
,
26397 IX86_BUILTIN_VFMADDPS
,
26398 IX86_BUILTIN_VFMADDPD
,
26399 IX86_BUILTIN_VFMADDPS256
,
26400 IX86_BUILTIN_VFMADDPD256
,
26401 IX86_BUILTIN_VFMADDSUBPS
,
26402 IX86_BUILTIN_VFMADDSUBPD
,
26403 IX86_BUILTIN_VFMADDSUBPS256
,
26404 IX86_BUILTIN_VFMADDSUBPD256
,
26406 /* FMA3 instructions. */
26407 IX86_BUILTIN_VFMADDSS3
,
26408 IX86_BUILTIN_VFMADDSD3
,
26410 /* XOP instructions. */
26411 IX86_BUILTIN_VPCMOV
,
26412 IX86_BUILTIN_VPCMOV_V2DI
,
26413 IX86_BUILTIN_VPCMOV_V4SI
,
26414 IX86_BUILTIN_VPCMOV_V8HI
,
26415 IX86_BUILTIN_VPCMOV_V16QI
,
26416 IX86_BUILTIN_VPCMOV_V4SF
,
26417 IX86_BUILTIN_VPCMOV_V2DF
,
26418 IX86_BUILTIN_VPCMOV256
,
26419 IX86_BUILTIN_VPCMOV_V4DI256
,
26420 IX86_BUILTIN_VPCMOV_V8SI256
,
26421 IX86_BUILTIN_VPCMOV_V16HI256
,
26422 IX86_BUILTIN_VPCMOV_V32QI256
,
26423 IX86_BUILTIN_VPCMOV_V8SF256
,
26424 IX86_BUILTIN_VPCMOV_V4DF256
,
26426 IX86_BUILTIN_VPPERM
,
26428 IX86_BUILTIN_VPMACSSWW
,
26429 IX86_BUILTIN_VPMACSWW
,
26430 IX86_BUILTIN_VPMACSSWD
,
26431 IX86_BUILTIN_VPMACSWD
,
26432 IX86_BUILTIN_VPMACSSDD
,
26433 IX86_BUILTIN_VPMACSDD
,
26434 IX86_BUILTIN_VPMACSSDQL
,
26435 IX86_BUILTIN_VPMACSSDQH
,
26436 IX86_BUILTIN_VPMACSDQL
,
26437 IX86_BUILTIN_VPMACSDQH
,
26438 IX86_BUILTIN_VPMADCSSWD
,
26439 IX86_BUILTIN_VPMADCSWD
,
26441 IX86_BUILTIN_VPHADDBW
,
26442 IX86_BUILTIN_VPHADDBD
,
26443 IX86_BUILTIN_VPHADDBQ
,
26444 IX86_BUILTIN_VPHADDWD
,
26445 IX86_BUILTIN_VPHADDWQ
,
26446 IX86_BUILTIN_VPHADDDQ
,
26447 IX86_BUILTIN_VPHADDUBW
,
26448 IX86_BUILTIN_VPHADDUBD
,
26449 IX86_BUILTIN_VPHADDUBQ
,
26450 IX86_BUILTIN_VPHADDUWD
,
26451 IX86_BUILTIN_VPHADDUWQ
,
26452 IX86_BUILTIN_VPHADDUDQ
,
26453 IX86_BUILTIN_VPHSUBBW
,
26454 IX86_BUILTIN_VPHSUBWD
,
26455 IX86_BUILTIN_VPHSUBDQ
,
26457 IX86_BUILTIN_VPROTB
,
26458 IX86_BUILTIN_VPROTW
,
26459 IX86_BUILTIN_VPROTD
,
26460 IX86_BUILTIN_VPROTQ
,
26461 IX86_BUILTIN_VPROTB_IMM
,
26462 IX86_BUILTIN_VPROTW_IMM
,
26463 IX86_BUILTIN_VPROTD_IMM
,
26464 IX86_BUILTIN_VPROTQ_IMM
,
26466 IX86_BUILTIN_VPSHLB
,
26467 IX86_BUILTIN_VPSHLW
,
26468 IX86_BUILTIN_VPSHLD
,
26469 IX86_BUILTIN_VPSHLQ
,
26470 IX86_BUILTIN_VPSHAB
,
26471 IX86_BUILTIN_VPSHAW
,
26472 IX86_BUILTIN_VPSHAD
,
26473 IX86_BUILTIN_VPSHAQ
,
26475 IX86_BUILTIN_VFRCZSS
,
26476 IX86_BUILTIN_VFRCZSD
,
26477 IX86_BUILTIN_VFRCZPS
,
26478 IX86_BUILTIN_VFRCZPD
,
26479 IX86_BUILTIN_VFRCZPS256
,
26480 IX86_BUILTIN_VFRCZPD256
,
26482 IX86_BUILTIN_VPCOMEQUB
,
26483 IX86_BUILTIN_VPCOMNEUB
,
26484 IX86_BUILTIN_VPCOMLTUB
,
26485 IX86_BUILTIN_VPCOMLEUB
,
26486 IX86_BUILTIN_VPCOMGTUB
,
26487 IX86_BUILTIN_VPCOMGEUB
,
26488 IX86_BUILTIN_VPCOMFALSEUB
,
26489 IX86_BUILTIN_VPCOMTRUEUB
,
26491 IX86_BUILTIN_VPCOMEQUW
,
26492 IX86_BUILTIN_VPCOMNEUW
,
26493 IX86_BUILTIN_VPCOMLTUW
,
26494 IX86_BUILTIN_VPCOMLEUW
,
26495 IX86_BUILTIN_VPCOMGTUW
,
26496 IX86_BUILTIN_VPCOMGEUW
,
26497 IX86_BUILTIN_VPCOMFALSEUW
,
26498 IX86_BUILTIN_VPCOMTRUEUW
,
26500 IX86_BUILTIN_VPCOMEQUD
,
26501 IX86_BUILTIN_VPCOMNEUD
,
26502 IX86_BUILTIN_VPCOMLTUD
,
26503 IX86_BUILTIN_VPCOMLEUD
,
26504 IX86_BUILTIN_VPCOMGTUD
,
26505 IX86_BUILTIN_VPCOMGEUD
,
26506 IX86_BUILTIN_VPCOMFALSEUD
,
26507 IX86_BUILTIN_VPCOMTRUEUD
,
26509 IX86_BUILTIN_VPCOMEQUQ
,
26510 IX86_BUILTIN_VPCOMNEUQ
,
26511 IX86_BUILTIN_VPCOMLTUQ
,
26512 IX86_BUILTIN_VPCOMLEUQ
,
26513 IX86_BUILTIN_VPCOMGTUQ
,
26514 IX86_BUILTIN_VPCOMGEUQ
,
26515 IX86_BUILTIN_VPCOMFALSEUQ
,
26516 IX86_BUILTIN_VPCOMTRUEUQ
,
26518 IX86_BUILTIN_VPCOMEQB
,
26519 IX86_BUILTIN_VPCOMNEB
,
26520 IX86_BUILTIN_VPCOMLTB
,
26521 IX86_BUILTIN_VPCOMLEB
,
26522 IX86_BUILTIN_VPCOMGTB
,
26523 IX86_BUILTIN_VPCOMGEB
,
26524 IX86_BUILTIN_VPCOMFALSEB
,
26525 IX86_BUILTIN_VPCOMTRUEB
,
26527 IX86_BUILTIN_VPCOMEQW
,
26528 IX86_BUILTIN_VPCOMNEW
,
26529 IX86_BUILTIN_VPCOMLTW
,
26530 IX86_BUILTIN_VPCOMLEW
,
26531 IX86_BUILTIN_VPCOMGTW
,
26532 IX86_BUILTIN_VPCOMGEW
,
26533 IX86_BUILTIN_VPCOMFALSEW
,
26534 IX86_BUILTIN_VPCOMTRUEW
,
26536 IX86_BUILTIN_VPCOMEQD
,
26537 IX86_BUILTIN_VPCOMNED
,
26538 IX86_BUILTIN_VPCOMLTD
,
26539 IX86_BUILTIN_VPCOMLED
,
26540 IX86_BUILTIN_VPCOMGTD
,
26541 IX86_BUILTIN_VPCOMGED
,
26542 IX86_BUILTIN_VPCOMFALSED
,
26543 IX86_BUILTIN_VPCOMTRUED
,
26545 IX86_BUILTIN_VPCOMEQQ
,
26546 IX86_BUILTIN_VPCOMNEQ
,
26547 IX86_BUILTIN_VPCOMLTQ
,
26548 IX86_BUILTIN_VPCOMLEQ
,
26549 IX86_BUILTIN_VPCOMGTQ
,
26550 IX86_BUILTIN_VPCOMGEQ
,
26551 IX86_BUILTIN_VPCOMFALSEQ
,
26552 IX86_BUILTIN_VPCOMTRUEQ
,
26554 /* LWP instructions. */
26555 IX86_BUILTIN_LLWPCB
,
26556 IX86_BUILTIN_SLWPCB
,
26557 IX86_BUILTIN_LWPVAL32
,
26558 IX86_BUILTIN_LWPVAL64
,
26559 IX86_BUILTIN_LWPINS32
,
26560 IX86_BUILTIN_LWPINS64
,
26565 IX86_BUILTIN_XBEGIN
,
26567 IX86_BUILTIN_XABORT
,
26568 IX86_BUILTIN_XTEST
,
26570 /* BMI instructions. */
26571 IX86_BUILTIN_BEXTR32
,
26572 IX86_BUILTIN_BEXTR64
,
26575 /* TBM instructions. */
26576 IX86_BUILTIN_BEXTRI32
,
26577 IX86_BUILTIN_BEXTRI64
,
26579 /* BMI2 instructions. */
26580 IX86_BUILTIN_BZHI32
,
26581 IX86_BUILTIN_BZHI64
,
26582 IX86_BUILTIN_PDEP32
,
26583 IX86_BUILTIN_PDEP64
,
26584 IX86_BUILTIN_PEXT32
,
26585 IX86_BUILTIN_PEXT64
,
26587 /* ADX instructions. */
26588 IX86_BUILTIN_ADDCARRYX32
,
26589 IX86_BUILTIN_ADDCARRYX64
,
26591 /* FSGSBASE instructions. */
26592 IX86_BUILTIN_RDFSBASE32
,
26593 IX86_BUILTIN_RDFSBASE64
,
26594 IX86_BUILTIN_RDGSBASE32
,
26595 IX86_BUILTIN_RDGSBASE64
,
26596 IX86_BUILTIN_WRFSBASE32
,
26597 IX86_BUILTIN_WRFSBASE64
,
26598 IX86_BUILTIN_WRGSBASE32
,
26599 IX86_BUILTIN_WRGSBASE64
,
26601 /* RDRND instructions. */
26602 IX86_BUILTIN_RDRAND16_STEP
,
26603 IX86_BUILTIN_RDRAND32_STEP
,
26604 IX86_BUILTIN_RDRAND64_STEP
,
26606 /* RDSEED instructions. */
26607 IX86_BUILTIN_RDSEED16_STEP
,
26608 IX86_BUILTIN_RDSEED32_STEP
,
26609 IX86_BUILTIN_RDSEED64_STEP
,
26611 /* F16C instructions. */
26612 IX86_BUILTIN_CVTPH2PS
,
26613 IX86_BUILTIN_CVTPH2PS256
,
26614 IX86_BUILTIN_CVTPS2PH
,
26615 IX86_BUILTIN_CVTPS2PH256
,
26617 /* CFString built-in for darwin */
26618 IX86_BUILTIN_CFSTRING
,
26620 /* Builtins to get CPU type and supported features. */
26621 IX86_BUILTIN_CPU_INIT
,
26622 IX86_BUILTIN_CPU_IS
,
26623 IX86_BUILTIN_CPU_SUPPORTS
,
26628 /* Table for the ix86 builtin decls. */
26629 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26631 /* Table of all of the builtin functions that are possible with different ISA's
26632 but are waiting to be built until a function is declared to use that
26634 struct builtin_isa
{
26635 const char *name
; /* function name */
26636 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26637 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26638 bool const_p
; /* true if the declaration is constant */
26639 bool set_and_not_built_p
;
26642 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26645 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26646 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26647 function decl in the ix86_builtins array. Returns the function decl or
26648 NULL_TREE, if the builtin was not added.
26650 If the front end has a special hook for builtin functions, delay adding
26651 builtin functions that aren't in the current ISA until the ISA is changed
26652 with function specific optimization. Doing so, can save about 300K for the
26653 default compiler. When the builtin is expanded, check at that time whether
26656 If the front end doesn't have a special hook, record all builtins, even if
26657 it isn't an instruction set in the current ISA in case the user uses
26658 function specific options for a different ISA, so that we don't get scope
26659 errors if a builtin is added in the middle of a function scope. */
26662 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26663 enum ix86_builtin_func_type tcode
,
26664 enum ix86_builtins code
)
26666 tree decl
= NULL_TREE
;
26668 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26670 ix86_builtins_isa
[(int) code
].isa
= mask
;
26672 mask
&= ~OPTION_MASK_ISA_64BIT
;
26674 || (mask
& ix86_isa_flags
) != 0
26675 || (lang_hooks
.builtin_function
26676 == lang_hooks
.builtin_function_ext_scope
))
26679 tree type
= ix86_get_builtin_func_type (tcode
);
26680 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26682 ix86_builtins
[(int) code
] = decl
;
26683 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26687 ix86_builtins
[(int) code
] = NULL_TREE
;
26688 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26689 ix86_builtins_isa
[(int) code
].name
= name
;
26690 ix86_builtins_isa
[(int) code
].const_p
= false;
26691 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26698 /* Like def_builtin, but also marks the function decl "const". */
26701 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26702 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26704 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26706 TREE_READONLY (decl
) = 1;
26708 ix86_builtins_isa
[(int) code
].const_p
= true;
26713 /* Add any new builtin functions for a given ISA that may not have been
26714 declared. This saves a bit of space compared to adding all of the
26715 declarations to the tree, even if we didn't use them. */
26718 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26722 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26724 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26725 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26729 /* Don't define the builtin again. */
26730 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26732 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26733 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26734 type
, i
, BUILT_IN_MD
, NULL
,
26737 ix86_builtins
[i
] = decl
;
26738 if (ix86_builtins_isa
[i
].const_p
)
26739 TREE_READONLY (decl
) = 1;
26744 /* Bits for builtin_description.flag. */
26746 /* Set when we don't support the comparison natively, and should
26747 swap_comparison in order to support it. */
26748 #define BUILTIN_DESC_SWAP_OPERANDS 1
26750 struct builtin_description
26752 const HOST_WIDE_INT mask
;
26753 const enum insn_code icode
;
26754 const char *const name
;
26755 const enum ix86_builtins code
;
26756 const enum rtx_code comparison
;
26760 static const struct builtin_description bdesc_comi
[] =
26762 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26763 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26764 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26765 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26766 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26767 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26768 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26769 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26770 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26771 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26772 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26773 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26774 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26775 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26782 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26788 static const struct builtin_description bdesc_pcmpestr
[] =
26791 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26792 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26793 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26794 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26795 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26796 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26797 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26800 static const struct builtin_description bdesc_pcmpistr
[] =
26803 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26804 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26805 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26806 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26807 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26808 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26809 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26812 /* Special builtins with variable number of arguments. */
26813 static const struct builtin_description bdesc_special_args
[] =
26815 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26816 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26817 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26820 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26823 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26825 /* FXSR, XSAVE and XSAVEOPT */
26826 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26827 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26828 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26829 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26830 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26832 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26833 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26834 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26835 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26836 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26839 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26840 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26841 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26843 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26844 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26845 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26846 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26848 /* SSE or 3DNow!A */
26849 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26850 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26854 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26857 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26860 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26862 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26865 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26868 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26871 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26874 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26875 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26878 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26879 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26881 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26882 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26883 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26884 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26885 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26887 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26888 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26889 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26890 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26891 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26892 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26893 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26895 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26896 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26897 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26899 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26900 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26901 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26902 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26903 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26904 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26905 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26906 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26911 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26912 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26913 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26914 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26915 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26916 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26917 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26919 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26920 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26921 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26922 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26923 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26924 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26927 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26928 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26929 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26930 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26931 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26932 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26933 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26934 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26937 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26938 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26939 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26942 /* Builtins with variable number of arguments. */
26943 static const struct builtin_description bdesc_args
[] =
26945 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26946 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26947 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26948 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26949 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26950 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26951 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26954 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26955 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26956 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26957 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26958 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26959 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26961 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26962 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26963 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26964 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26965 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26966 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26967 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26968 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26970 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26971 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26973 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26974 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26975 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26976 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26978 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26979 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26980 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26981 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26982 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26983 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26985 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26986 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26987 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26988 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26989 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26990 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26992 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26993 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26994 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26996 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26998 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26999 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27000 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27001 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27002 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27005 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27006 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27007 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27008 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27009 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27010 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27012 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27013 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27014 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27015 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27018 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27019 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27020 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27021 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27023 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27024 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27025 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27026 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27027 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27028 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27029 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27030 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27031 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27032 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27033 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27034 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27035 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27036 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27037 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27040 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27041 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27042 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27043 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27044 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27045 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27048 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27049 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27050 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27051 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27052 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27053 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27054 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27055 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27056 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27057 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27058 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27059 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27061 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27063 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27064 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27065 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27066 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27067 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27068 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27069 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27070 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27072 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27073 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27074 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27075 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27076 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27077 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27078 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27079 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27080 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27081 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27082 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27083 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27084 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27085 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27086 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27087 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27088 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27089 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27090 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27091 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27092 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27096 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27098 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27100 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27102 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27103 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27105 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27110 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27111 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27115 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27117 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27123 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27124 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27126 /* SSE MMX or 3Dnow!A */
27127 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27128 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27129 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27131 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27132 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27133 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27134 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27136 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27137 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27139 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27144 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27145 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27146 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27147 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27148 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27150 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27151 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27152 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27153 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27154 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27156 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27158 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27159 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27160 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27161 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27164 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27167 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27168 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27169 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27170 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27171 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27172 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27173 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27174 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27176 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27177 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27178 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27179 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27180 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27181 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27182 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27183 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27184 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27185 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27186 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27187 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27189 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27197 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27198 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27203 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27204 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27205 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27207 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27209 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27211 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27244 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27275 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27278 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27279 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27309 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27316 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27317 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27319 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27320 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27321 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27322 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27323 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27324 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27327 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27328 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27329 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27330 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27331 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27332 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27334 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27335 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27336 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27337 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27338 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27339 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27340 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27341 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27342 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27343 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27344 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27345 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27346 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27347 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27348 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27349 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27350 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27351 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27352 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27353 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27354 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27355 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27356 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27357 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27360 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27361 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27364 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27365 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27366 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27367 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27368 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27369 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27370 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27371 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27372 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27373 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27375 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27376 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27377 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27378 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27379 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27380 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27381 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27382 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27383 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27384 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27385 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27386 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27387 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27389 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27390 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27391 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27392 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27393 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27394 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27395 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27396 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27397 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27398 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27399 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27400 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27403 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27404 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27405 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27406 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27408 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27409 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27410 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27411 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27413 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27414 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27416 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27417 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27419 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27420 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27421 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27422 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27424 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27425 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27427 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27428 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27430 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27431 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27432 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27435 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27436 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27437 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27438 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27439 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27442 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27443 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27444 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27445 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27460 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27461 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27462 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27463 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27464 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27465 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27466 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27467 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27468 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27469 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27470 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27471 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27472 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27473 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27474 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27475 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27476 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27477 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27478 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27479 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27480 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27481 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27482 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27483 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27484 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27485 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27487 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27488 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27489 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27490 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27492 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27493 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27494 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27495 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27496 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27497 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27498 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27499 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27500 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27501 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27502 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27503 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27504 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27505 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27506 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27507 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27508 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27509 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27510 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27511 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27512 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27513 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27514 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27515 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27516 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27517 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27518 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27519 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27520 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27521 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27522 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27523 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27524 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27525 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27527 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27528 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27529 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27531 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27532 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27533 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27535 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27537 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27540 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27543 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27558 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27562 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27564 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27566 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27570 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27572 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27573 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27574 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27576 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27577 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27578 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27579 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27580 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27581 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27582 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27583 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27584 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27585 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27586 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27587 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27588 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27589 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27590 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27592 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27593 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27595 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27596 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27598 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27601 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27602 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27603 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27604 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27605 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27606 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27607 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27608 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27609 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27610 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27611 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27612 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27613 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27614 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27615 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27616 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27617 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27618 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27619 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27620 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27621 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27622 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27623 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27624 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27625 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27626 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27627 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27628 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27629 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27630 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27631 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27632 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27633 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27634 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27635 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27636 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27637 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27638 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27639 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27640 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27641 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27642 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27643 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27644 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27645 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27646 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27711 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27712 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27713 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27714 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27715 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27748 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27751 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27752 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27753 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27756 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27757 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27760 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27761 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27762 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27763 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27766 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27767 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27768 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27769 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27770 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27771 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand names for the function-type codes used by bdesc_multi_arg
   below.  The suffix encodes argument count and element layout; _CMP
   and _TF variants additionally carry a comparison / table-flag code.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27828 static const struct builtin_description bdesc_multi_arg
[] =
27830 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27831 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27832 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27833 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27834 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27835 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27837 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27838 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27839 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27840 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27841 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27842 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27844 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27845 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27846 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27847 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27848 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27849 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27850 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27851 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27852 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27853 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27854 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27855 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27857 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27858 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27859 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27860 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27861 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27862 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27863 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27864 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27865 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27866 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27867 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27868 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27870 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27871 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27872 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27873 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27874 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27875 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27876 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27878 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27879 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27880 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27881 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27882 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27883 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27884 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27886 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27888 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27889 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27890 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27891 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27892 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27893 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27894 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27895 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27896 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27897 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27898 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27899 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27901 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27902 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27903 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27904 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27905 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27906 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27907 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27908 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27909 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27910 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27911 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27912 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27913 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27914 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27915 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27916 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27918 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27919 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27920 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27921 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27922 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27923 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27925 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27926 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27927 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27928 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27929 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27930 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27931 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27932 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27933 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27934 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27935 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27936 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27937 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27938 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27939 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27941 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27942 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27943 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27944 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27945 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27946 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27947 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27949 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27950 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27951 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27952 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27953 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27954 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27955 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27957 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27958 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27959 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27960 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27961 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27962 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27963 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27965 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27966 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27967 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27968 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27969 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27970 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27971 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27973 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27974 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27975 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27976 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27977 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27978 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27979 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27984 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27986 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27987 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27992 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27994 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27995 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28000 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28002 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28003 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28008 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28010 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28011 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28012 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28016 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28018 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28019 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28020 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28024 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28026 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28030 /* TM vector builtins. */
28032 /* Reuse the existing x86-specific `struct builtin_description' cause
28033 we're lazy. Add casts to make them fit. */
28034 static const struct builtin_description bdesc_tm
[] =
28036 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28037 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28038 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28039 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28040 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28041 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28042 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28044 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28045 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28046 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28047 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28048 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28049 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28050 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28052 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28053 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28054 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28055 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28056 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28057 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28058 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28060 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28061 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28062 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28065 /* TM callbacks. */
28067 /* Return the builtin decl needed to load a vector of TYPE. */
28070 ix86_builtin_tm_load (tree type
)
28072 if (TREE_CODE (type
) == VECTOR_TYPE
)
28074 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28077 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28079 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28081 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28087 /* Return the builtin decl needed to store a vector of TYPE. */
28090 ix86_builtin_tm_store (tree type
)
28092 if (TREE_CODE (type
) == VECTOR_TYPE
)
28094 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28097 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28099 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28101 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28107 /* Initialize the transactional memory vector load/store builtins. */
28110 ix86_init_tm_builtins (void)
28112 enum ix86_builtin_func_type ftype
;
28113 const struct builtin_description
*d
;
28116 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28117 tree attrs_log
, attrs_type_log
;
28122 /* If there are no builtins defined, we must be compiling in a
28123 language without trans-mem support. */
28124 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28127 /* Use whatever attributes a normal TM load has. */
28128 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28129 attrs_load
= DECL_ATTRIBUTES (decl
);
28130 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28131 /* Use whatever attributes a normal TM store has. */
28132 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28133 attrs_store
= DECL_ATTRIBUTES (decl
);
28134 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28135 /* Use whatever attributes a normal TM log has. */
28136 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28137 attrs_log
= DECL_ATTRIBUTES (decl
);
28138 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28140 for (i
= 0, d
= bdesc_tm
;
28141 i
< ARRAY_SIZE (bdesc_tm
);
28144 if ((d
->mask
& ix86_isa_flags
) != 0
28145 || (lang_hooks
.builtin_function
28146 == lang_hooks
.builtin_function_ext_scope
))
28148 tree type
, attrs
, attrs_type
;
28149 enum built_in_function code
= (enum built_in_function
) d
->code
;
28151 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28152 type
= ix86_get_builtin_func_type (ftype
);
28154 if (BUILTIN_TM_LOAD_P (code
))
28156 attrs
= attrs_load
;
28157 attrs_type
= attrs_type_load
;
28159 else if (BUILTIN_TM_STORE_P (code
))
28161 attrs
= attrs_store
;
28162 attrs_type
= attrs_type_store
;
28167 attrs_type
= attrs_type_log
;
28169 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28170 /* The builtin without the prefix for
28171 calling it directly. */
28172 d
->name
+ strlen ("__builtin_"),
28174 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28175 set the TYPE_ATTRIBUTES. */
28176 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28178 set_builtin_decl (code
, decl
, false);
28183 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28184 in the current target ISA to allow the user to compile particular modules
28185 with different target specific options that differ from the command line
28188 ix86_init_mmx_sse_builtins (void)
28190 const struct builtin_description
* d
;
28191 enum ix86_builtin_func_type ftype
;
28194 /* Add all special builtins with variable number of operands. */
28195 for (i
= 0, d
= bdesc_special_args
;
28196 i
< ARRAY_SIZE (bdesc_special_args
);
28202 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28203 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28206 /* Add all builtins with variable number of operands. */
28207 for (i
= 0, d
= bdesc_args
;
28208 i
< ARRAY_SIZE (bdesc_args
);
28214 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28215 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28218 /* pcmpestr[im] insns. */
28219 for (i
= 0, d
= bdesc_pcmpestr
;
28220 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28223 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28224 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28226 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28227 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28230 /* pcmpistr[im] insns. */
28231 for (i
= 0, d
= bdesc_pcmpistr
;
28232 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28235 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28236 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28238 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28239 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28242 /* comi/ucomi insns. */
28243 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28245 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28246 ftype
= INT_FTYPE_V2DF_V2DF
;
28248 ftype
= INT_FTYPE_V4SF_V4SF
;
28249 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28253 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28254 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28255 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28256 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28258 /* SSE or 3DNow!A */
28259 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28260 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28261 IX86_BUILTIN_MASKMOVQ
);
28264 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28265 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28267 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28268 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28269 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28270 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28273 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28274 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28275 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28276 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28279 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28280 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28281 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28282 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28283 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28284 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28285 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28286 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28287 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28288 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28289 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28290 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28293 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28294 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28297 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28298 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28299 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28300 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28301 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28302 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28303 IX86_BUILTIN_RDRAND64_STEP
);
28306 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28307 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28308 IX86_BUILTIN_GATHERSIV2DF
);
28310 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28311 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28312 IX86_BUILTIN_GATHERSIV4DF
);
28314 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28315 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28316 IX86_BUILTIN_GATHERDIV2DF
);
28318 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28319 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28320 IX86_BUILTIN_GATHERDIV4DF
);
28322 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28323 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28324 IX86_BUILTIN_GATHERSIV4SF
);
28326 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28327 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28328 IX86_BUILTIN_GATHERSIV8SF
);
28330 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28331 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28332 IX86_BUILTIN_GATHERDIV4SF
);
28334 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28335 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28336 IX86_BUILTIN_GATHERDIV8SF
);
28338 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28339 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28340 IX86_BUILTIN_GATHERSIV2DI
);
28342 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28343 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28344 IX86_BUILTIN_GATHERSIV4DI
);
28346 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28347 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28348 IX86_BUILTIN_GATHERDIV2DI
);
28350 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28351 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28352 IX86_BUILTIN_GATHERDIV4DI
);
28354 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28355 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28356 IX86_BUILTIN_GATHERSIV4SI
);
28358 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28359 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28360 IX86_BUILTIN_GATHERSIV8SI
);
28362 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28363 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28364 IX86_BUILTIN_GATHERDIV4SI
);
28366 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28367 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28368 IX86_BUILTIN_GATHERDIV8SI
);
28370 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28371 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28372 IX86_BUILTIN_GATHERALTSIV4DF
);
28374 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28375 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28376 IX86_BUILTIN_GATHERALTDIV8SF
);
28378 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28379 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28380 IX86_BUILTIN_GATHERALTSIV4DI
);
28382 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28383 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28384 IX86_BUILTIN_GATHERALTDIV8SI
);
28387 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28388 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28390 /* MMX access to the vec_init patterns. */
28391 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28392 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28394 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28395 V4HI_FTYPE_HI_HI_HI_HI
,
28396 IX86_BUILTIN_VEC_INIT_V4HI
);
28398 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28399 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28400 IX86_BUILTIN_VEC_INIT_V8QI
);
28402 /* Access to the vec_extract patterns. */
28403 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28404 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28405 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28406 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28407 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28408 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28409 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28410 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28411 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28412 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28414 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28415 "__builtin_ia32_vec_ext_v4hi",
28416 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28418 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28419 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28421 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28422 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28424 /* Access to the vec_set patterns. */
28425 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28426 "__builtin_ia32_vec_set_v2di",
28427 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28429 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28430 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28432 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28433 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28435 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28436 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28438 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28439 "__builtin_ia32_vec_set_v4hi",
28440 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28442 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28443 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28446 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28447 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28448 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28449 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28450 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28451 "__builtin_ia32_rdseed_di_step",
28452 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28455 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28456 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28457 def_builtin (OPTION_MASK_ISA_64BIT
,
28458 "__builtin_ia32_addcarryx_u64",
28459 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28460 IX86_BUILTIN_ADDCARRYX64
);
28462 /* Add FMA4 multi-arg argument instructions */
28463 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28468 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28469 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28473 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28474 to return a pointer to VERSION_DECL if the outcome of the expression
28475 formed by PREDICATE_CHAIN is true. This function will be called during
28476 version dispatch to decide which function version to execute. It returns
28477 the basic block at the end, to which more conditions can be added. */
28480 add_condition_to_bb (tree function_decl
, tree version_decl
,
28481 tree predicate_chain
, basic_block new_bb
)
28483 gimple return_stmt
;
28484 tree convert_expr
, result_var
;
28485 gimple convert_stmt
;
28486 gimple call_cond_stmt
;
28487 gimple if_else_stmt
;
28489 basic_block bb1
, bb2
, bb3
;
28492 tree cond_var
, and_expr_var
= NULL_TREE
;
28495 tree predicate_decl
, predicate_arg
;
28497 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
28499 gcc_assert (new_bb
!= NULL
);
28500 gseq
= bb_seq (new_bb
);
28503 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
28504 build_fold_addr_expr (version_decl
));
28505 result_var
= create_tmp_var (ptr_type_node
, NULL
);
28506 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
28507 return_stmt
= gimple_build_return (result_var
);
28509 if (predicate_chain
== NULL_TREE
)
28511 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28512 gimple_seq_add_stmt (&gseq
, return_stmt
);
28513 set_bb_seq (new_bb
, gseq
);
28514 gimple_set_bb (convert_stmt
, new_bb
);
28515 gimple_set_bb (return_stmt
, new_bb
);
28520 while (predicate_chain
!= NULL
)
28522 cond_var
= create_tmp_var (integer_type_node
, NULL
);
28523 predicate_decl
= TREE_PURPOSE (predicate_chain
);
28524 predicate_arg
= TREE_VALUE (predicate_chain
);
28525 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
28526 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
28528 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
28529 gimple_set_bb (call_cond_stmt
, new_bb
);
28530 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
28532 predicate_chain
= TREE_CHAIN (predicate_chain
);
28534 if (and_expr_var
== NULL
)
28535 and_expr_var
= cond_var
;
28538 gimple assign_stmt
;
28539 /* Use MIN_EXPR to check if any integer is zero?.
28540 and_expr_var = min_expr <cond_var, and_expr_var> */
28541 assign_stmt
= gimple_build_assign (and_expr_var
,
28542 build2 (MIN_EXPR
, integer_type_node
,
28543 cond_var
, and_expr_var
));
28545 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
28546 gimple_set_bb (assign_stmt
, new_bb
);
28547 gimple_seq_add_stmt (&gseq
, assign_stmt
);
28551 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
28553 NULL_TREE
, NULL_TREE
);
28554 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
28555 gimple_set_bb (if_else_stmt
, new_bb
);
28556 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
28558 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28559 gimple_seq_add_stmt (&gseq
, return_stmt
);
28560 set_bb_seq (new_bb
, gseq
);
28563 e12
= split_block (bb1
, if_else_stmt
);
28565 e12
->flags
&= ~EDGE_FALLTHRU
;
28566 e12
->flags
|= EDGE_TRUE_VALUE
;
28568 e23
= split_block (bb2
, return_stmt
);
28570 gimple_set_bb (convert_stmt
, bb2
);
28571 gimple_set_bb (return_stmt
, bb2
);
28574 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
28577 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
28584 /* This parses the attribute arguments to target in DECL and determines
28585 the right builtin to use to match the platform specification.
28586 It returns the priority value for this version decl. If PREDICATE_LIST
28587 is not NULL, it stores the list of cpu features that need to be checked
28588 before dispatching this function. */
28590 static unsigned int
28591 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
28594 struct cl_target_option cur_target
;
28596 struct cl_target_option
*new_target
;
28597 const char *arg_str
= NULL
;
28598 const char *attrs_str
= NULL
;
28599 char *tok_str
= NULL
;
28602 /* Priority of i386 features, greater value is higher priority. This is
28603 used to decide the order in which function dispatch must happen. For
28604 instance, a version specialized for SSE4.2 should be checked for dispatch
28605 before a version for SSE3, as SSE4.2 implies SSE3. */
28606 enum feature_priority
28627 enum feature_priority priority
= P_ZERO
;
28629 /* These are the target attribute strings for which a dispatcher is
28630 available, from fold_builtin_cpu. */
28632 static struct _feature_list
28634 const char *const name
;
28635 const enum feature_priority priority
;
28637 const feature_list
[] =
28643 {"ssse3", P_SSSE3
},
28644 {"sse4.1", P_SSE4_1
},
28645 {"sse4.2", P_SSE4_2
},
28646 {"popcnt", P_POPCNT
},
28652 static unsigned int NUM_FEATURES
28653 = sizeof (feature_list
) / sizeof (struct _feature_list
);
28657 tree predicate_chain
= NULL_TREE
;
28658 tree predicate_decl
, predicate_arg
;
28660 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28661 gcc_assert (attrs
!= NULL
);
28663 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
28665 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
28666 attrs_str
= TREE_STRING_POINTER (attrs
);
28669 /* Handle arch= if specified. For priority, set it to be 1 more than
28670 the best instruction set the processor can handle. For instance, if
28671 there is a version for atom and a version for ssse3 (the highest ISA
28672 priority for atom), the atom version must be checked for dispatch
28673 before the ssse3 version. */
28674 if (strstr (attrs_str
, "arch=") != NULL
)
28676 cl_target_option_save (&cur_target
, &global_options
);
28677 target_node
= ix86_valid_target_attribute_tree (attrs
);
28679 gcc_assert (target_node
);
28680 new_target
= TREE_TARGET_OPTION (target_node
);
28681 gcc_assert (new_target
);
28683 if (new_target
->arch_specified
&& new_target
->arch
> 0)
28685 switch (new_target
->arch
)
28687 case PROCESSOR_CORE2
:
28689 priority
= P_PROC_SSSE3
;
28691 case PROCESSOR_COREI7
:
28692 arg_str
= "corei7";
28693 priority
= P_PROC_SSE4_2
;
28695 case PROCESSOR_ATOM
:
28697 priority
= P_PROC_SSSE3
;
28699 case PROCESSOR_AMDFAM10
:
28700 arg_str
= "amdfam10h";
28701 priority
= P_PROC_SSE4_a
;
28703 case PROCESSOR_BDVER1
:
28704 arg_str
= "bdver1";
28705 priority
= P_PROC_FMA
;
28707 case PROCESSOR_BDVER2
:
28708 arg_str
= "bdver2";
28709 priority
= P_PROC_FMA
;
28714 cl_target_option_restore (&global_options
, &cur_target
);
28716 if (predicate_list
&& arg_str
== NULL
)
28718 error_at (DECL_SOURCE_LOCATION (decl
),
28719 "No dispatcher found for the versioning attributes");
28723 if (predicate_list
)
28725 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
28726 /* For a C string literal the length includes the trailing NULL. */
28727 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
28728 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28733 /* Process feature name. */
28734 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
28735 strcpy (tok_str
, attrs_str
);
28736 token
= strtok (tok_str
, ",");
28737 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
28739 while (token
!= NULL
)
28741 /* Do not process "arch=" */
28742 if (strncmp (token
, "arch=", 5) == 0)
28744 token
= strtok (NULL
, ",");
28747 for (i
= 0; i
< NUM_FEATURES
; ++i
)
28749 if (strcmp (token
, feature_list
[i
].name
) == 0)
28751 if (predicate_list
)
28753 predicate_arg
= build_string_literal (
28754 strlen (feature_list
[i
].name
) + 1,
28755 feature_list
[i
].name
);
28756 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28759 /* Find the maximum priority feature. */
28760 if (feature_list
[i
].priority
> priority
)
28761 priority
= feature_list
[i
].priority
;
28766 if (predicate_list
&& i
== NUM_FEATURES
)
28768 error_at (DECL_SOURCE_LOCATION (decl
),
28769 "No dispatcher found for %s", token
);
28772 token
= strtok (NULL
, ",");
28776 if (predicate_list
&& predicate_chain
== NULL_TREE
)
28778 error_at (DECL_SOURCE_LOCATION (decl
),
28779 "No dispatcher found for the versioning attributes : %s",
28783 else if (predicate_list
)
28785 predicate_chain
= nreverse (predicate_chain
);
28786 *predicate_list
= predicate_chain
;
28792 /* This compares the priority of target features in function DECL1
28793 and DECL2. It returns positive value if DECL1 is higher priority,
28794 negative value if DECL2 is higher priority and 0 if they are the
28798 ix86_compare_version_priority (tree decl1
, tree decl2
)
28800 unsigned int priority1
= 0;
28801 unsigned int priority2
= 0;
28803 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1
)) != NULL
)
28804 priority1
= get_builtin_code_for_version (decl1
, NULL
);
28806 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2
)) != NULL
)
28807 priority2
= get_builtin_code_for_version (decl2
, NULL
);
28809 return (int)priority1
- (int)priority2
;
28812 /* V1 and V2 point to function versions with different priorities
28813 based on the target ISA. This function compares their priorities. */
28816 feature_compare (const void *v1
, const void *v2
)
28818 typedef struct _function_version_info
28821 tree predicate_chain
;
28822 unsigned int dispatch_priority
;
28823 } function_version_info
;
28825 const function_version_info c1
= *(const function_version_info
*)v1
;
28826 const function_version_info c2
= *(const function_version_info
*)v2
;
28827 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
28830 /* This function generates the dispatch function for
28831 multi-versioned functions. DISPATCH_DECL is the function which will
28832 contain the dispatch logic. FNDECLS are the function choices for
28833 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
28834 in DISPATCH_DECL in which the dispatch code is generated. */
28837 dispatch_function_versions (tree dispatch_decl
,
28839 basic_block
*empty_bb
)
28842 gimple ifunc_cpu_init_stmt
;
28846 vec
<tree
> *fndecls
;
28847 unsigned int num_versions
= 0;
28848 unsigned int actual_versions
= 0;
28851 struct _function_version_info
28854 tree predicate_chain
;
28855 unsigned int dispatch_priority
;
28856 }*function_version_info
;
28858 gcc_assert (dispatch_decl
!= NULL
28859 && fndecls_p
!= NULL
28860 && empty_bb
!= NULL
);
28862 /*fndecls_p is actually a vector. */
28863 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
28865 /* At least one more version other than the default. */
28866 num_versions
= fndecls
->length ();
28867 gcc_assert (num_versions
>= 2);
28869 function_version_info
= (struct _function_version_info
*)
28870 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
28872 /* The first version in the vector is the default decl. */
28873 default_decl
= (*fndecls
)[0];
28875 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
28877 gseq
= bb_seq (*empty_bb
);
28878 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
28879 constructors, so explicity call __builtin_cpu_init here. */
28880 ifunc_cpu_init_stmt
= gimple_build_call_vec (
28881 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
28882 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
28883 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
28884 set_bb_seq (*empty_bb
, gseq
);
28889 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
28891 tree version_decl
= ele
;
28892 tree predicate_chain
= NULL_TREE
;
28893 unsigned int priority
;
28894 /* Get attribute string, parse it and find the right predicate decl.
28895 The predicate function could be a lengthy combination of many
28896 features, like arch-type and various isa-variants. */
28897 priority
= get_builtin_code_for_version (version_decl
,
28900 if (predicate_chain
== NULL_TREE
)
28904 function_version_info
[ix
- 1].version_decl
= version_decl
;
28905 function_version_info
[ix
- 1].predicate_chain
= predicate_chain
;
28906 function_version_info
[ix
- 1].dispatch_priority
= priority
;
28909 /* Sort the versions according to descending order of dispatch priority. The
28910 priority is based on the ISA. This is not a perfect solution. There
28911 could still be ambiguity. If more than one function version is suitable
28912 to execute, which one should be dispatched? In future, allow the user
28913 to specify a dispatch priority next to the version. */
28914 qsort (function_version_info
, actual_versions
,
28915 sizeof (struct _function_version_info
), feature_compare
);
28917 for (i
= 0; i
< actual_versions
; ++i
)
28918 *empty_bb
= add_condition_to_bb (dispatch_decl
,
28919 function_version_info
[i
].version_decl
,
28920 function_version_info
[i
].predicate_chain
,
28923 /* dispatch default version at the end. */
28924 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
28927 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  V1 and V2 point to `char *'
   elements; compares the strings they point to with strcmp.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
28942 /* STR is the argument to target attribute. This function tokenizes
28943 the comma separated arguments, sorts them and returns a string which
28944 is a unique identifier for the comma separated arguments. It also
28945 replaces non-identifier characters "=,-" with "_". */
/* NOTE(review): lossy extraction -- the return type, braces and several
   statements (the token-counting loop body, the token-collection loop
   body, the final free/return) did not survive; only the remaining
   tokens are annotated below.  */
28948 sorted_attr_string (const char *str
)
28950 char **args
= NULL
;
28951 char *attr_str
, *ret_str
;
/* ARGNUM starts at 1: N comma separators imply N + 1 tokens.  */
28953 unsigned int argnum
= 1;
/* Presumably this loop counts ',' characters in STR to size ARGS --
   body lost in extraction, TODO confirm.  */
28956 for (i
= 0; i
< strlen (str
); i
++)
/* Work on a writable copy of STR, since strtok below mutates it.  */
28960 attr_str
= (char *)xmalloc (strlen (str
) + 1);
28961 strcpy (attr_str
, str
);
28963 /* Replace "=,-" with "_". */
28964 for (i
= 0; i
< strlen (attr_str
); i
++)
28965 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
28971 args
= XNEWVEC (char *, argnum
);
/* Tokenize the copy on ',' and (in the dropped loop body) record each
   token pointer into ARGS.  */
28974 attr
= strtok (attr_str
, ",");
28975 while (attr
!= NULL
)
28979 attr
= strtok (NULL
, ",");
/* Sort the token pointers lexicographically so equivalent attribute
   strings always map to the same identifier.  */
28982 qsort (args
, argnum
, sizeof (char*), attr_strcmp
);
/* Join the sorted tokens with '_'; RET_STR can be sized like STR
   because each separator is replaced one-for-one.  */
28984 ret_str
= (char *)xmalloc (strlen (str
) + 1);
28985 strcpy (ret_str
, args
[0]);
28986 for (i
= 1; i
< argnum
; i
++)
28988 strcat (ret_str
, "_");
28989 strcat (ret_str
, args
[i
]);
28997 /* This function changes the assembler name for functions that are
28998 versions. If DECL is a function version and has a "target"
28999 attribute, it appends the attribute string to its assembler name. */
/* NOTE(review): lossy extraction -- return type, braces and the early
   return path for default versions are missing from this view.  */
29002 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29005 const char *orig_name
, *version_string
, *attr_str
;
29006 char *assembler_name
;
/* gnu_inline functions never get bodies emitted, which versioning
   requires; diagnose the combination.  */
29008 if (DECL_DECLARED_INLINE_P (decl
)
29009 && lookup_attribute ("gnu_inline",
29010 DECL_ATTRIBUTES (decl
)))
29011 error_at (DECL_SOURCE_LOCATION (decl
),
29012 "Function versions cannot be marked as gnu_inline,"
29013 " bodies have to be generated");
29015 if (DECL_VIRTUAL_P (decl
)
29016 || DECL_VINDEX (decl
))
29017 error_at (DECL_SOURCE_LOCATION (decl
),
29018 "Virtual function versioning not supported\n");
29020 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29022 /* target attribute string is NULL for default functions. */
29023 if (version_attr
== NULL_TREE
)
/* Mangled name is "<orig_name>.<sorted-target-attr>".  */
29026 orig_name
= IDENTIFIER_POINTER (id
);
29028 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29030 attr_str
= sorted_attr_string (version_string
);
/* +2: one byte for the '.' separator, one for the terminating NUL.  */
29031 assembler_name
= (char *) xmalloc (strlen (orig_name
)
29032 + strlen (attr_str
) + 2);
29034 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29036 /* Allow assembler name to be modified if already set. */
29037 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29038 SET_DECL_RTL (decl
, NULL
);
29040 return get_identifier (assembler_name
);
29043 /* This function returns true if FN1 and FN2 are versions of the same function,
29044 that is, the target strings of the function decls are different. This assumes
29045 that FN1 and FN2 have the same signature. */
/* NOTE(review): lossy extraction -- the return type, braces and every
   return statement of this predicate are missing from this view.  */
29048 ix86_function_versions (tree fn1
, tree fn2
)
29051 const char *attr_str1
, *attr_str2
;
29052 char *target1
, *target2
;
/* Only FUNCTION_DECLs can be versions of each other.  */
29055 if (TREE_CODE (fn1
) != FUNCTION_DECL
29056 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29059 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29060 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29062 /* At least one function decl should have the target attribute specified. */
29063 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29066 /* If one function does not have a target attribute, these are versions. */
29067 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
/* Compare canonicalized (sorted) attribute strings so that e.g.
   "sse4.2,popcnt" and "popcnt,sse4.2" are recognized as equal.  */
29070 attr_str1
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr1
)));
29071 attr_str2
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr2
)));
29073 target1
= sorted_attr_string (attr_str1
);
29074 target2
= sorted_attr_string (attr_str2
);
29076 /* The sorted target strings must be different for fn1 and fn2
29078 if (strcmp (target1
, target2
) == 0)
29089 /* This target supports function multiversioning. */
/* NOTE(review): the body was lost in extraction; presumably this hook
   unconditionally returns true on i386 -- TODO confirm against the
   original file.  */
29092 ix86_supports_function_versions (void)
/* Target hook wrapper: mangle DECL's assembler name ID.  NOTE(review):
   lossy extraction -- return type, braces, the final "return id;" and
   the closing #endif are missing from this view.  */
29098 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29100 /* For function version, add the target suffix to the assembler name. */
29101 if (TREE_CODE (decl
) == FUNCTION_DECL
29102 && DECL_FUNCTION_VERSIONED (decl
))
29103 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
/* Give the subtarget a chance to apply further mangling.  */
29104 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29105 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29111 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29112 is true, append the full path name of the source file. */
/* NOTE(review): lossy extraction -- the conditionals guarding the
   unique-name branches (presumably "if (make_unique)") are missing, so
   the two snprintf calls below look unconditional here.  */
29115 make_name (tree decl
, const char *suffix
, bool make_unique
)
29117 char *global_var_name
;
29120 const char *unique_name
= NULL
;
29122 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29124 /* Get a unique name that can be used globally without any chances
29125 of collision at link time. */
29127 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* +2: separator '.' plus terminating NUL.  */
29129 name_len
= strlen (name
) + strlen (suffix
) + 2;
/* +1 more for the extra '.' when the unique part is included.  */
29132 name_len
+= strlen (unique_name
) + 1;
29133 global_var_name
= XNEWVEC (char, name_len
);
29135 /* Use '.' to concatenate names as it is demangler friendly. */
29137 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
,
29138 unique_name
, suffix
);
29140 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
/* Caller owns the returned XNEWVEC buffer.  */
29142 return global_var_name
;
/* Dispatching via GNU ifunc is only possible when the assembler and
   target support it.  */
29145 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29147 /* Make a dispatcher declaration for the multi-versioned function DECL.
29148 Calls to DECL function will be replaced with calls to the dispatcher
29149 by the front-end. Return the decl created. */
/* NOTE(review): lossy extraction -- return type, braces and the tail of
   the function (presumably "return func_decl;") are missing here.  */
29152 make_dispatcher_decl (const tree decl
)
29155 char *func_name
, *resolver_name
;
29156 tree fn_type
, func_type
;
29157 bool is_uniq
= false;
/* Non-public versions need uniquified names (presumably sets is_uniq
   in the dropped branch body -- TODO confirm).  */
29159 if (TREE_PUBLIC (decl
) == 0)
29162 func_name
= make_name (decl
, "ifunc", is_uniq
);
29163 resolver_name
= make_name (decl
, "resolver", is_uniq
);
29164 gcc_assert (resolver_name
);
/* The dispatcher shares DECL's function type.  */
29166 fn_type
= TREE_TYPE (decl
);
29167 func_type
= build_function_type (TREE_TYPE (fn_type
),
29168 TYPE_ARG_TYPES (fn_type
));
29170 func_decl
= build_fn_decl (func_name
, func_type
);
29171 TREE_USED (func_decl
) = 1;
29172 DECL_CONTEXT (func_decl
) = NULL_TREE
;
29173 DECL_INITIAL (func_decl
) = error_mark_node
;
29174 DECL_ARTIFICIAL (func_decl
) = 1;
29175 /* Mark this func as external, the resolver will flip it again if
29176 it gets generated. */
29177 DECL_EXTERNAL (func_decl
) = 1;
29178 /* This will be of type IFUNCs have to be externally visible. */
29179 TREE_PUBLIC (func_decl
) = 1;
29186 /* Returns true if decl is multi-versioned and DECL is the default function,
29187 that is it is not tagged with target specific optimization. */
29190 is_function_default_version (const tree decl
)
29192 return (TREE_CODE (decl
) == FUNCTION_DECL
29193 && DECL_FUNCTION_VERSIONED (decl
)
29194 && lookup_attribute ("target", DECL_ATTRIBUTES (decl
)) == NULL_TREE
);
29197 /* Make a dispatcher declaration for the multi-versioned function DECL.
29198 Calls to DECL function will be replaced with calls to the dispatcher
29199 by the front-end. Returns the decl of the dispatcher function. */
/* NOTE(review): lossy extraction -- return type, braces, several loop
   bodies and an early "return NULL" path are missing from this view.  */
29202 ix86_get_function_versions_dispatcher (void *decl
)
29204 tree fn
= (tree
) decl
;
29205 struct cgraph_node
*node
= NULL
;
29206 struct cgraph_node
*default_node
= NULL
;
29207 struct cgraph_function_version_info
*node_v
= NULL
;
29208 struct cgraph_function_version_info
*first_v
= NULL
;
29210 tree dispatch_decl
= NULL
;
29212 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29213 struct cgraph_function_version_info
*it_v
= NULL
;
29214 struct cgraph_node
*dispatcher_node
= NULL
;
29215 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29218 struct cgraph_function_version_info
*default_version_info
= NULL
;
29220 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29222 node
= cgraph_get_node (fn
);
29223 gcc_assert (node
!= NULL
);
29225 node_v
= get_cgraph_node_version (node
);
29226 gcc_assert (node_v
!= NULL
);
/* Already created on a previous call: reuse the cached resolver.  */
29228 if (node_v
->dispatcher_resolver
!= NULL
)
29229 return node_v
->dispatcher_resolver
;
29231 /* Find the default version and make it the first node. */
29233 /* Go to the beginnig of the chain. */
/* NOTE(review): first_v is walked here but its initialization (from
   node_v, presumably) was dropped by the extraction -- TODO confirm.  */
29234 while (first_v
->prev
!= NULL
)
29235 first_v
= first_v
->prev
;
29236 default_version_info
= first_v
;
/* Linear scan of the version chain for the decl with no "target"
   attribute.  */
29237 while (default_version_info
!= NULL
)
29239 if (is_function_default_version
29240 (default_version_info
->this_node
->symbol
.decl
))
29242 default_version_info
= default_version_info
->next
;
29245 /* If there is no default node, just return NULL. */
29246 if (default_version_info
== NULL
)
29249 /* Make default info the first node. */
/* Unlink the default node from its position and splice it in front
   of FIRST_V (standard doubly-linked-list move-to-front).  */
29250 if (first_v
!= default_version_info
)
29252 default_version_info
->prev
->next
= default_version_info
->next
;
29253 if (default_version_info
->next
)
29254 default_version_info
->next
->prev
= default_version_info
->prev
;
29255 first_v
->prev
= default_version_info
;
29256 default_version_info
->next
= first_v
;
29257 default_version_info
->prev
= NULL
;
29260 default_node
= default_version_info
->this_node
;
29262 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29263 /* Right now, the dispatching is done via ifunc. */
29264 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29266 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29267 gcc_assert (dispatcher_node
!= NULL
);
29268 dispatcher_node
->dispatcher_function
= 1;
29269 dispatcher_version_info
29270 = insert_new_cgraph_node_version (dispatcher_node
);
29271 dispatcher_version_info
->next
= default_version_info
;
29272 dispatcher_node
->local
.finalized
= 1;
29274 /* Set the dispatcher for all the versions. */
29275 it_v
= default_version_info
;
/* NOTE(review): the loop advance (it_v = it_v->next) was dropped by
   the extraction.  */
29276 while (it_v
!= NULL
)
29278 it_v
->dispatcher_resolver
= dispatch_decl
;
/* #else branch of the ifunc test: multiversioning is impossible.  */
29282 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29283 "multiversioning needs ifunc which is not supported "
29284 "in this configuration");
29286 return dispatch_decl
;
29289 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
/* NOTE(review): lossy extraction -- the rest of the lead comment, the
   return type, braces, the declarations of attr_name/attr_args/attr and
   the final "return attr;" are missing from this view.  */
29293 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29296 tree attr_arg_name
;
29300 attr_name
= get_identifier (name
);
29301 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
/* Single-element argument list containing the string ARG_NAME.  */
29302 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
/* Prepend the new attribute onto CHAIN.  */
29303 attr
= tree_cons (attr_name
, attr_args
, chain
);
29307 /* Make the resolver function decl to dispatch the versions of
29308 a multi-versioned function, DEFAULT_DECL. Create an
29309 empty basic block in the resolver and store the pointer in
29310 EMPTY_BB. Return the decl of the resolver function. */
/* NOTE(review): lossy extraction -- return type, braces, pop_cfun and
   the final "return decl;" are missing from this view.  */
29313 make_resolver_func (const tree default_decl
,
29314 const tree dispatch_decl
,
29315 basic_block
*empty_bb
)
29317 char *resolver_name
;
29318 tree decl
, type
, decl_name
, t
;
29319 bool is_uniq
= false;
29321 /* IFUNC's have to be globally visible. So, if the default_decl is
29322 not, then the name of the IFUNC should be made unique. */
/* Presumably sets is_uniq = true in the dropped branch body.  */
29323 if (TREE_PUBLIC (default_decl
) == 0)
29326 /* Append the filename to the resolver function if the versions are
29327 not externally visible. This is because the resolver function has
29328 to be externally visible for the loader to find it. So, appending
29329 the filename will prevent conflicts with a resolver function from
29330 another module which is based on the same version name. */
29331 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29333 /* The resolver function should return a (void *). */
29334 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29336 decl
= build_fn_decl (resolver_name
, type
);
29337 decl_name
= get_identifier (resolver_name
);
29338 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29340 DECL_NAME (decl
) = decl_name
;
29341 TREE_USED (decl
) = 1;
29342 DECL_ARTIFICIAL (decl
) = 1;
29343 DECL_IGNORED_P (decl
) = 0;
29344 /* IFUNC resolvers have to be externally visible. */
29345 TREE_PUBLIC (decl
) = 1;
29346 DECL_UNINLINABLE (decl
) = 0;
29348 /* Resolver is not external, body is generated. */
29349 DECL_EXTERNAL (decl
) = 0;
29350 DECL_EXTERNAL (dispatch_decl
) = 0;
29352 DECL_CONTEXT (decl
) = NULL_TREE
;
29353 DECL_INITIAL (decl
) = make_node (BLOCK
);
29354 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29356 if (DECL_COMDAT_GROUP (default_decl
)
29357 || TREE_PUBLIC (default_decl
))
29359 /* In this case, each translation unit with a call to this
29360 versioned function will put out a resolver. Ensure it
29361 is comdat to keep just one copy. */
29362 DECL_COMDAT (decl
) = 1;
29363 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29365 /* Build result decl and add to function_decl. */
29366 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29367 DECL_ARTIFICIAL (t
) = 1;
29368 DECL_IGNORED_P (t
) = 1;
29369 DECL_RESULT (decl
) = t
;
/* Lower the new function body and hand its (empty) entry block back
   to the caller via *EMPTY_BB.  */
29371 gimplify_function_tree (decl
);
29372 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29373 *empty_bb
= init_lowered_empty_function (decl
, false);
29375 cgraph_add_new_function (decl
, true);
29376 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29380 gcc_assert (dispatch_decl
!= NULL
);
29381 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29382 DECL_ATTRIBUTES (dispatch_decl
)
29383 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29385 /* Create the alias for dispatch to resolver here. */
29386 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29387 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29391 /* Generate the dispatching code body to dispatch multi-versioned function
29392 DECL. The target hook is called to process the "target" attributes and
29393 provide the code to dispatch the right function at run-time. NODE points
29394 to the dispatcher decl whose body will be created. */
/* NOTE(review): lossy extraction -- return type, braces, the cfun pop
   and the fn_ver_vec release are missing from this view.  */
29397 ix86_generate_version_dispatcher_body (void *node_p
)
29399 tree resolver_decl
;
29400 basic_block empty_bb
;
29401 vec
<tree
> fn_ver_vec
= vNULL
;
29402 tree default_ver_decl
;
29403 struct cgraph_node
*versn
;
29404 struct cgraph_node
*node
;
29406 struct cgraph_function_version_info
*node_version_info
= NULL
;
29407 struct cgraph_function_version_info
*versn_info
= NULL
;
29409 node
= (cgraph_node
*)node_p
;
29411 node_version_info
= get_cgraph_node_version (node
);
29412 gcc_assert (node
->dispatcher_function
29413 && node_version_info
!= NULL
);
/* Body already generated on an earlier call: reuse it.  */
29415 if (node_version_info
->dispatcher_resolver
)
29416 return node_version_info
->dispatcher_resolver
;
29418 /* The first version in the chain corresponds to the default version. */
29419 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
29421 /* node is going to be an alias, so remove the finalized bit. */
29422 node
->local
.finalized
= false;
29424 resolver_decl
= make_resolver_func (default_ver_decl
,
29425 node
->symbol
.decl
, &empty_bb
);
29427 node_version_info
->dispatcher_resolver
= resolver_decl
;
29429 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
29431 fn_ver_vec
.create (2);
/* Collect every version decl in the chain (default first).  */
29433 for (versn_info
= node_version_info
->next
; versn_info
;
29434 versn_info
= versn_info
->next
)
29436 versn
= versn_info
->this_node
;
29437 /* Check for virtual functions here again, as by this time it should
29438 have been determined if this function needs a vtable index or
29439 not. This happens for methods in derived classes that override
29440 virtual methods in base classes but are not explicitly marked as
29442 if (DECL_VINDEX (versn
->symbol
.decl
))
29443 error_at (DECL_SOURCE_LOCATION (versn
->symbol
.decl
),
29444 "Virtual function multiversioning not supported");
29445 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
/* Emit the runtime dispatch chain into the resolver body.  */
29448 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
29450 rebuild_cgraph_edges ();
29452 return resolver_decl
;
29454 /* This builds the processor_model struct type defined in
29455 libgcc/config/i386/cpuinfo.c */
/* NOTE(review): lossy extraction -- return type, braces, the loop body
   that builds each of the first three fields' names, and the final
   "return type;" are missing from this view.  The layout mirrored here
   must stay in sync with libgcc's __processor_model.  */
29458 build_processor_model_struct (void)
29460 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29462 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29464 tree type
= make_node (RECORD_TYPE
);
29466 /* The first 3 fields are unsigned int. */
/* Fields are chained in reverse; finish_builtin_struct handles the
   ordering.  */
29467 for (i
= 0; i
< 3; ++i
)
29469 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29470 get_identifier (field_name
[i
]), unsigned_type_node
);
29471 if (field_chain
!= NULL_TREE
)
29472 DECL_CHAIN (field
) = field_chain
;
29473 field_chain
= field
;
29476 /* The last field is an array of unsigned integers of size one. */
29477 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29478 get_identifier (field_name
[3]),
29479 build_array_type (unsigned_type_node
,
29480 build_index_type (size_one_node
)));
29481 if (field_chain
!= NULL_TREE
)
29482 DECL_CHAIN (field
) = field_chain
;
29483 field_chain
= field
;
29485 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29489 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
/* NOTE(review): lossy extraction -- return type, braces, the new_decl
   declaration, the VAR_DECL/type arguments of build_decl and the final
   "return new_decl;" are missing from this view.  */
29492 make_var_decl (tree type
, const char *name
)
29496 new_decl
= build_decl (UNKNOWN_LOCATION
,
29498 get_identifier(name
),
/* External + public: the real definition lives in libgcc.  */
29501 DECL_EXTERNAL (new_decl
) = 1;
29502 TREE_STATIC (new_decl
) = 1;
29503 TREE_PUBLIC (new_decl
) = 1;
29504 DECL_INITIAL (new_decl
) = 0;
29505 DECL_ARTIFICIAL (new_decl
) = 0;
/* Keep the decl even if it looks unused.  */
29506 DECL_PRESERVE_P (new_decl
) = 1;
29508 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29509 assemble_variable (new_decl
, 0, 0, 0);
29514 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29515 into an integer defined in libgcc/config/i386/cpuinfo.c */
/* NOTE(review): lossy extraction -- return type, braces, most of the
   processor_features/processor_model enumerators, parts of the name
   tables and several build3/build4 arguments are missing from this
   view.  The enum values mirror libgcc/config/i386/cpuinfo.c and must
   stay in sync with it.  */
29518 fold_builtin_cpu (tree fndecl
, tree
*args
)
29521 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29522 DECL_FUNCTION_CODE (fndecl
);
29523 tree param_string_cst
= NULL
;
29525 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29526 enum processor_features
29542 /* These are the values for vendor types and cpu types and subtypes
29543 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29544 the corresponding start value. */
29545 enum processor_model
29555 M_CPU_SUBTYPE_START
,
29556 M_INTEL_COREI7_NEHALEM
,
29557 M_INTEL_COREI7_WESTMERE
,
29558 M_INTEL_COREI7_SANDYBRIDGE
,
29559 M_AMDFAM10H_BARCELONA
,
29560 M_AMDFAM10H_SHANGHAI
,
29561 M_AMDFAM10H_ISTANBUL
,
29562 M_AMDFAM15H_BDVER1
,
29563 M_AMDFAM15H_BDVER2
,
/* Maps the strings accepted by __builtin_cpu_is to model values.  */
29567 static struct _arch_names_table
29569 const char *const name
;
29570 const enum processor_model model
;
29572 const arch_names_table
[] =
29575 {"intel", M_INTEL
},
29576 {"atom", M_INTEL_ATOM
},
29577 {"core2", M_INTEL_CORE2
},
29578 {"corei7", M_INTEL_COREI7
},
29579 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29580 {"westmere", M_INTEL_COREI7_WESTMERE
},
29581 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29582 {"amdfam10h", M_AMDFAM10H
},
29583 {"barcelona", M_AMDFAM10H_BARCELONA
},
29584 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29585 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29586 {"amdfam15h", M_AMDFAM15H
},
29587 {"bdver1", M_AMDFAM15H_BDVER1
},
29588 {"bdver2", M_AMDFAM15H_BDVER2
},
29589 {"bdver3", M_AMDFAM15H_BDVER3
},
/* Maps the strings accepted by __builtin_cpu_supports to feature
   bit positions.  */
29592 static struct _isa_names_table
29594 const char *const name
;
29595 const enum processor_features feature
;
29597 const isa_names_table
[] =
29601 {"popcnt", F_POPCNT
},
29605 {"ssse3", F_SSSE3
},
29606 {"sse4.1", F_SSE4_1
},
29607 {"sse4.2", F_SSE4_2
},
29612 tree __processor_model_type
= build_processor_model_struct ();
29613 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
29616 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29618 param_string_cst
= *args
;
/* Peel wrapper expressions (NOP_EXPR, ADDR_EXPR, ...) until the
   underlying STRING_CST argument is reached.  */
29619 while (param_string_cst
29620 && TREE_CODE (param_string_cst
) != STRING_CST
)
29622 /* *args must be a expr that can contain other EXPRS leading to a
29624 if (!EXPR_P (param_string_cst
))
29626 error ("Parameter to builtin must be a string constant or literal");
29627 return integer_zero_node
;
29629 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29632 gcc_assert (param_string_cst
);
29634 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29640 unsigned int field_val
= 0;
29641 unsigned int NUM_ARCH_NAMES
29642 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
/* Linear search of the small arch table for the argument string.  */
29644 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29645 if (strcmp (arch_names_table
[i
].name
,
29646 TREE_STRING_POINTER (param_string_cst
)) == 0)
29649 if (i
== NUM_ARCH_NAMES
)
29651 error ("Parameter to builtin not valid: %s",
29652 TREE_STRING_POINTER (param_string_cst
));
29653 return integer_zero_node
;
/* Pick the struct field holding this model class: vendor is the
   first field, type the second, subtype the third.  */
29656 field
= TYPE_FIELDS (__processor_model_type
);
29657 field_val
= arch_names_table
[i
].model
;
29659 /* CPU types are stored in the next field. */
29660 if (field_val
> M_CPU_TYPE_START
29661 && field_val
< M_CPU_SUBTYPE_START
)
29663 field
= DECL_CHAIN (field
);
29664 field_val
-= M_CPU_TYPE_START
;
29667 /* CPU subtypes are stored in the next field. */
29668 if (field_val
> M_CPU_SUBTYPE_START
)
29670 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29671 field_val
-= M_CPU_SUBTYPE_START
;
29674 /* Get the appropriate field in __cpu_model. */
29675 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29678 /* Check the value. */
/* Fold the builtin to (int)(__cpu_model.<field> == field_val).  */
29679 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29680 build_int_cstu (unsigned_type_node
, field_val
));
29681 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29683 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29690 unsigned int field_val
= 0;
29691 unsigned int NUM_ISA_NAMES
29692 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29694 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29695 if (strcmp (isa_names_table
[i
].name
,
29696 TREE_STRING_POINTER (param_string_cst
)) == 0)
29699 if (i
== NUM_ISA_NAMES
)
29701 error ("Parameter to builtin not valid: %s",
29702 TREE_STRING_POINTER (param_string_cst
));
29703 return integer_zero_node
;
29706 field
= TYPE_FIELDS (__processor_model_type
);
29707 /* Get the last field, which is __cpu_features. */
29708 while (DECL_CHAIN (field
))
29709 field
= DECL_CHAIN (field
);
29711 /* Get the appropriate field: __cpu_model.__cpu_features */
29712 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29715 /* Access the 0th element of __cpu_features array. */
29716 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29717 integer_zero_node
, NULL_TREE
, NULL_TREE
);
/* Each feature occupies one bit of __cpu_features[0].  */
29719 field_val
= (1 << isa_names_table
[i
].feature
);
29720 /* Return __cpu_model.__cpu_features[0] & field_val */
29721 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29722 build_int_cstu (unsigned_type_node
, field_val
));
29723 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
/* Only the two CPU builtins above may reach this function.  */
29725 gcc_unreachable ();
/* Target hook: fold the machine-dependent builtin FNDECL with N_ARGS
   arguments ARGS at compile time when possible.  NOTE(review): lossy
   extraction -- return type, braces and the final fallback
   (presumably "return NULL_TREE;") are missing from this view.  */
29729 ix86_fold_builtin (tree fndecl
, int n_args
,
29730 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29732 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29734 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29735 DECL_FUNCTION_CODE (fndecl
);
/* Only the two CPU-detection builtins are folded here.  */
29736 if (fn_code
== IX86_BUILTIN_CPU_IS
29737 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29739 gcc_assert (n_args
== 1);
29740 return fold_builtin_cpu (fndecl
, args
);
/* Let the subtarget try anything it knows about.  */
29744 #ifdef SUBTARGET_FOLD_BUILTIN
29745 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29751 /* Make builtins to detect cpu type and features supported. NAME is
29752 the builtin name, CODE is the builtin code, and FTYPE is the function
29753 type of the builtin. */
/* NOTE(review): lossy extraction -- return type, braces, the local
   decl/type declarations and the trailing add_builtin_function
   arguments are missing from this view.  */
29756 make_cpu_type_builtin (const char* name
, int code
,
29757 enum ix86_builtin_func_type ftype
, bool is_const
)
29762 type
= ix86_get_builtin_func_type (ftype
);
29763 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29765 gcc_assert (decl
!= NULL_TREE
);
/* Register in the table consulted by ix86_builtin_decl.  */
29766 ix86_builtins
[(int) code
] = decl
;
29767 TREE_READONLY (decl
) = is_const
;
29770 /* Make builtins to get CPU type and features supported. The created
29773 __builtin_cpu_init (), to detect cpu type and features,
29774 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29775 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
/* NOTE(review): lossy extraction -- return type and braces missing.  */
29779 ix86_init_platform_type_builtins (void)
/* cpu_init has side effects (fills __cpu_model), so is_const=false;
   the two query builtins are pure reads, so is_const=true.  */
29781 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29782 INT_FTYPE_VOID
, false);
29783 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29784 INT_FTYPE_PCCHAR
, true);
29785 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29786 INT_FTYPE_PCCHAR
, true);
29789 /* Internal method for ix86_init_builtins. */
/* Registers the ms_abi/sysv_abi variants of the va_start/va_end/va_copy
   builtins.  NOTE(review): lossy extraction -- return type, braces,
   the sysv_va_ref guard and some type-list arguments are missing from
   this view.  */
29792 ix86_init_builtins_va_builtins_abi (void)
29794 tree ms_va_ref
, sysv_va_ref
;
29795 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29796 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29797 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29798 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
/* Each builtin carries the calling-convention attribute of the ABI
   it belongs to.  */
29802 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29803 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29804 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29806 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29809 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29810 fnvoid_va_start_ms
=
29811 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29812 fnvoid_va_end_sysv
=
29813 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29814 fnvoid_va_start_sysv
=
29815 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29817 fnvoid_va_copy_ms
=
29818 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29820 fnvoid_va_copy_sysv
=
29821 build_function_type_list (void_type_node
, sysv_va_ref
,
29822 sysv_va_ref
, NULL_TREE
);
29824 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29825 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29826 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29827 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29828 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29829 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29830 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29831 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29832 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29833 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29834 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29835 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
/* Register the i386-specific builtin types (__float80, __float128 and
   the primitive types used by the builtin machinery).  NOTE(review):
   lossy extraction -- return type and braces are missing.  */
29839 ix86_init_builtin_types (void)
29841 tree float128_type_node
, float80_type_node
;
29843 /* The __float80 type. */
/* Reuse long double when it is already the 80-bit extended type.  */
29844 float80_type_node
= long_double_type_node
;
29845 if (TYPE_MODE (float80_type_node
) != XFmode
)
29847 /* The __float80 type. */
29848 float80_type_node
= make_node (REAL_TYPE
);
29850 TYPE_PRECISION (float80_type_node
) = 80;
29851 layout_type (float80_type_node
);
29853 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29855 /* The __float128 type. */
29856 float128_type_node
= make_node (REAL_TYPE
);
29857 TYPE_PRECISION (float128_type_node
) = 128;
29858 layout_type (float128_type_node
);
29859 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29861 /* This macro is built by i386-builtin-types.awk. */
29862 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
/* Top-level TARGET_INIT_BUILTINS hook: set up all i386 builtins.
   NOTE(review): lossy extraction -- return type, braces, the local
   "tree t" declaration and the TARGET_64BIT guard around the va-ABI
   builtins are missing from this view.  */
29866 ix86_init_builtins (void)
29870 ix86_init_builtin_types ();
29872 /* Builtins to get CPU type and features. */
29873 ix86_init_platform_type_builtins ();
29875 /* TFmode support builtins. */
29876 def_builtin_const (0, "__builtin_infq",
29877 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29878 def_builtin_const (0, "__builtin_huge_valq",
29879 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29881 /* We will expand them to normal call if SSE isn't available since
29882 they are used by libgcc. */
29883 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29884 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29885 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29886 TREE_READONLY (t
) = 1;
29887 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29889 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29890 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29891 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29892 TREE_READONLY (t
) = 1;
29893 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29895 ix86_init_tm_builtins ();
29896 ix86_init_mmx_sse_builtins ();
29899 ix86_init_builtins_va_builtins_abi ();
29901 #ifdef SUBTARGET_INIT_BUILTINS
29902 SUBTARGET_INIT_BUILTINS
;
29906 /* Return the ix86 builtin for CODE. */
29909 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29911 if (code
>= IX86_BUILTIN_MAX
)
29912 return error_mark_node
;
29914 return ix86_builtins
[code
];
29917 /* Errors in the source file can cause expand_expr to return const0_rtx
29918 where we expect a vector. To avoid crashing, use one of the vector
29919 clear instructions. */
/* NOTE(review): lossy extraction -- return type, braces and the final
   "return x;" are missing from this view.  */
29921 safe_vector_operand (rtx x
, enum machine_mode mode
)
/* Replace a scalar zero with the all-zero vector constant of MODE.  */
29923 if (x
== const0_rtx
)
29924 x
= CONST0_RTX (mode
);
29928 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): lossy extraction -- return type, braces, the "rtx pat"
   declaration and the tail (pat NULL-check, emit_insn, return target)
   are missing from this view.  */
29931 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
29934 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29935 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29936 rtx op0
= expand_normal (arg0
);
29937 rtx op1
= expand_normal (arg1
);
/* Modes come from the insn pattern: operand 0 is the destination,
   operands 1 and 2 the sources.  */
29938 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29939 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
29940 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
/* Guard against const0_rtx from erroneous source (see
   safe_vector_operand above).  */
29942 if (VECTOR_MODE_P (mode0
))
29943 op0
= safe_vector_operand (op0
, mode0
);
29944 if (VECTOR_MODE_P (mode1
))
29945 op1
= safe_vector_operand (op1
, mode1
);
/* Get a fresh pseudo when TARGET is absent, has the wrong mode, or
   fails the destination predicate.  */
29947 if (optimize
|| !target
29948 || GET_MODE (target
) != tmode
29949 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29950 target
= gen_reg_rtx (tmode
);
/* An SImode operand feeding a TImode slot is widened by loading it
   into a V4SI register and taking the TImode lowpart.  */
29952 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
29954 rtx x
= gen_reg_rtx (V4SImode
);
29955 emit_insn (gen_sse2_loadd (x
, op1
));
29956 op1
= gen_lowpart (TImode
, x
);
/* Force operands into registers when the insn predicates reject
   them as-is.  */
29959 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
29960 op0
= copy_to_mode_reg (mode0
, op0
);
29961 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
29962 op1
= copy_to_mode_reg (mode1
, op1
);
29964 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
29973 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
29976 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
29977 enum ix86_builtin_func_type m_type
,
29978 enum rtx_code sub_code
)
29983 bool comparison_p
= false;
29985 bool last_arg_constant
= false;
29986 int num_memory
= 0;
29989 enum machine_mode mode
;
29992 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29996 case MULTI_ARG_4_DF2_DI_I
:
29997 case MULTI_ARG_4_DF2_DI_I1
:
29998 case MULTI_ARG_4_SF2_SI_I
:
29999 case MULTI_ARG_4_SF2_SI_I1
:
30001 last_arg_constant
= true;
30004 case MULTI_ARG_3_SF
:
30005 case MULTI_ARG_3_DF
:
30006 case MULTI_ARG_3_SF2
:
30007 case MULTI_ARG_3_DF2
:
30008 case MULTI_ARG_3_DI
:
30009 case MULTI_ARG_3_SI
:
30010 case MULTI_ARG_3_SI_DI
:
30011 case MULTI_ARG_3_HI
:
30012 case MULTI_ARG_3_HI_SI
:
30013 case MULTI_ARG_3_QI
:
30014 case MULTI_ARG_3_DI2
:
30015 case MULTI_ARG_3_SI2
:
30016 case MULTI_ARG_3_HI2
:
30017 case MULTI_ARG_3_QI2
:
30021 case MULTI_ARG_2_SF
:
30022 case MULTI_ARG_2_DF
:
30023 case MULTI_ARG_2_DI
:
30024 case MULTI_ARG_2_SI
:
30025 case MULTI_ARG_2_HI
:
30026 case MULTI_ARG_2_QI
:
30030 case MULTI_ARG_2_DI_IMM
:
30031 case MULTI_ARG_2_SI_IMM
:
30032 case MULTI_ARG_2_HI_IMM
:
30033 case MULTI_ARG_2_QI_IMM
:
30035 last_arg_constant
= true;
30038 case MULTI_ARG_1_SF
:
30039 case MULTI_ARG_1_DF
:
30040 case MULTI_ARG_1_SF2
:
30041 case MULTI_ARG_1_DF2
:
30042 case MULTI_ARG_1_DI
:
30043 case MULTI_ARG_1_SI
:
30044 case MULTI_ARG_1_HI
:
30045 case MULTI_ARG_1_QI
:
30046 case MULTI_ARG_1_SI_DI
:
30047 case MULTI_ARG_1_HI_DI
:
30048 case MULTI_ARG_1_HI_SI
:
30049 case MULTI_ARG_1_QI_DI
:
30050 case MULTI_ARG_1_QI_SI
:
30051 case MULTI_ARG_1_QI_HI
:
30055 case MULTI_ARG_2_DI_CMP
:
30056 case MULTI_ARG_2_SI_CMP
:
30057 case MULTI_ARG_2_HI_CMP
:
30058 case MULTI_ARG_2_QI_CMP
:
30060 comparison_p
= true;
30063 case MULTI_ARG_2_SF_TF
:
30064 case MULTI_ARG_2_DF_TF
:
30065 case MULTI_ARG_2_DI_TF
:
30066 case MULTI_ARG_2_SI_TF
:
30067 case MULTI_ARG_2_HI_TF
:
30068 case MULTI_ARG_2_QI_TF
:
30074 gcc_unreachable ();
30077 if (optimize
|| !target
30078 || GET_MODE (target
) != tmode
30079 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30080 target
= gen_reg_rtx (tmode
);
30082 gcc_assert (nargs
<= 4);
30084 for (i
= 0; i
< nargs
; i
++)
30086 tree arg
= CALL_EXPR_ARG (exp
, i
);
30087 rtx op
= expand_normal (arg
);
30088 int adjust
= (comparison_p
) ? 1 : 0;
30089 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30091 if (last_arg_constant
&& i
== nargs
- 1)
30093 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30095 enum insn_code new_icode
= icode
;
30098 case CODE_FOR_xop_vpermil2v2df3
:
30099 case CODE_FOR_xop_vpermil2v4sf3
:
30100 case CODE_FOR_xop_vpermil2v4df3
:
30101 case CODE_FOR_xop_vpermil2v8sf3
:
30102 error ("the last argument must be a 2-bit immediate");
30103 return gen_reg_rtx (tmode
);
30104 case CODE_FOR_xop_rotlv2di3
:
30105 new_icode
= CODE_FOR_rotlv2di3
;
30107 case CODE_FOR_xop_rotlv4si3
:
30108 new_icode
= CODE_FOR_rotlv4si3
;
30110 case CODE_FOR_xop_rotlv8hi3
:
30111 new_icode
= CODE_FOR_rotlv8hi3
;
30113 case CODE_FOR_xop_rotlv16qi3
:
30114 new_icode
= CODE_FOR_rotlv16qi3
;
30116 if (CONST_INT_P (op
))
30118 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30119 op
= GEN_INT (INTVAL (op
) & mask
);
30120 gcc_checking_assert
30121 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30125 gcc_checking_assert
30127 && insn_data
[new_icode
].operand
[0].mode
== tmode
30128 && insn_data
[new_icode
].operand
[1].mode
== tmode
30129 && insn_data
[new_icode
].operand
[2].mode
== mode
30130 && insn_data
[new_icode
].operand
[0].predicate
30131 == insn_data
[icode
].operand
[0].predicate
30132 && insn_data
[new_icode
].operand
[1].predicate
30133 == insn_data
[icode
].operand
[1].predicate
);
30139 gcc_unreachable ();
30146 if (VECTOR_MODE_P (mode
))
30147 op
= safe_vector_operand (op
, mode
);
30149 /* If we aren't optimizing, only allow one memory operand to be
30151 if (memory_operand (op
, mode
))
30154 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30157 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30159 op
= force_reg (mode
, op
);
30163 args
[i
].mode
= mode
;
30169 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30174 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30175 GEN_INT ((int)sub_code
));
30176 else if (! comparison_p
)
30177 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30180 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30184 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30189 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30193 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30197 gcc_unreachable ();
30207 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30208 insns with vec_merge. */
30211 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30215 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30216 rtx op1
, op0
= expand_normal (arg0
);
30217 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30218 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30220 if (optimize
|| !target
30221 || GET_MODE (target
) != tmode
30222 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30223 target
= gen_reg_rtx (tmode
);
30225 if (VECTOR_MODE_P (mode0
))
30226 op0
= safe_vector_operand (op0
, mode0
);
30228 if ((optimize
&& !register_operand (op0
, mode0
))
30229 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30230 op0
= copy_to_mode_reg (mode0
, op0
);
30233 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30234 op1
= copy_to_mode_reg (mode0
, op1
);
30236 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30243 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30246 ix86_expand_sse_compare (const struct builtin_description
*d
,
30247 tree exp
, rtx target
, bool swap
)
30250 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30251 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30252 rtx op0
= expand_normal (arg0
);
30253 rtx op1
= expand_normal (arg1
);
30255 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30256 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30257 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30258 enum rtx_code comparison
= d
->comparison
;
30260 if (VECTOR_MODE_P (mode0
))
30261 op0
= safe_vector_operand (op0
, mode0
);
30262 if (VECTOR_MODE_P (mode1
))
30263 op1
= safe_vector_operand (op1
, mode1
);
30265 /* Swap operands if we have a comparison that isn't available in
30269 rtx tmp
= gen_reg_rtx (mode1
);
30270 emit_move_insn (tmp
, op1
);
30275 if (optimize
|| !target
30276 || GET_MODE (target
) != tmode
30277 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30278 target
= gen_reg_rtx (tmode
);
30280 if ((optimize
&& !register_operand (op0
, mode0
))
30281 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30282 op0
= copy_to_mode_reg (mode0
, op0
);
30283 if ((optimize
&& !register_operand (op1
, mode1
))
30284 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30285 op1
= copy_to_mode_reg (mode1
, op1
);
30287 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30288 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30295 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30298 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30302 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30303 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30304 rtx op0
= expand_normal (arg0
);
30305 rtx op1
= expand_normal (arg1
);
30306 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30307 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30308 enum rtx_code comparison
= d
->comparison
;
30310 if (VECTOR_MODE_P (mode0
))
30311 op0
= safe_vector_operand (op0
, mode0
);
30312 if (VECTOR_MODE_P (mode1
))
30313 op1
= safe_vector_operand (op1
, mode1
);
30315 /* Swap operands if we have a comparison that isn't available in
30317 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30324 target
= gen_reg_rtx (SImode
);
30325 emit_move_insn (target
, const0_rtx
);
30326 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30328 if ((optimize
&& !register_operand (op0
, mode0
))
30329 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30330 op0
= copy_to_mode_reg (mode0
, op0
);
30331 if ((optimize
&& !register_operand (op1
, mode1
))
30332 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30333 op1
= copy_to_mode_reg (mode1
, op1
);
30335 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30339 emit_insn (gen_rtx_SET (VOIDmode
,
30340 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30341 gen_rtx_fmt_ee (comparison
, QImode
,
30345 return SUBREG_REG (target
);
30348 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30351 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30355 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30356 rtx op1
, op0
= expand_normal (arg0
);
30357 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30358 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30360 if (optimize
|| target
== 0
30361 || GET_MODE (target
) != tmode
30362 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30363 target
= gen_reg_rtx (tmode
);
30365 if (VECTOR_MODE_P (mode0
))
30366 op0
= safe_vector_operand (op0
, mode0
);
30368 if ((optimize
&& !register_operand (op0
, mode0
))
30369 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30370 op0
= copy_to_mode_reg (mode0
, op0
);
30372 op1
= GEN_INT (d
->comparison
);
30374 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30382 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30383 tree exp
, rtx target
)
30386 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30387 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30388 rtx op0
= expand_normal (arg0
);
30389 rtx op1
= expand_normal (arg1
);
30391 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30392 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30393 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30395 if (optimize
|| target
== 0
30396 || GET_MODE (target
) != tmode
30397 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30398 target
= gen_reg_rtx (tmode
);
30400 op0
= safe_vector_operand (op0
, mode0
);
30401 op1
= safe_vector_operand (op1
, mode1
);
30403 if ((optimize
&& !register_operand (op0
, mode0
))
30404 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30405 op0
= copy_to_mode_reg (mode0
, op0
);
30406 if ((optimize
&& !register_operand (op1
, mode1
))
30407 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30408 op1
= copy_to_mode_reg (mode1
, op1
);
30410 op2
= GEN_INT (d
->comparison
);
30412 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30419 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30422 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30426 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30427 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30428 rtx op0
= expand_normal (arg0
);
30429 rtx op1
= expand_normal (arg1
);
30430 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30431 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30432 enum rtx_code comparison
= d
->comparison
;
30434 if (VECTOR_MODE_P (mode0
))
30435 op0
= safe_vector_operand (op0
, mode0
);
30436 if (VECTOR_MODE_P (mode1
))
30437 op1
= safe_vector_operand (op1
, mode1
);
30439 target
= gen_reg_rtx (SImode
);
30440 emit_move_insn (target
, const0_rtx
);
30441 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30443 if ((optimize
&& !register_operand (op0
, mode0
))
30444 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30445 op0
= copy_to_mode_reg (mode0
, op0
);
30446 if ((optimize
&& !register_operand (op1
, mode1
))
30447 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30448 op1
= copy_to_mode_reg (mode1
, op1
);
30450 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30454 emit_insn (gen_rtx_SET (VOIDmode
,
30455 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30456 gen_rtx_fmt_ee (comparison
, QImode
,
30460 return SUBREG_REG (target
);
30463 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30466 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30467 tree exp
, rtx target
)
30470 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30471 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30472 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30473 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30474 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30475 rtx scratch0
, scratch1
;
30476 rtx op0
= expand_normal (arg0
);
30477 rtx op1
= expand_normal (arg1
);
30478 rtx op2
= expand_normal (arg2
);
30479 rtx op3
= expand_normal (arg3
);
30480 rtx op4
= expand_normal (arg4
);
30481 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30483 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30484 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30485 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30486 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30487 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30488 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30489 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30491 if (VECTOR_MODE_P (modev2
))
30492 op0
= safe_vector_operand (op0
, modev2
);
30493 if (VECTOR_MODE_P (modev4
))
30494 op2
= safe_vector_operand (op2
, modev4
);
30496 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30497 op0
= copy_to_mode_reg (modev2
, op0
);
30498 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30499 op1
= copy_to_mode_reg (modei3
, op1
);
30500 if ((optimize
&& !register_operand (op2
, modev4
))
30501 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30502 op2
= copy_to_mode_reg (modev4
, op2
);
30503 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30504 op3
= copy_to_mode_reg (modei5
, op3
);
30506 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30508 error ("the fifth argument must be an 8-bit immediate");
30512 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30514 if (optimize
|| !target
30515 || GET_MODE (target
) != tmode0
30516 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30517 target
= gen_reg_rtx (tmode0
);
30519 scratch1
= gen_reg_rtx (tmode1
);
30521 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30523 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30525 if (optimize
|| !target
30526 || GET_MODE (target
) != tmode1
30527 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30528 target
= gen_reg_rtx (tmode1
);
30530 scratch0
= gen_reg_rtx (tmode0
);
30532 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30536 gcc_assert (d
->flag
);
30538 scratch0
= gen_reg_rtx (tmode0
);
30539 scratch1
= gen_reg_rtx (tmode1
);
30541 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30551 target
= gen_reg_rtx (SImode
);
30552 emit_move_insn (target
, const0_rtx
);
30553 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30556 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30557 gen_rtx_fmt_ee (EQ
, QImode
,
30558 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30561 return SUBREG_REG (target
);
30568 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30571 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30572 tree exp
, rtx target
)
30575 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30576 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30577 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30578 rtx scratch0
, scratch1
;
30579 rtx op0
= expand_normal (arg0
);
30580 rtx op1
= expand_normal (arg1
);
30581 rtx op2
= expand_normal (arg2
);
30582 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30584 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30585 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30586 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30587 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30588 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30590 if (VECTOR_MODE_P (modev2
))
30591 op0
= safe_vector_operand (op0
, modev2
);
30592 if (VECTOR_MODE_P (modev3
))
30593 op1
= safe_vector_operand (op1
, modev3
);
30595 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30596 op0
= copy_to_mode_reg (modev2
, op0
);
30597 if ((optimize
&& !register_operand (op1
, modev3
))
30598 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30599 op1
= copy_to_mode_reg (modev3
, op1
);
30601 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30603 error ("the third argument must be an 8-bit immediate");
30607 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30609 if (optimize
|| !target
30610 || GET_MODE (target
) != tmode0
30611 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30612 target
= gen_reg_rtx (tmode0
);
30614 scratch1
= gen_reg_rtx (tmode1
);
30616 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30618 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30620 if (optimize
|| !target
30621 || GET_MODE (target
) != tmode1
30622 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30623 target
= gen_reg_rtx (tmode1
);
30625 scratch0
= gen_reg_rtx (tmode0
);
30627 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30631 gcc_assert (d
->flag
);
30633 scratch0
= gen_reg_rtx (tmode0
);
30634 scratch1
= gen_reg_rtx (tmode1
);
30636 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30646 target
= gen_reg_rtx (SImode
);
30647 emit_move_insn (target
, const0_rtx
);
30648 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30651 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30652 gen_rtx_fmt_ee (EQ
, QImode
,
30653 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30656 return SUBREG_REG (target
);
30662 /* Subroutine of ix86_expand_builtin to take care of insns with
30663 variable number of operands. */
30666 ix86_expand_args_builtin (const struct builtin_description
*d
,
30667 tree exp
, rtx target
)
30669 rtx pat
, real_target
;
30670 unsigned int i
, nargs
;
30671 unsigned int nargs_constant
= 0;
30672 int num_memory
= 0;
30676 enum machine_mode mode
;
30678 bool last_arg_count
= false;
30679 enum insn_code icode
= d
->icode
;
30680 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30681 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30682 enum machine_mode rmode
= VOIDmode
;
30684 enum rtx_code comparison
= d
->comparison
;
30686 switch ((enum ix86_builtin_func_type
) d
->flag
)
30688 case V2DF_FTYPE_V2DF_ROUND
:
30689 case V4DF_FTYPE_V4DF_ROUND
:
30690 case V4SF_FTYPE_V4SF_ROUND
:
30691 case V8SF_FTYPE_V8SF_ROUND
:
30692 case V4SI_FTYPE_V4SF_ROUND
:
30693 case V8SI_FTYPE_V8SF_ROUND
:
30694 return ix86_expand_sse_round (d
, exp
, target
);
30695 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30696 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30697 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30698 case INT_FTYPE_V8SF_V8SF_PTEST
:
30699 case INT_FTYPE_V4DI_V4DI_PTEST
:
30700 case INT_FTYPE_V4DF_V4DF_PTEST
:
30701 case INT_FTYPE_V4SF_V4SF_PTEST
:
30702 case INT_FTYPE_V2DI_V2DI_PTEST
:
30703 case INT_FTYPE_V2DF_V2DF_PTEST
:
30704 return ix86_expand_sse_ptest (d
, exp
, target
);
30705 case FLOAT128_FTYPE_FLOAT128
:
30706 case FLOAT_FTYPE_FLOAT
:
30707 case INT_FTYPE_INT
:
30708 case UINT64_FTYPE_INT
:
30709 case UINT16_FTYPE_UINT16
:
30710 case INT64_FTYPE_INT64
:
30711 case INT64_FTYPE_V4SF
:
30712 case INT64_FTYPE_V2DF
:
30713 case INT_FTYPE_V16QI
:
30714 case INT_FTYPE_V8QI
:
30715 case INT_FTYPE_V8SF
:
30716 case INT_FTYPE_V4DF
:
30717 case INT_FTYPE_V4SF
:
30718 case INT_FTYPE_V2DF
:
30719 case INT_FTYPE_V32QI
:
30720 case V16QI_FTYPE_V16QI
:
30721 case V8SI_FTYPE_V8SF
:
30722 case V8SI_FTYPE_V4SI
:
30723 case V8HI_FTYPE_V8HI
:
30724 case V8HI_FTYPE_V16QI
:
30725 case V8QI_FTYPE_V8QI
:
30726 case V8SF_FTYPE_V8SF
:
30727 case V8SF_FTYPE_V8SI
:
30728 case V8SF_FTYPE_V4SF
:
30729 case V8SF_FTYPE_V8HI
:
30730 case V4SI_FTYPE_V4SI
:
30731 case V4SI_FTYPE_V16QI
:
30732 case V4SI_FTYPE_V4SF
:
30733 case V4SI_FTYPE_V8SI
:
30734 case V4SI_FTYPE_V8HI
:
30735 case V4SI_FTYPE_V4DF
:
30736 case V4SI_FTYPE_V2DF
:
30737 case V4HI_FTYPE_V4HI
:
30738 case V4DF_FTYPE_V4DF
:
30739 case V4DF_FTYPE_V4SI
:
30740 case V4DF_FTYPE_V4SF
:
30741 case V4DF_FTYPE_V2DF
:
30742 case V4SF_FTYPE_V4SF
:
30743 case V4SF_FTYPE_V4SI
:
30744 case V4SF_FTYPE_V8SF
:
30745 case V4SF_FTYPE_V4DF
:
30746 case V4SF_FTYPE_V8HI
:
30747 case V4SF_FTYPE_V2DF
:
30748 case V2DI_FTYPE_V2DI
:
30749 case V2DI_FTYPE_V16QI
:
30750 case V2DI_FTYPE_V8HI
:
30751 case V2DI_FTYPE_V4SI
:
30752 case V2DF_FTYPE_V2DF
:
30753 case V2DF_FTYPE_V4SI
:
30754 case V2DF_FTYPE_V4DF
:
30755 case V2DF_FTYPE_V4SF
:
30756 case V2DF_FTYPE_V2SI
:
30757 case V2SI_FTYPE_V2SI
:
30758 case V2SI_FTYPE_V4SF
:
30759 case V2SI_FTYPE_V2SF
:
30760 case V2SI_FTYPE_V2DF
:
30761 case V2SF_FTYPE_V2SF
:
30762 case V2SF_FTYPE_V2SI
:
30763 case V32QI_FTYPE_V32QI
:
30764 case V32QI_FTYPE_V16QI
:
30765 case V16HI_FTYPE_V16HI
:
30766 case V16HI_FTYPE_V8HI
:
30767 case V8SI_FTYPE_V8SI
:
30768 case V16HI_FTYPE_V16QI
:
30769 case V8SI_FTYPE_V16QI
:
30770 case V4DI_FTYPE_V16QI
:
30771 case V8SI_FTYPE_V8HI
:
30772 case V4DI_FTYPE_V8HI
:
30773 case V4DI_FTYPE_V4SI
:
30774 case V4DI_FTYPE_V2DI
:
30777 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30778 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30779 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30780 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30781 case V16QI_FTYPE_V16QI_V16QI
:
30782 case V16QI_FTYPE_V8HI_V8HI
:
30783 case V8QI_FTYPE_V8QI_V8QI
:
30784 case V8QI_FTYPE_V4HI_V4HI
:
30785 case V8HI_FTYPE_V8HI_V8HI
:
30786 case V8HI_FTYPE_V16QI_V16QI
:
30787 case V8HI_FTYPE_V4SI_V4SI
:
30788 case V8SF_FTYPE_V8SF_V8SF
:
30789 case V8SF_FTYPE_V8SF_V8SI
:
30790 case V4SI_FTYPE_V4SI_V4SI
:
30791 case V4SI_FTYPE_V8HI_V8HI
:
30792 case V4SI_FTYPE_V4SF_V4SF
:
30793 case V4SI_FTYPE_V2DF_V2DF
:
30794 case V4HI_FTYPE_V4HI_V4HI
:
30795 case V4HI_FTYPE_V8QI_V8QI
:
30796 case V4HI_FTYPE_V2SI_V2SI
:
30797 case V4DF_FTYPE_V4DF_V4DF
:
30798 case V4DF_FTYPE_V4DF_V4DI
:
30799 case V4SF_FTYPE_V4SF_V4SF
:
30800 case V4SF_FTYPE_V4SF_V4SI
:
30801 case V4SF_FTYPE_V4SF_V2SI
:
30802 case V4SF_FTYPE_V4SF_V2DF
:
30803 case V4SF_FTYPE_V4SF_DI
:
30804 case V4SF_FTYPE_V4SF_SI
:
30805 case V2DI_FTYPE_V2DI_V2DI
:
30806 case V2DI_FTYPE_V16QI_V16QI
:
30807 case V2DI_FTYPE_V4SI_V4SI
:
30808 case V2UDI_FTYPE_V4USI_V4USI
:
30809 case V2DI_FTYPE_V2DI_V16QI
:
30810 case V2DI_FTYPE_V2DF_V2DF
:
30811 case V2SI_FTYPE_V2SI_V2SI
:
30812 case V2SI_FTYPE_V4HI_V4HI
:
30813 case V2SI_FTYPE_V2SF_V2SF
:
30814 case V2DF_FTYPE_V2DF_V2DF
:
30815 case V2DF_FTYPE_V2DF_V4SF
:
30816 case V2DF_FTYPE_V2DF_V2DI
:
30817 case V2DF_FTYPE_V2DF_DI
:
30818 case V2DF_FTYPE_V2DF_SI
:
30819 case V2SF_FTYPE_V2SF_V2SF
:
30820 case V1DI_FTYPE_V1DI_V1DI
:
30821 case V1DI_FTYPE_V8QI_V8QI
:
30822 case V1DI_FTYPE_V2SI_V2SI
:
30823 case V32QI_FTYPE_V16HI_V16HI
:
30824 case V16HI_FTYPE_V8SI_V8SI
:
30825 case V32QI_FTYPE_V32QI_V32QI
:
30826 case V16HI_FTYPE_V32QI_V32QI
:
30827 case V16HI_FTYPE_V16HI_V16HI
:
30828 case V8SI_FTYPE_V4DF_V4DF
:
30829 case V8SI_FTYPE_V8SI_V8SI
:
30830 case V8SI_FTYPE_V16HI_V16HI
:
30831 case V4DI_FTYPE_V4DI_V4DI
:
30832 case V4DI_FTYPE_V8SI_V8SI
:
30833 case V4UDI_FTYPE_V8USI_V8USI
:
30834 if (comparison
== UNKNOWN
)
30835 return ix86_expand_binop_builtin (icode
, exp
, target
);
30838 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30839 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30840 gcc_assert (comparison
!= UNKNOWN
);
30844 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30845 case V16HI_FTYPE_V16HI_SI_COUNT
:
30846 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30847 case V8SI_FTYPE_V8SI_SI_COUNT
:
30848 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30849 case V4DI_FTYPE_V4DI_INT_COUNT
:
30850 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30851 case V8HI_FTYPE_V8HI_SI_COUNT
:
30852 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30853 case V4SI_FTYPE_V4SI_SI_COUNT
:
30854 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30855 case V4HI_FTYPE_V4HI_SI_COUNT
:
30856 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30857 case V2DI_FTYPE_V2DI_SI_COUNT
:
30858 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30859 case V2SI_FTYPE_V2SI_SI_COUNT
:
30860 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30861 case V1DI_FTYPE_V1DI_SI_COUNT
:
30863 last_arg_count
= true;
30865 case UINT64_FTYPE_UINT64_UINT64
:
30866 case UINT_FTYPE_UINT_UINT
:
30867 case UINT_FTYPE_UINT_USHORT
:
30868 case UINT_FTYPE_UINT_UCHAR
:
30869 case UINT16_FTYPE_UINT16_INT
:
30870 case UINT8_FTYPE_UINT8_INT
:
30873 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30876 nargs_constant
= 1;
30878 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30881 nargs_constant
= 1;
30883 case V8HI_FTYPE_V8HI_INT
:
30884 case V8HI_FTYPE_V8SF_INT
:
30885 case V8HI_FTYPE_V4SF_INT
:
30886 case V8SF_FTYPE_V8SF_INT
:
30887 case V4SI_FTYPE_V4SI_INT
:
30888 case V4SI_FTYPE_V8SI_INT
:
30889 case V4HI_FTYPE_V4HI_INT
:
30890 case V4DF_FTYPE_V4DF_INT
:
30891 case V4SF_FTYPE_V4SF_INT
:
30892 case V4SF_FTYPE_V8SF_INT
:
30893 case V2DI_FTYPE_V2DI_INT
:
30894 case V2DF_FTYPE_V2DF_INT
:
30895 case V2DF_FTYPE_V4DF_INT
:
30896 case V16HI_FTYPE_V16HI_INT
:
30897 case V8SI_FTYPE_V8SI_INT
:
30898 case V4DI_FTYPE_V4DI_INT
:
30899 case V2DI_FTYPE_V4DI_INT
:
30901 nargs_constant
= 1;
30903 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30904 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30905 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30906 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30907 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30908 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30911 case V32QI_FTYPE_V32QI_V32QI_INT
:
30912 case V16HI_FTYPE_V16HI_V16HI_INT
:
30913 case V16QI_FTYPE_V16QI_V16QI_INT
:
30914 case V4DI_FTYPE_V4DI_V4DI_INT
:
30915 case V8HI_FTYPE_V8HI_V8HI_INT
:
30916 case V8SI_FTYPE_V8SI_V8SI_INT
:
30917 case V8SI_FTYPE_V8SI_V4SI_INT
:
30918 case V8SF_FTYPE_V8SF_V8SF_INT
:
30919 case V8SF_FTYPE_V8SF_V4SF_INT
:
30920 case V4SI_FTYPE_V4SI_V4SI_INT
:
30921 case V4DF_FTYPE_V4DF_V4DF_INT
:
30922 case V4DF_FTYPE_V4DF_V2DF_INT
:
30923 case V4SF_FTYPE_V4SF_V4SF_INT
:
30924 case V2DI_FTYPE_V2DI_V2DI_INT
:
30925 case V4DI_FTYPE_V4DI_V2DI_INT
:
30926 case V2DF_FTYPE_V2DF_V2DF_INT
:
30928 nargs_constant
= 1;
30930 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
30933 nargs_constant
= 1;
30935 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
30938 nargs_constant
= 1;
30940 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
30943 nargs_constant
= 1;
30945 case V2DI_FTYPE_V2DI_UINT_UINT
:
30947 nargs_constant
= 2;
30949 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
30950 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
30951 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
30952 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
30954 nargs_constant
= 1;
30956 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
30958 nargs_constant
= 2;
30960 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
30961 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
30965 gcc_unreachable ();
30968 gcc_assert (nargs
<= ARRAY_SIZE (args
));
30970 if (comparison
!= UNKNOWN
)
30972 gcc_assert (nargs
== 2);
30973 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
30976 if (rmode
== VOIDmode
|| rmode
== tmode
)
30980 || GET_MODE (target
) != tmode
30981 || !insn_p
->operand
[0].predicate (target
, tmode
))
30982 target
= gen_reg_rtx (tmode
);
30983 real_target
= target
;
30987 target
= gen_reg_rtx (rmode
);
30988 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
30991 for (i
= 0; i
< nargs
; i
++)
30993 tree arg
= CALL_EXPR_ARG (exp
, i
);
30994 rtx op
= expand_normal (arg
);
30995 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
30996 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
30998 if (last_arg_count
&& (i
+ 1) == nargs
)
31000 /* SIMD shift insns take either an 8-bit immediate or
31001 register as count. But builtin functions take int as
31002 count. If count doesn't match, we put it in register. */
31005 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31006 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31007 op
= copy_to_reg (op
);
31010 else if ((nargs
- i
) <= nargs_constant
)
31015 case CODE_FOR_avx2_inserti128
:
31016 case CODE_FOR_avx2_extracti128
:
31017 error ("the last argument must be an 1-bit immediate");
31020 case CODE_FOR_sse4_1_roundsd
:
31021 case CODE_FOR_sse4_1_roundss
:
31023 case CODE_FOR_sse4_1_roundpd
:
31024 case CODE_FOR_sse4_1_roundps
:
31025 case CODE_FOR_avx_roundpd256
:
31026 case CODE_FOR_avx_roundps256
:
31028 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31029 case CODE_FOR_sse4_1_roundps_sfix
:
31030 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31031 case CODE_FOR_avx_roundps_sfix256
:
31033 case CODE_FOR_sse4_1_blendps
:
31034 case CODE_FOR_avx_blendpd256
:
31035 case CODE_FOR_avx_vpermilv4df
:
31036 error ("the last argument must be a 4-bit immediate");
31039 case CODE_FOR_sse4_1_blendpd
:
31040 case CODE_FOR_avx_vpermilv2df
:
31041 case CODE_FOR_xop_vpermil2v2df3
:
31042 case CODE_FOR_xop_vpermil2v4sf3
:
31043 case CODE_FOR_xop_vpermil2v4df3
:
31044 case CODE_FOR_xop_vpermil2v8sf3
:
31045 error ("the last argument must be a 2-bit immediate");
31048 case CODE_FOR_avx_vextractf128v4df
:
31049 case CODE_FOR_avx_vextractf128v8sf
:
31050 case CODE_FOR_avx_vextractf128v8si
:
31051 case CODE_FOR_avx_vinsertf128v4df
:
31052 case CODE_FOR_avx_vinsertf128v8sf
:
31053 case CODE_FOR_avx_vinsertf128v8si
:
31054 error ("the last argument must be a 1-bit immediate");
31057 case CODE_FOR_avx_vmcmpv2df3
:
31058 case CODE_FOR_avx_vmcmpv4sf3
:
31059 case CODE_FOR_avx_cmpv2df3
:
31060 case CODE_FOR_avx_cmpv4sf3
:
31061 case CODE_FOR_avx_cmpv4df3
:
31062 case CODE_FOR_avx_cmpv8sf3
:
31063 error ("the last argument must be a 5-bit immediate");
31067 switch (nargs_constant
)
31070 if ((nargs
- i
) == nargs_constant
)
31072 error ("the next to last argument must be an 8-bit immediate");
31076 error ("the last argument must be an 8-bit immediate");
31079 gcc_unreachable ();
31086 if (VECTOR_MODE_P (mode
))
31087 op
= safe_vector_operand (op
, mode
);
31089 /* If we aren't optimizing, only allow one memory operand to
31091 if (memory_operand (op
, mode
))
31094 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31096 if (optimize
|| !match
|| num_memory
> 1)
31097 op
= copy_to_mode_reg (mode
, op
);
31101 op
= copy_to_reg (op
);
31102 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31107 args
[i
].mode
= mode
;
31113 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31116 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31119 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31123 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31124 args
[2].op
, args
[3].op
);
31127 gcc_unreachable ();
31137 /* Subroutine of ix86_expand_builtin to take care of special insns
31138 with variable number of operands. */
31141 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31142 tree exp
, rtx target
)
31146 unsigned int i
, nargs
, arg_adjust
, memory
;
31150 enum machine_mode mode
;
31152 enum insn_code icode
= d
->icode
;
31153 bool last_arg_constant
= false;
31154 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31155 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31156 enum { load
, store
} klass
;
31158 switch ((enum ix86_builtin_func_type
) d
->flag
)
31160 case VOID_FTYPE_VOID
:
31161 emit_insn (GEN_FCN (icode
) (target
));
31163 case VOID_FTYPE_UINT64
:
31164 case VOID_FTYPE_UNSIGNED
:
31170 case INT_FTYPE_VOID
:
31171 case UINT64_FTYPE_VOID
:
31172 case UNSIGNED_FTYPE_VOID
:
31177 case UINT64_FTYPE_PUNSIGNED
:
31178 case V2DI_FTYPE_PV2DI
:
31179 case V4DI_FTYPE_PV4DI
:
31180 case V32QI_FTYPE_PCCHAR
:
31181 case V16QI_FTYPE_PCCHAR
:
31182 case V8SF_FTYPE_PCV4SF
:
31183 case V8SF_FTYPE_PCFLOAT
:
31184 case V4SF_FTYPE_PCFLOAT
:
31185 case V4DF_FTYPE_PCV2DF
:
31186 case V4DF_FTYPE_PCDOUBLE
:
31187 case V2DF_FTYPE_PCDOUBLE
:
31188 case VOID_FTYPE_PVOID
:
31193 case VOID_FTYPE_PV2SF_V4SF
:
31194 case VOID_FTYPE_PV4DI_V4DI
:
31195 case VOID_FTYPE_PV2DI_V2DI
:
31196 case VOID_FTYPE_PCHAR_V32QI
:
31197 case VOID_FTYPE_PCHAR_V16QI
:
31198 case VOID_FTYPE_PFLOAT_V8SF
:
31199 case VOID_FTYPE_PFLOAT_V4SF
:
31200 case VOID_FTYPE_PDOUBLE_V4DF
:
31201 case VOID_FTYPE_PDOUBLE_V2DF
:
31202 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31203 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31204 case VOID_FTYPE_PINT_INT
:
31207 /* Reserve memory operand for target. */
31208 memory
= ARRAY_SIZE (args
);
31210 case V4SF_FTYPE_V4SF_PCV2SF
:
31211 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31216 case V8SF_FTYPE_PCV8SF_V8SI
:
31217 case V4DF_FTYPE_PCV4DF_V4DI
:
31218 case V4SF_FTYPE_PCV4SF_V4SI
:
31219 case V2DF_FTYPE_PCV2DF_V2DI
:
31220 case V8SI_FTYPE_PCV8SI_V8SI
:
31221 case V4DI_FTYPE_PCV4DI_V4DI
:
31222 case V4SI_FTYPE_PCV4SI_V4SI
:
31223 case V2DI_FTYPE_PCV2DI_V2DI
:
31228 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31229 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31230 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31231 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31232 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31233 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31234 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31235 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31238 /* Reserve memory operand for target. */
31239 memory
= ARRAY_SIZE (args
);
31241 case VOID_FTYPE_UINT_UINT_UINT
:
31242 case VOID_FTYPE_UINT64_UINT_UINT
:
31243 case UCHAR_FTYPE_UINT_UINT_UINT
:
31244 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31247 memory
= ARRAY_SIZE (args
);
31248 last_arg_constant
= true;
31251 gcc_unreachable ();
31254 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31256 if (klass
== store
)
31258 arg
= CALL_EXPR_ARG (exp
, 0);
31259 op
= expand_normal (arg
);
31260 gcc_assert (target
== 0);
31263 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31264 target
= gen_rtx_MEM (tmode
, op
);
31267 target
= force_reg (tmode
, op
);
31275 || !register_operand (target
, tmode
)
31276 || GET_MODE (target
) != tmode
)
31277 target
= gen_reg_rtx (tmode
);
31280 for (i
= 0; i
< nargs
; i
++)
31282 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31285 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31286 op
= expand_normal (arg
);
31287 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31289 if (last_arg_constant
&& (i
+ 1) == nargs
)
31293 if (icode
== CODE_FOR_lwp_lwpvalsi3
31294 || icode
== CODE_FOR_lwp_lwpinssi3
31295 || icode
== CODE_FOR_lwp_lwpvaldi3
31296 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31297 error ("the last argument must be a 32-bit immediate");
31299 error ("the last argument must be an 8-bit immediate");
31307 /* This must be the memory operand. */
31308 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31309 op
= gen_rtx_MEM (mode
, op
);
31310 gcc_assert (GET_MODE (op
) == mode
31311 || GET_MODE (op
) == VOIDmode
);
31315 /* This must be register. */
31316 if (VECTOR_MODE_P (mode
))
31317 op
= safe_vector_operand (op
, mode
);
31319 gcc_assert (GET_MODE (op
) == mode
31320 || GET_MODE (op
) == VOIDmode
);
31321 op
= copy_to_mode_reg (mode
, op
);
31326 args
[i
].mode
= mode
;
31332 pat
= GEN_FCN (icode
) (target
);
31335 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31338 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31341 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31344 gcc_unreachable ();
31350 return klass
== store
? 0 : target
;
31353 /* Return the integer constant in ARG. Constrain it to be in the range
31354 of the subparts of VEC_TYPE; issue an error if not. */
31357 get_element_number (tree vec_type
, tree arg
)
31359 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31361 if (!host_integerp (arg
, 1)
31362 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31364 error ("selector must be an integer constant in the range 0..%wi", max
);
31371 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31372 ix86_expand_vector_init. We DO have language-level syntax for this, in
31373 the form of (type){ init-list }. Except that since we can't place emms
31374 instructions from inside the compiler, we can't allow the use of MMX
31375 registers unless the user explicitly asks for it. So we do *not* define
31376 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31377 we have builtins invoked by mmintrin.h that gives us license to emit
31378 these sorts of instructions. */
31381 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31383 enum machine_mode tmode
= TYPE_MODE (type
);
31384 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31385 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31386 rtvec v
= rtvec_alloc (n_elt
);
31388 gcc_assert (VECTOR_MODE_P (tmode
));
31389 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31391 for (i
= 0; i
< n_elt
; ++i
)
31393 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31394 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31397 if (!target
|| !register_operand (target
, tmode
))
31398 target
= gen_reg_rtx (tmode
);
31400 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31404 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31405 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31406 had a language-level syntax for referencing vector elements. */
31409 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31411 enum machine_mode tmode
, mode0
;
31416 arg0
= CALL_EXPR_ARG (exp
, 0);
31417 arg1
= CALL_EXPR_ARG (exp
, 1);
31419 op0
= expand_normal (arg0
);
31420 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
31422 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31423 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31424 gcc_assert (VECTOR_MODE_P (mode0
));
31426 op0
= force_reg (mode0
, op0
);
31428 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31429 target
= gen_reg_rtx (tmode
);
31431 ix86_expand_vector_extract (true, target
, op0
, elt
);
31436 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31437 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31438 a language-level syntax for referencing vector elements. */
31441 ix86_expand_vec_set_builtin (tree exp
)
31443 enum machine_mode tmode
, mode1
;
31444 tree arg0
, arg1
, arg2
;
31446 rtx op0
, op1
, target
;
31448 arg0
= CALL_EXPR_ARG (exp
, 0);
31449 arg1
= CALL_EXPR_ARG (exp
, 1);
31450 arg2
= CALL_EXPR_ARG (exp
, 2);
31452 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31453 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31454 gcc_assert (VECTOR_MODE_P (tmode
));
31456 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31457 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
31458 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
31460 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31461 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31463 op0
= force_reg (tmode
, op0
);
31464 op1
= force_reg (mode1
, op1
);
31466 /* OP0 is the source of these builtin functions and shouldn't be
31467 modified. Create a copy, use it and return it as target. */
31468 target
= gen_reg_rtx (tmode
);
31469 emit_move_insn (target
, op0
);
31470 ix86_expand_vector_set (true, target
, op1
, elt
);
31475 /* Expand an expression EXP that calls a built-in function,
31476 with result going to TARGET if that's convenient
31477 (and in mode MODE if that's convenient).
31478 SUBTARGET may be used as the target for computing one of EXP's operands.
31479 IGNORE is nonzero if the value is to be ignored. */
31482 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
31483 enum machine_mode mode ATTRIBUTE_UNUSED
,
31484 int ignore ATTRIBUTE_UNUSED
)
31486 const struct builtin_description
*d
;
31488 enum insn_code icode
;
31489 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
31490 tree arg0
, arg1
, arg2
, arg3
, arg4
;
31491 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
31492 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
31493 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
31495 /* For CPU builtins that can be folded, fold first and expand the fold. */
31498 case IX86_BUILTIN_CPU_INIT
:
31500 /* Make it call __cpu_indicator_init in libgcc. */
31501 tree call_expr
, fndecl
, type
;
31502 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
31503 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
31504 call_expr
= build_call_expr (fndecl
, 0);
31505 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
31507 case IX86_BUILTIN_CPU_IS
:
31508 case IX86_BUILTIN_CPU_SUPPORTS
:
31510 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31511 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
31512 gcc_assert (fold_expr
!= NULL_TREE
);
31513 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
31517 /* Determine whether the builtin function is available under the current ISA.
31518 Originally the builtin was not created if it wasn't applicable to the
31519 current ISA based on the command line switches. With function specific
31520 options, we need to check in the context of the function making the call
31521 whether it is supported. */
31522 if (ix86_builtins_isa
[fcode
].isa
31523 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
31525 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
31526 NULL
, (enum fpmath_unit
) 0, false);
31529 error ("%qE needs unknown isa option", fndecl
);
31532 gcc_assert (opts
!= NULL
);
31533 error ("%qE needs isa option %s", fndecl
, opts
);
31541 case IX86_BUILTIN_MASKMOVQ
:
31542 case IX86_BUILTIN_MASKMOVDQU
:
31543 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
31544 ? CODE_FOR_mmx_maskmovq
31545 : CODE_FOR_sse2_maskmovdqu
);
31546 /* Note the arg order is different from the operand order. */
31547 arg1
= CALL_EXPR_ARG (exp
, 0);
31548 arg2
= CALL_EXPR_ARG (exp
, 1);
31549 arg0
= CALL_EXPR_ARG (exp
, 2);
31550 op0
= expand_normal (arg0
);
31551 op1
= expand_normal (arg1
);
31552 op2
= expand_normal (arg2
);
31553 mode0
= insn_data
[icode
].operand
[0].mode
;
31554 mode1
= insn_data
[icode
].operand
[1].mode
;
31555 mode2
= insn_data
[icode
].operand
[2].mode
;
31557 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31558 op0
= gen_rtx_MEM (mode1
, op0
);
31560 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31561 op0
= copy_to_mode_reg (mode0
, op0
);
31562 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
31563 op1
= copy_to_mode_reg (mode1
, op1
);
31564 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
31565 op2
= copy_to_mode_reg (mode2
, op2
);
31566 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31572 case IX86_BUILTIN_LDMXCSR
:
31573 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
31574 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31575 emit_move_insn (target
, op0
);
31576 emit_insn (gen_sse_ldmxcsr (target
));
31579 case IX86_BUILTIN_STMXCSR
:
31580 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31581 emit_insn (gen_sse_stmxcsr (target
));
31582 return copy_to_mode_reg (SImode
, target
);
31584 case IX86_BUILTIN_CLFLUSH
:
31585 arg0
= CALL_EXPR_ARG (exp
, 0);
31586 op0
= expand_normal (arg0
);
31587 icode
= CODE_FOR_sse2_clflush
;
31588 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31589 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31591 emit_insn (gen_sse2_clflush (op0
));
31594 case IX86_BUILTIN_MONITOR
:
31595 arg0
= CALL_EXPR_ARG (exp
, 0);
31596 arg1
= CALL_EXPR_ARG (exp
, 1);
31597 arg2
= CALL_EXPR_ARG (exp
, 2);
31598 op0
= expand_normal (arg0
);
31599 op1
= expand_normal (arg1
);
31600 op2
= expand_normal (arg2
);
31602 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31604 op1
= copy_to_mode_reg (SImode
, op1
);
31606 op2
= copy_to_mode_reg (SImode
, op2
);
31607 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
31610 case IX86_BUILTIN_MWAIT
:
31611 arg0
= CALL_EXPR_ARG (exp
, 0);
31612 arg1
= CALL_EXPR_ARG (exp
, 1);
31613 op0
= expand_normal (arg0
);
31614 op1
= expand_normal (arg1
);
31616 op0
= copy_to_mode_reg (SImode
, op0
);
31618 op1
= copy_to_mode_reg (SImode
, op1
);
31619 emit_insn (gen_sse3_mwait (op0
, op1
));
31622 case IX86_BUILTIN_VEC_INIT_V2SI
:
31623 case IX86_BUILTIN_VEC_INIT_V4HI
:
31624 case IX86_BUILTIN_VEC_INIT_V8QI
:
31625 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
31627 case IX86_BUILTIN_VEC_EXT_V2DF
:
31628 case IX86_BUILTIN_VEC_EXT_V2DI
:
31629 case IX86_BUILTIN_VEC_EXT_V4SF
:
31630 case IX86_BUILTIN_VEC_EXT_V4SI
:
31631 case IX86_BUILTIN_VEC_EXT_V8HI
:
31632 case IX86_BUILTIN_VEC_EXT_V2SI
:
31633 case IX86_BUILTIN_VEC_EXT_V4HI
:
31634 case IX86_BUILTIN_VEC_EXT_V16QI
:
31635 return ix86_expand_vec_ext_builtin (exp
, target
);
31637 case IX86_BUILTIN_VEC_SET_V2DI
:
31638 case IX86_BUILTIN_VEC_SET_V4SF
:
31639 case IX86_BUILTIN_VEC_SET_V4SI
:
31640 case IX86_BUILTIN_VEC_SET_V8HI
:
31641 case IX86_BUILTIN_VEC_SET_V4HI
:
31642 case IX86_BUILTIN_VEC_SET_V16QI
:
31643 return ix86_expand_vec_set_builtin (exp
);
31645 case IX86_BUILTIN_INFQ
:
31646 case IX86_BUILTIN_HUGE_VALQ
:
31648 REAL_VALUE_TYPE inf
;
31652 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
31654 tmp
= validize_mem (force_const_mem (mode
, tmp
));
31657 target
= gen_reg_rtx (mode
);
31659 emit_move_insn (target
, tmp
);
31663 case IX86_BUILTIN_RDPMC
:
31664 case IX86_BUILTIN_RDTSC
:
31665 case IX86_BUILTIN_RDTSCP
:
31667 op0
= gen_reg_rtx (DImode
);
31668 op1
= gen_reg_rtx (DImode
);
31670 if (fcode
== IX86_BUILTIN_RDPMC
)
31672 arg0
= CALL_EXPR_ARG (exp
, 0);
31673 op2
= expand_normal (arg0
);
31674 if (!register_operand (op2
, SImode
))
31675 op2
= copy_to_mode_reg (SImode
, op2
);
31677 insn
= (TARGET_64BIT
31678 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
31679 : gen_rdpmc (op0
, op2
));
31682 else if (fcode
== IX86_BUILTIN_RDTSC
)
31684 insn
= (TARGET_64BIT
31685 ? gen_rdtsc_rex64 (op0
, op1
)
31686 : gen_rdtsc (op0
));
31691 op2
= gen_reg_rtx (SImode
);
31693 insn
= (TARGET_64BIT
31694 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
31695 : gen_rdtscp (op0
, op2
));
31698 arg0
= CALL_EXPR_ARG (exp
, 0);
31699 op4
= expand_normal (arg0
);
31700 if (!address_operand (op4
, VOIDmode
))
31702 op4
= convert_memory_address (Pmode
, op4
);
31703 op4
= copy_addr_to_reg (op4
);
31705 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
31709 target
= gen_reg_rtx (mode
);
31713 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
31714 op1
, 1, OPTAB_DIRECT
);
31715 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
31716 op0
, 1, OPTAB_DIRECT
);
31719 emit_move_insn (target
, op0
);
31722 case IX86_BUILTIN_FXSAVE
:
31723 case IX86_BUILTIN_FXRSTOR
:
31724 case IX86_BUILTIN_FXSAVE64
:
31725 case IX86_BUILTIN_FXRSTOR64
:
31728 case IX86_BUILTIN_FXSAVE
:
31729 icode
= CODE_FOR_fxsave
;
31731 case IX86_BUILTIN_FXRSTOR
:
31732 icode
= CODE_FOR_fxrstor
;
31734 case IX86_BUILTIN_FXSAVE64
:
31735 icode
= CODE_FOR_fxsave64
;
31737 case IX86_BUILTIN_FXRSTOR64
:
31738 icode
= CODE_FOR_fxrstor64
;
31741 gcc_unreachable ();
31744 arg0
= CALL_EXPR_ARG (exp
, 0);
31745 op0
= expand_normal (arg0
);
31747 if (!address_operand (op0
, VOIDmode
))
31749 op0
= convert_memory_address (Pmode
, op0
);
31750 op0
= copy_addr_to_reg (op0
);
31752 op0
= gen_rtx_MEM (BLKmode
, op0
);
31754 pat
= GEN_FCN (icode
) (op0
);
31759 case IX86_BUILTIN_XSAVE
:
31760 case IX86_BUILTIN_XRSTOR
:
31761 case IX86_BUILTIN_XSAVE64
:
31762 case IX86_BUILTIN_XRSTOR64
:
31763 case IX86_BUILTIN_XSAVEOPT
:
31764 case IX86_BUILTIN_XSAVEOPT64
:
31765 arg0
= CALL_EXPR_ARG (exp
, 0);
31766 arg1
= CALL_EXPR_ARG (exp
, 1);
31767 op0
= expand_normal (arg0
);
31768 op1
= expand_normal (arg1
);
31770 if (!address_operand (op0
, VOIDmode
))
31772 op0
= convert_memory_address (Pmode
, op0
);
31773 op0
= copy_addr_to_reg (op0
);
31775 op0
= gen_rtx_MEM (BLKmode
, op0
);
31777 op1
= force_reg (DImode
, op1
);
31781 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
31782 NULL
, 1, OPTAB_DIRECT
);
31785 case IX86_BUILTIN_XSAVE
:
31786 icode
= CODE_FOR_xsave_rex64
;
31788 case IX86_BUILTIN_XRSTOR
:
31789 icode
= CODE_FOR_xrstor_rex64
;
31791 case IX86_BUILTIN_XSAVE64
:
31792 icode
= CODE_FOR_xsave64
;
31794 case IX86_BUILTIN_XRSTOR64
:
31795 icode
= CODE_FOR_xrstor64
;
31797 case IX86_BUILTIN_XSAVEOPT
:
31798 icode
= CODE_FOR_xsaveopt_rex64
;
31800 case IX86_BUILTIN_XSAVEOPT64
:
31801 icode
= CODE_FOR_xsaveopt64
;
31804 gcc_unreachable ();
31807 op2
= gen_lowpart (SImode
, op2
);
31808 op1
= gen_lowpart (SImode
, op1
);
31809 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31815 case IX86_BUILTIN_XSAVE
:
31816 icode
= CODE_FOR_xsave
;
31818 case IX86_BUILTIN_XRSTOR
:
31819 icode
= CODE_FOR_xrstor
;
31821 case IX86_BUILTIN_XSAVEOPT
:
31822 icode
= CODE_FOR_xsaveopt
;
31825 gcc_unreachable ();
31827 pat
= GEN_FCN (icode
) (op0
, op1
);
31834 case IX86_BUILTIN_LLWPCB
:
31835 arg0
= CALL_EXPR_ARG (exp
, 0);
31836 op0
= expand_normal (arg0
);
31837 icode
= CODE_FOR_lwp_llwpcb
;
31838 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31839 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31840 emit_insn (gen_lwp_llwpcb (op0
));
31843 case IX86_BUILTIN_SLWPCB
:
31844 icode
= CODE_FOR_lwp_slwpcb
;
31846 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
31847 target
= gen_reg_rtx (Pmode
);
31848 emit_insn (gen_lwp_slwpcb (target
));
31851 case IX86_BUILTIN_BEXTRI32
:
31852 case IX86_BUILTIN_BEXTRI64
:
31853 arg0
= CALL_EXPR_ARG (exp
, 0);
31854 arg1
= CALL_EXPR_ARG (exp
, 1);
31855 op0
= expand_normal (arg0
);
31856 op1
= expand_normal (arg1
);
31857 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
31858 ? CODE_FOR_tbm_bextri_si
31859 : CODE_FOR_tbm_bextri_di
);
31860 if (!CONST_INT_P (op1
))
31862 error ("last argument must be an immediate");
31867 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
31868 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
31869 op1
= GEN_INT (length
);
31870 op2
= GEN_INT (lsb_index
);
31871 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
31877 case IX86_BUILTIN_RDRAND16_STEP
:
31878 icode
= CODE_FOR_rdrandhi_1
;
31882 case IX86_BUILTIN_RDRAND32_STEP
:
31883 icode
= CODE_FOR_rdrandsi_1
;
31887 case IX86_BUILTIN_RDRAND64_STEP
:
31888 icode
= CODE_FOR_rdranddi_1
;
31892 op0
= gen_reg_rtx (mode0
);
31893 emit_insn (GEN_FCN (icode
) (op0
));
31895 arg0
= CALL_EXPR_ARG (exp
, 0);
31896 op1
= expand_normal (arg0
);
31897 if (!address_operand (op1
, VOIDmode
))
31899 op1
= convert_memory_address (Pmode
, op1
);
31900 op1
= copy_addr_to_reg (op1
);
31902 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31904 op1
= gen_reg_rtx (SImode
);
31905 emit_move_insn (op1
, CONST1_RTX (SImode
));
31907 /* Emit SImode conditional move. */
31908 if (mode0
== HImode
)
31910 op2
= gen_reg_rtx (SImode
);
31911 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
31913 else if (mode0
== SImode
)
31916 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
31919 target
= gen_reg_rtx (SImode
);
31921 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
31923 emit_insn (gen_rtx_SET (VOIDmode
, target
,
31924 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
31927 case IX86_BUILTIN_RDSEED16_STEP
:
31928 icode
= CODE_FOR_rdseedhi_1
;
31932 case IX86_BUILTIN_RDSEED32_STEP
:
31933 icode
= CODE_FOR_rdseedsi_1
;
31937 case IX86_BUILTIN_RDSEED64_STEP
:
31938 icode
= CODE_FOR_rdseeddi_1
;
31942 op0
= gen_reg_rtx (mode0
);
31943 emit_insn (GEN_FCN (icode
) (op0
));
31945 arg0
= CALL_EXPR_ARG (exp
, 0);
31946 op1
= expand_normal (arg0
);
31947 if (!address_operand (op1
, VOIDmode
))
31949 op1
= convert_memory_address (Pmode
, op1
);
31950 op1
= copy_addr_to_reg (op1
);
31952 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31954 op2
= gen_reg_rtx (QImode
);
31956 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
31958 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
31961 target
= gen_reg_rtx (SImode
);
31963 emit_insn (gen_zero_extendqisi2 (target
, op2
));
31966 case IX86_BUILTIN_ADDCARRYX32
:
31967 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
31971 case IX86_BUILTIN_ADDCARRYX64
:
31972 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
31976 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
31977 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
31978 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
31979 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
31981 op0
= gen_reg_rtx (QImode
);
31983 /* Generate CF from input operand. */
31984 op1
= expand_normal (arg0
);
31985 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
31986 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
31988 /* Gen ADCX instruction to compute X+Y+CF. */
31989 op2
= expand_normal (arg1
);
31990 op3
= expand_normal (arg2
);
31993 op2
= copy_to_mode_reg (mode0
, op2
);
31995 op3
= copy_to_mode_reg (mode0
, op3
);
31997 op0
= gen_reg_rtx (mode0
);
31999 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32000 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32001 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32003 /* Store the result. */
32004 op4
= expand_normal (arg3
);
32005 if (!address_operand (op4
, VOIDmode
))
32007 op4
= convert_memory_address (Pmode
, op4
);
32008 op4
= copy_addr_to_reg (op4
);
32010 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32012 /* Return current CF value. */
32014 target
= gen_reg_rtx (QImode
);
32016 PUT_MODE (pat
, QImode
);
32017 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32020 case IX86_BUILTIN_GATHERSIV2DF
:
32021 icode
= CODE_FOR_avx2_gathersiv2df
;
32023 case IX86_BUILTIN_GATHERSIV4DF
:
32024 icode
= CODE_FOR_avx2_gathersiv4df
;
32026 case IX86_BUILTIN_GATHERDIV2DF
:
32027 icode
= CODE_FOR_avx2_gatherdiv2df
;
32029 case IX86_BUILTIN_GATHERDIV4DF
:
32030 icode
= CODE_FOR_avx2_gatherdiv4df
;
32032 case IX86_BUILTIN_GATHERSIV4SF
:
32033 icode
= CODE_FOR_avx2_gathersiv4sf
;
32035 case IX86_BUILTIN_GATHERSIV8SF
:
32036 icode
= CODE_FOR_avx2_gathersiv8sf
;
32038 case IX86_BUILTIN_GATHERDIV4SF
:
32039 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32041 case IX86_BUILTIN_GATHERDIV8SF
:
32042 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32044 case IX86_BUILTIN_GATHERSIV2DI
:
32045 icode
= CODE_FOR_avx2_gathersiv2di
;
32047 case IX86_BUILTIN_GATHERSIV4DI
:
32048 icode
= CODE_FOR_avx2_gathersiv4di
;
32050 case IX86_BUILTIN_GATHERDIV2DI
:
32051 icode
= CODE_FOR_avx2_gatherdiv2di
;
32053 case IX86_BUILTIN_GATHERDIV4DI
:
32054 icode
= CODE_FOR_avx2_gatherdiv4di
;
32056 case IX86_BUILTIN_GATHERSIV4SI
:
32057 icode
= CODE_FOR_avx2_gathersiv4si
;
32059 case IX86_BUILTIN_GATHERSIV8SI
:
32060 icode
= CODE_FOR_avx2_gathersiv8si
;
32062 case IX86_BUILTIN_GATHERDIV4SI
:
32063 icode
= CODE_FOR_avx2_gatherdiv4si
;
32065 case IX86_BUILTIN_GATHERDIV8SI
:
32066 icode
= CODE_FOR_avx2_gatherdiv8si
;
32068 case IX86_BUILTIN_GATHERALTSIV4DF
:
32069 icode
= CODE_FOR_avx2_gathersiv4df
;
32071 case IX86_BUILTIN_GATHERALTDIV8SF
:
32072 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32074 case IX86_BUILTIN_GATHERALTSIV4DI
:
32075 icode
= CODE_FOR_avx2_gathersiv4di
;
32077 case IX86_BUILTIN_GATHERALTDIV8SI
:
32078 icode
= CODE_FOR_avx2_gatherdiv8si
;
32082 arg0
= CALL_EXPR_ARG (exp
, 0);
32083 arg1
= CALL_EXPR_ARG (exp
, 1);
32084 arg2
= CALL_EXPR_ARG (exp
, 2);
32085 arg3
= CALL_EXPR_ARG (exp
, 3);
32086 arg4
= CALL_EXPR_ARG (exp
, 4);
32087 op0
= expand_normal (arg0
);
32088 op1
= expand_normal (arg1
);
32089 op2
= expand_normal (arg2
);
32090 op3
= expand_normal (arg3
);
32091 op4
= expand_normal (arg4
);
32092 /* Note the arg order is different from the operand order. */
32093 mode0
= insn_data
[icode
].operand
[1].mode
;
32094 mode2
= insn_data
[icode
].operand
[3].mode
;
32095 mode3
= insn_data
[icode
].operand
[4].mode
;
32096 mode4
= insn_data
[icode
].operand
[5].mode
;
32098 if (target
== NULL_RTX
32099 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32100 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32102 subtarget
= target
;
32104 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32105 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32107 rtx half
= gen_reg_rtx (V4SImode
);
32108 if (!nonimmediate_operand (op2
, V8SImode
))
32109 op2
= copy_to_mode_reg (V8SImode
, op2
);
32110 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32113 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32114 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32116 rtx (*gen
) (rtx
, rtx
);
32117 rtx half
= gen_reg_rtx (mode0
);
32118 if (mode0
== V4SFmode
)
32119 gen
= gen_vec_extract_lo_v8sf
;
32121 gen
= gen_vec_extract_lo_v8si
;
32122 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32123 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32124 emit_insn (gen (half
, op0
));
32126 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32127 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32128 emit_insn (gen (half
, op3
));
32132 /* Force memory operand only with base register here. But we
32133 don't want to do it on memory operand for other builtin
32135 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32137 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32138 op0
= copy_to_mode_reg (mode0
, op0
);
32139 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32140 op1
= copy_to_mode_reg (Pmode
, op1
);
32141 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32142 op2
= copy_to_mode_reg (mode2
, op2
);
32143 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32144 op3
= copy_to_mode_reg (mode3
, op3
);
32145 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32147 error ("last argument must be scale 1, 2, 4, 8");
32151 /* Optimize. If mask is known to have all high bits set,
32152 replace op0 with pc_rtx to signal that the instruction
32153 overwrites the whole destination and doesn't use its
32154 previous contents. */
32157 if (TREE_CODE (arg3
) == VECTOR_CST
)
32159 unsigned int negative
= 0;
32160 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32162 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32163 if (TREE_CODE (cst
) == INTEGER_CST
32164 && tree_int_cst_sign_bit (cst
))
32166 else if (TREE_CODE (cst
) == REAL_CST
32167 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32170 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32173 else if (TREE_CODE (arg3
) == SSA_NAME
)
32175 /* Recognize also when mask is like:
32176 __v2df src = _mm_setzero_pd ();
32177 __v2df mask = _mm_cmpeq_pd (src, src);
32179 __v8sf src = _mm256_setzero_ps ();
32180 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32181 as that is a cheaper way to load all ones into
32182 a register than having to load a constant from
32184 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32185 if (is_gimple_call (def_stmt
))
32187 tree fndecl
= gimple_call_fndecl (def_stmt
);
32189 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32190 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32192 case IX86_BUILTIN_CMPPD
:
32193 case IX86_BUILTIN_CMPPS
:
32194 case IX86_BUILTIN_CMPPD256
:
32195 case IX86_BUILTIN_CMPPS256
:
32196 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32199 case IX86_BUILTIN_CMPEQPD
:
32200 case IX86_BUILTIN_CMPEQPS
:
32201 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32202 && initializer_zerop (gimple_call_arg (def_stmt
,
32213 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32218 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32219 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32221 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32222 ? V4SFmode
: V4SImode
;
32223 if (target
== NULL_RTX
)
32224 target
= gen_reg_rtx (tmode
);
32225 if (tmode
== V4SFmode
)
32226 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32228 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32231 target
= subtarget
;
32235 case IX86_BUILTIN_XABORT
:
32236 icode
= CODE_FOR_xabort
;
32237 arg0
= CALL_EXPR_ARG (exp
, 0);
32238 op0
= expand_normal (arg0
);
32239 mode0
= insn_data
[icode
].operand
[0].mode
;
32240 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32242 error ("the xabort's argument must be an 8-bit immediate");
32245 emit_insn (gen_xabort (op0
));
32252 for (i
= 0, d
= bdesc_special_args
;
32253 i
< ARRAY_SIZE (bdesc_special_args
);
32255 if (d
->code
== fcode
)
32256 return ix86_expand_special_args_builtin (d
, exp
, target
);
32258 for (i
= 0, d
= bdesc_args
;
32259 i
< ARRAY_SIZE (bdesc_args
);
32261 if (d
->code
== fcode
)
32264 case IX86_BUILTIN_FABSQ
:
32265 case IX86_BUILTIN_COPYSIGNQ
:
32267 /* Emit a normal call if SSE isn't available. */
32268 return expand_call (exp
, target
, ignore
);
32270 return ix86_expand_args_builtin (d
, exp
, target
);
32273 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32274 if (d
->code
== fcode
)
32275 return ix86_expand_sse_comi (d
, exp
, target
);
32277 for (i
= 0, d
= bdesc_pcmpestr
;
32278 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32280 if (d
->code
== fcode
)
32281 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32283 for (i
= 0, d
= bdesc_pcmpistr
;
32284 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32286 if (d
->code
== fcode
)
32287 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32289 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32290 if (d
->code
== fcode
)
32291 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32292 (enum ix86_builtin_func_type
)
32293 d
->flag
, d
->comparison
);
32295 gcc_unreachable ();
32298 /* Returns a function decl for a vectorized version of the builtin function
32299 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32300 if it is not available. */
32303 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32306 enum machine_mode in_mode
, out_mode
;
32308 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32310 if (TREE_CODE (type_out
) != VECTOR_TYPE
32311 || TREE_CODE (type_in
) != VECTOR_TYPE
32312 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32315 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32316 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32317 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32318 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32322 case BUILT_IN_SQRT
:
32323 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32325 if (out_n
== 2 && in_n
== 2)
32326 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32327 else if (out_n
== 4 && in_n
== 4)
32328 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32332 case BUILT_IN_SQRTF
:
32333 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32335 if (out_n
== 4 && in_n
== 4)
32336 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32337 else if (out_n
== 8 && in_n
== 8)
32338 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32342 case BUILT_IN_IFLOOR
:
32343 case BUILT_IN_LFLOOR
:
32344 case BUILT_IN_LLFLOOR
:
32345 /* The round insn does not trap on denormals. */
32346 if (flag_trapping_math
|| !TARGET_ROUND
)
32349 if (out_mode
== SImode
&& in_mode
== DFmode
)
32351 if (out_n
== 4 && in_n
== 2)
32352 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32353 else if (out_n
== 8 && in_n
== 4)
32354 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32358 case BUILT_IN_IFLOORF
:
32359 case BUILT_IN_LFLOORF
:
32360 case BUILT_IN_LLFLOORF
:
32361 /* The round insn does not trap on denormals. */
32362 if (flag_trapping_math
|| !TARGET_ROUND
)
32365 if (out_mode
== SImode
&& in_mode
== SFmode
)
32367 if (out_n
== 4 && in_n
== 4)
32368 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32369 else if (out_n
== 8 && in_n
== 8)
32370 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32374 case BUILT_IN_ICEIL
:
32375 case BUILT_IN_LCEIL
:
32376 case BUILT_IN_LLCEIL
:
32377 /* The round insn does not trap on denormals. */
32378 if (flag_trapping_math
|| !TARGET_ROUND
)
32381 if (out_mode
== SImode
&& in_mode
== DFmode
)
32383 if (out_n
== 4 && in_n
== 2)
32384 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32385 else if (out_n
== 8 && in_n
== 4)
32386 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32390 case BUILT_IN_ICEILF
:
32391 case BUILT_IN_LCEILF
:
32392 case BUILT_IN_LLCEILF
:
32393 /* The round insn does not trap on denormals. */
32394 if (flag_trapping_math
|| !TARGET_ROUND
)
32397 if (out_mode
== SImode
&& in_mode
== SFmode
)
32399 if (out_n
== 4 && in_n
== 4)
32400 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32401 else if (out_n
== 8 && in_n
== 8)
32402 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
32406 case BUILT_IN_IRINT
:
32407 case BUILT_IN_LRINT
:
32408 case BUILT_IN_LLRINT
:
32409 if (out_mode
== SImode
&& in_mode
== DFmode
)
32411 if (out_n
== 4 && in_n
== 2)
32412 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
32413 else if (out_n
== 8 && in_n
== 4)
32414 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
32418 case BUILT_IN_IRINTF
:
32419 case BUILT_IN_LRINTF
:
32420 case BUILT_IN_LLRINTF
:
32421 if (out_mode
== SImode
&& in_mode
== SFmode
)
32423 if (out_n
== 4 && in_n
== 4)
32424 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
32425 else if (out_n
== 8 && in_n
== 8)
32426 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
32430 case BUILT_IN_IROUND
:
32431 case BUILT_IN_LROUND
:
32432 case BUILT_IN_LLROUND
:
32433 /* The round insn does not trap on denormals. */
32434 if (flag_trapping_math
|| !TARGET_ROUND
)
32437 if (out_mode
== SImode
&& in_mode
== DFmode
)
32439 if (out_n
== 4 && in_n
== 2)
32440 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
32441 else if (out_n
== 8 && in_n
== 4)
32442 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
32446 case BUILT_IN_IROUNDF
:
32447 case BUILT_IN_LROUNDF
:
32448 case BUILT_IN_LLROUNDF
:
32449 /* The round insn does not trap on denormals. */
32450 if (flag_trapping_math
|| !TARGET_ROUND
)
32453 if (out_mode
== SImode
&& in_mode
== SFmode
)
32455 if (out_n
== 4 && in_n
== 4)
32456 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
32457 else if (out_n
== 8 && in_n
== 8)
32458 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
32462 case BUILT_IN_COPYSIGN
:
32463 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32465 if (out_n
== 2 && in_n
== 2)
32466 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
32467 else if (out_n
== 4 && in_n
== 4)
32468 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
32472 case BUILT_IN_COPYSIGNF
:
32473 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32475 if (out_n
== 4 && in_n
== 4)
32476 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
32477 else if (out_n
== 8 && in_n
== 8)
32478 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
32482 case BUILT_IN_FLOOR
:
32483 /* The round insn does not trap on denormals. */
32484 if (flag_trapping_math
|| !TARGET_ROUND
)
32487 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32489 if (out_n
== 2 && in_n
== 2)
32490 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
32491 else if (out_n
== 4 && in_n
== 4)
32492 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
32496 case BUILT_IN_FLOORF
:
32497 /* The round insn does not trap on denormals. */
32498 if (flag_trapping_math
|| !TARGET_ROUND
)
32501 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32503 if (out_n
== 4 && in_n
== 4)
32504 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
32505 else if (out_n
== 8 && in_n
== 8)
32506 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
32510 case BUILT_IN_CEIL
:
32511 /* The round insn does not trap on denormals. */
32512 if (flag_trapping_math
|| !TARGET_ROUND
)
32515 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32517 if (out_n
== 2 && in_n
== 2)
32518 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
32519 else if (out_n
== 4 && in_n
== 4)
32520 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
32524 case BUILT_IN_CEILF
:
32525 /* The round insn does not trap on denormals. */
32526 if (flag_trapping_math
|| !TARGET_ROUND
)
32529 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32531 if (out_n
== 4 && in_n
== 4)
32532 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
32533 else if (out_n
== 8 && in_n
== 8)
32534 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
32538 case BUILT_IN_TRUNC
:
32539 /* The round insn does not trap on denormals. */
32540 if (flag_trapping_math
|| !TARGET_ROUND
)
32543 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32545 if (out_n
== 2 && in_n
== 2)
32546 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
32547 else if (out_n
== 4 && in_n
== 4)
32548 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
32552 case BUILT_IN_TRUNCF
:
32553 /* The round insn does not trap on denormals. */
32554 if (flag_trapping_math
|| !TARGET_ROUND
)
32557 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32559 if (out_n
== 4 && in_n
== 4)
32560 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
32561 else if (out_n
== 8 && in_n
== 8)
32562 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
32566 case BUILT_IN_RINT
:
32567 /* The round insn does not trap on denormals. */
32568 if (flag_trapping_math
|| !TARGET_ROUND
)
32571 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32573 if (out_n
== 2 && in_n
== 2)
32574 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
32575 else if (out_n
== 4 && in_n
== 4)
32576 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
32580 case BUILT_IN_RINTF
:
32581 /* The round insn does not trap on denormals. */
32582 if (flag_trapping_math
|| !TARGET_ROUND
)
32585 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32587 if (out_n
== 4 && in_n
== 4)
32588 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
32589 else if (out_n
== 8 && in_n
== 8)
32590 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
32594 case BUILT_IN_ROUND
:
32595 /* The round insn does not trap on denormals. */
32596 if (flag_trapping_math
|| !TARGET_ROUND
)
32599 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32601 if (out_n
== 2 && in_n
== 2)
32602 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
32603 else if (out_n
== 4 && in_n
== 4)
32604 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
32608 case BUILT_IN_ROUNDF
:
32609 /* The round insn does not trap on denormals. */
32610 if (flag_trapping_math
|| !TARGET_ROUND
)
32613 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32615 if (out_n
== 4 && in_n
== 4)
32616 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
32617 else if (out_n
== 8 && in_n
== 8)
32618 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
32623 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32625 if (out_n
== 2 && in_n
== 2)
32626 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
32627 if (out_n
== 4 && in_n
== 4)
32628 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
32632 case BUILT_IN_FMAF
:
32633 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32635 if (out_n
== 4 && in_n
== 4)
32636 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
32637 if (out_n
== 8 && in_n
== 8)
32638 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
32646 /* Dispatch to a handler for a vectorization library. */
32647 if (ix86_veclib_handler
)
32648 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
32654 /* Handler for an SVML-style interface to
32655 a library with vectorized intrinsics. */
32658 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
32661 tree fntype
, new_fndecl
, args
;
32664 enum machine_mode el_mode
, in_mode
;
32667 /* The SVML is suitable for unsafe math only. */
32668 if (!flag_unsafe_math_optimizations
)
32671 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32672 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32673 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32674 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32675 if (el_mode
!= in_mode
32683 case BUILT_IN_LOG10
:
32685 case BUILT_IN_TANH
:
32687 case BUILT_IN_ATAN
:
32688 case BUILT_IN_ATAN2
:
32689 case BUILT_IN_ATANH
:
32690 case BUILT_IN_CBRT
:
32691 case BUILT_IN_SINH
:
32693 case BUILT_IN_ASINH
:
32694 case BUILT_IN_ASIN
:
32695 case BUILT_IN_COSH
:
32697 case BUILT_IN_ACOSH
:
32698 case BUILT_IN_ACOS
:
32699 if (el_mode
!= DFmode
|| n
!= 2)
32703 case BUILT_IN_EXPF
:
32704 case BUILT_IN_LOGF
:
32705 case BUILT_IN_LOG10F
:
32706 case BUILT_IN_POWF
:
32707 case BUILT_IN_TANHF
:
32708 case BUILT_IN_TANF
:
32709 case BUILT_IN_ATANF
:
32710 case BUILT_IN_ATAN2F
:
32711 case BUILT_IN_ATANHF
:
32712 case BUILT_IN_CBRTF
:
32713 case BUILT_IN_SINHF
:
32714 case BUILT_IN_SINF
:
32715 case BUILT_IN_ASINHF
:
32716 case BUILT_IN_ASINF
:
32717 case BUILT_IN_COSHF
:
32718 case BUILT_IN_COSF
:
32719 case BUILT_IN_ACOSHF
:
32720 case BUILT_IN_ACOSF
:
32721 if (el_mode
!= SFmode
|| n
!= 4)
32729 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32731 if (fn
== BUILT_IN_LOGF
)
32732 strcpy (name
, "vmlsLn4");
32733 else if (fn
== BUILT_IN_LOG
)
32734 strcpy (name
, "vmldLn2");
32737 sprintf (name
, "vmls%s", bname
+10);
32738 name
[strlen (name
)-1] = '4';
32741 sprintf (name
, "vmld%s2", bname
+10);
32743 /* Convert to uppercase. */
32747 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32749 args
= TREE_CHAIN (args
))
32753 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32755 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32757 /* Build a function declaration for the vectorized function. */
32758 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32759 FUNCTION_DECL
, get_identifier (name
), fntype
);
32760 TREE_PUBLIC (new_fndecl
) = 1;
32761 DECL_EXTERNAL (new_fndecl
) = 1;
32762 DECL_IS_NOVOPS (new_fndecl
) = 1;
32763 TREE_READONLY (new_fndecl
) = 1;
32768 /* Handler for an ACML-style interface to
32769 a library with vectorized intrinsics. */
32772 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
32774 char name
[20] = "__vr.._";
32775 tree fntype
, new_fndecl
, args
;
32778 enum machine_mode el_mode
, in_mode
;
32781 /* The ACML is 64bits only and suitable for unsafe math only as
32782 it does not correctly support parts of IEEE with the required
32783 precision such as denormals. */
32785 || !flag_unsafe_math_optimizations
)
32788 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32789 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32790 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32791 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32792 if (el_mode
!= in_mode
32802 case BUILT_IN_LOG2
:
32803 case BUILT_IN_LOG10
:
32806 if (el_mode
!= DFmode
32811 case BUILT_IN_SINF
:
32812 case BUILT_IN_COSF
:
32813 case BUILT_IN_EXPF
:
32814 case BUILT_IN_POWF
:
32815 case BUILT_IN_LOGF
:
32816 case BUILT_IN_LOG2F
:
32817 case BUILT_IN_LOG10F
:
32820 if (el_mode
!= SFmode
32829 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32830 sprintf (name
+ 7, "%s", bname
+10);
32833 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32835 args
= TREE_CHAIN (args
))
32839 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32841 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32843 /* Build a function declaration for the vectorized function. */
32844 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32845 FUNCTION_DECL
, get_identifier (name
), fntype
);
32846 TREE_PUBLIC (new_fndecl
) = 1;
32847 DECL_EXTERNAL (new_fndecl
) = 1;
32848 DECL_IS_NOVOPS (new_fndecl
) = 1;
32849 TREE_READONLY (new_fndecl
) = 1;
32854 /* Returns a decl of a function that implements gather load with
32855 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
32856 Return NULL_TREE if it is not available. */
32859 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
32860 const_tree index_type
, int scale
)
32863 enum ix86_builtins code
;
32868 if ((TREE_CODE (index_type
) != INTEGER_TYPE
32869 && !POINTER_TYPE_P (index_type
))
32870 || (TYPE_MODE (index_type
) != SImode
32871 && TYPE_MODE (index_type
) != DImode
))
32874 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
32877 /* v*gather* insn sign extends index to pointer mode. */
32878 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
32879 && TYPE_UNSIGNED (index_type
))
32884 || (scale
& (scale
- 1)) != 0)
32887 si
= TYPE_MODE (index_type
) == SImode
;
32888 switch (TYPE_MODE (mem_vectype
))
32891 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
32894 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
32897 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
32900 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
32903 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
32906 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
32909 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
32912 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
32918 return ix86_builtins
[code
];
32921 /* Returns a code for a target-specific builtin that implements
32922 reciprocal of the function, or NULL_TREE if not available. */
32925 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
32926 bool sqrt ATTRIBUTE_UNUSED
)
32928 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
32929 && flag_finite_math_only
&& !flag_trapping_math
32930 && flag_unsafe_math_optimizations
))
32934 /* Machine dependent builtins. */
32937 /* Vectorized version of sqrt to rsqrt conversion. */
32938 case IX86_BUILTIN_SQRTPS_NR
:
32939 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
32941 case IX86_BUILTIN_SQRTPS_NR256
:
32942 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
32948 /* Normal builtins. */
32951 /* Sqrt to rsqrt conversion. */
32952 case BUILT_IN_SQRTF
:
32953 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
32960 /* Helper for avx_vpermilps256_operand et al. This is also used by
32961 the expansion functions to turn the parallel back into a mask.
32962 The return value is 0 for no match and the imm8+1 for a match. */
32965 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
32967 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
32969 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
32971 if (XVECLEN (par
, 0) != (int) nelt
)
32974 /* Validate that all of the elements are constants, and not totally
32975 out of range. Copy the data into an integral array to make the
32976 subsequent checks easier. */
32977 for (i
= 0; i
< nelt
; ++i
)
32979 rtx er
= XVECEXP (par
, 0, i
);
32980 unsigned HOST_WIDE_INT ei
;
32982 if (!CONST_INT_P (er
))
32993 /* In the 256-bit DFmode case, we can only move elements within
32995 for (i
= 0; i
< 2; ++i
)
32999 mask
|= ipar
[i
] << i
;
33001 for (i
= 2; i
< 4; ++i
)
33005 mask
|= (ipar
[i
] - 2) << i
;
33010 /* In the 256-bit SFmode case, we have full freedom of movement
33011 within the low 128-bit lane, but the high 128-bit lane must
33012 mirror the exact same pattern. */
33013 for (i
= 0; i
< 4; ++i
)
33014 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33021 /* In the 128-bit case, we've full freedom in the placement of
33022 the elements from the source operand. */
33023 for (i
= 0; i
< nelt
; ++i
)
33024 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33028 gcc_unreachable ();
33031 /* Make sure success has a non-zero value by adding one. */
33035 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33036 the expansion functions to turn the parallel back into a mask.
33037 The return value is 0 for no match and the imm8+1 for a match. */
33040 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33042 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33044 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33046 if (XVECLEN (par
, 0) != (int) nelt
)
33049 /* Validate that all of the elements are constants, and not totally
33050 out of range. Copy the data into an integral array to make the
33051 subsequent checks easier. */
33052 for (i
= 0; i
< nelt
; ++i
)
33054 rtx er
= XVECEXP (par
, 0, i
);
33055 unsigned HOST_WIDE_INT ei
;
33057 if (!CONST_INT_P (er
))
33060 if (ei
>= 2 * nelt
)
33065 /* Validate that the halves of the permute are halves. */
33066 for (i
= 0; i
< nelt2
- 1; ++i
)
33067 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33069 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33070 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33073 /* Reconstruct the mask. */
33074 for (i
= 0; i
< 2; ++i
)
33076 unsigned e
= ipar
[i
* nelt2
];
33080 mask
|= e
<< (i
* 4);
33083 /* Make sure success has a non-zero value by adding one. */
33087 /* Store OPERAND to the memory after reload is completed. This means
33088 that we can't easily use assign_stack_local. */
33090 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33094 gcc_assert (reload_completed
);
33095 if (ix86_using_red_zone ())
33097 result
= gen_rtx_MEM (mode
,
33098 gen_rtx_PLUS (Pmode
,
33100 GEN_INT (-RED_ZONE_SIZE
)));
33101 emit_move_insn (result
, operand
);
33103 else if (TARGET_64BIT
)
33109 operand
= gen_lowpart (DImode
, operand
);
33113 gen_rtx_SET (VOIDmode
,
33114 gen_rtx_MEM (DImode
,
33115 gen_rtx_PRE_DEC (DImode
,
33116 stack_pointer_rtx
)),
33120 gcc_unreachable ();
33122 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33131 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33133 gen_rtx_SET (VOIDmode
,
33134 gen_rtx_MEM (SImode
,
33135 gen_rtx_PRE_DEC (Pmode
,
33136 stack_pointer_rtx
)),
33139 gen_rtx_SET (VOIDmode
,
33140 gen_rtx_MEM (SImode
,
33141 gen_rtx_PRE_DEC (Pmode
,
33142 stack_pointer_rtx
)),
33147 /* Store HImodes as SImodes. */
33148 operand
= gen_lowpart (SImode
, operand
);
33152 gen_rtx_SET (VOIDmode
,
33153 gen_rtx_MEM (GET_MODE (operand
),
33154 gen_rtx_PRE_DEC (SImode
,
33155 stack_pointer_rtx
)),
33159 gcc_unreachable ();
33161 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33166 /* Free operand from the memory. */
33168 ix86_free_from_memory (enum machine_mode mode
)
33170 if (!ix86_using_red_zone ())
33174 if (mode
== DImode
|| TARGET_64BIT
)
33178 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33179 to pop or add instruction if registers are available. */
33180 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33181 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33186 /* Return a register priority for hard reg REGNO. */
33188 ix86_register_priority (int hard_regno
)
33190 /* ebp and r13 as the base always wants a displacement, r12 as the
33191 base always wants an index. So discourage their usage in an
33193 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33195 if (hard_regno
== BP_REG
)
33197 /* New x86-64 int registers result in bigger code size. Discourage
33199 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33201 /* New x86-64 SSE registers result in bigger code size. Discourage
33203 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33205 /* Usage of AX register results in smaller code. Prefer it. */
33206 if (hard_regno
== 0)
33211 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33213 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33214 QImode must go into class Q_REGS.
33215 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33216 movdf to do mem-to-mem moves through integer regs. */
33219 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33221 enum machine_mode mode
= GET_MODE (x
);
33223 /* We're only allowed to return a subclass of CLASS. Many of the
33224 following checks fail for NO_REGS, so eliminate that early. */
33225 if (regclass
== NO_REGS
)
33228 /* All classes can load zeros. */
33229 if (x
== CONST0_RTX (mode
))
33232 /* Force constants into memory if we are loading a (nonzero) constant into
33233 an MMX or SSE register. This is because there are no MMX/SSE instructions
33234 to load from a constant. */
33236 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
33239 /* Prefer SSE regs only, if we can use them for math. */
33240 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33241 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33243 /* Floating-point constants need more complex checks. */
33244 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33246 /* General regs can load everything. */
33247 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33250 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33251 zero above. We only want to wind up preferring 80387 registers if
33252 we plan on doing computation with them. */
33254 && standard_80387_constant_p (x
) > 0)
33256 /* Limit class to non-sse. */
33257 if (regclass
== FLOAT_SSE_REGS
)
33259 if (regclass
== FP_TOP_SSE_REGS
)
33261 if (regclass
== FP_SECOND_SSE_REGS
)
33262 return FP_SECOND_REG
;
33263 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33270 /* Generally when we see PLUS here, it's the function invariant
33271 (plus soft-fp const_int). Which can only be computed into general
33273 if (GET_CODE (x
) == PLUS
)
33274 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33276 /* QImode constants are easy to load, but non-constant QImode data
33277 must go into Q_REGS. */
33278 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33280 if (reg_class_subset_p (regclass
, Q_REGS
))
33282 if (reg_class_subset_p (Q_REGS
, regclass
))
33290 /* Discourage putting floating-point values in SSE registers unless
33291 SSE math is being used, and likewise for the 387 registers. */
33293 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33295 enum machine_mode mode
= GET_MODE (x
);
33297 /* Restrict the output reload class to the register bank that we are doing
33298 math on. If we would like not to return a subset of CLASS, reject this
33299 alternative: if reload cannot do this, it will still use its choice. */
33300 mode
= GET_MODE (x
);
33301 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33302 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
33304 if (X87_FLOAT_MODE_P (mode
))
33306 if (regclass
== FP_TOP_SSE_REGS
)
33308 else if (regclass
== FP_SECOND_SSE_REGS
)
33309 return FP_SECOND_REG
;
33311 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33318 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33319 enum machine_mode mode
, secondary_reload_info
*sri
)
33321 /* Double-word spills from general registers to non-offsettable memory
33322 references (zero-extended addresses) require special handling. */
33325 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33326 && rclass
== GENERAL_REGS
33327 && !offsettable_memref_p (x
))
33330 ? CODE_FOR_reload_noff_load
33331 : CODE_FOR_reload_noff_store
);
33332 /* Add the cost of moving address to a temporary. */
33333 sri
->extra_cost
= 1;
33338 /* QImode spills from non-QI registers require
33339 intermediate register on 32bit targets. */
33341 && !in_p
&& mode
== QImode
33342 && (rclass
== GENERAL_REGS
33343 || rclass
== LEGACY_REGS
33344 || rclass
== NON_Q_REGS
33347 || rclass
== INDEX_REGS
))
33356 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33357 regno
= true_regnum (x
);
33359 /* Return Q_REGS if the operand is in memory. */
33364 /* This condition handles corner case where an expression involving
33365 pointers gets vectorized. We're trying to use the address of a
33366 stack slot as a vector initializer.
33368 (set (reg:V2DI 74 [ vect_cst_.2 ])
33369 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33371 Eventually frame gets turned into sp+offset like this:
33373 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33374 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33375 (const_int 392 [0x188]))))
33377 That later gets turned into:
33379 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33380 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33381 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33383 We'll have the following reload recorded:
33385 Reload 0: reload_in (DI) =
33386 (plus:DI (reg/f:DI 7 sp)
33387 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33388 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33389 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33390 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33391 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33392 reload_reg_rtx: (reg:V2DI 22 xmm1)
33394 Which isn't going to work since SSE instructions can't handle scalar
33395 additions. Returning GENERAL_REGS forces the addition into integer
33396 register and reload can handle subsequent reloads without problems. */
33398 if (in_p
&& GET_CODE (x
) == PLUS
33399 && SSE_CLASS_P (rclass
)
33400 && SCALAR_INT_MODE_P (mode
))
33401 return GENERAL_REGS
;
33406 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33409 ix86_class_likely_spilled_p (reg_class_t rclass
)
33420 case SSE_FIRST_REG
:
33422 case FP_SECOND_REG
:
33432 /* If we are copying between general and FP registers, we need a memory
33433 location. The same is true for SSE and MMX registers.
33435 To optimize register_move_cost performance, allow inline variant.
33437 The macro can't work reliably when one of the CLASSES is class containing
33438 registers from multiple units (SSE, MMX, integer). We avoid this by never
33439 combining those units in single alternative in the machine description.
33440 Ensure that this constraint holds to avoid unexpected surprises.
33442 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33443 enforce these sanity checks. */
33446 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33447 enum machine_mode mode
, int strict
)
33449 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
33450 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
33451 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
33452 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
33453 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
33454 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
33456 gcc_assert (!strict
|| lra_in_progress
);
33460 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
33463 /* ??? This is a lie. We do have moves between mmx/general, and for
33464 mmx/sse2. But by saying we need secondary memory we discourage the
33465 register allocator from using the mmx registers unless needed. */
33466 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
33469 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33471 /* SSE1 doesn't have any direct moves from other classes. */
33475 /* If the target says that inter-unit moves are more expensive
33476 than moving through memory, then don't generate them. */
33477 if (!TARGET_INTER_UNIT_MOVES
)
33480 /* Between SSE and general, we have moves no larger than word size. */
33481 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33489 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33490 enum machine_mode mode
, int strict
)
33492 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
33495 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33497 On the 80386, this is the size of MODE in words,
33498 except in the FP regs, where a single reg is always enough. */
33500 static unsigned char
33501 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
33503 if (MAYBE_INTEGER_CLASS_P (rclass
))
33505 if (mode
== XFmode
)
33506 return (TARGET_64BIT
? 2 : 3);
33507 else if (mode
== XCmode
)
33508 return (TARGET_64BIT
? 4 : 6);
33510 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
33514 if (COMPLEX_MODE_P (mode
))
33521 /* Return true if the registers in CLASS cannot represent the change from
33522 modes FROM to TO. */
33525 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
33526 enum reg_class regclass
)
33531 /* x87 registers can't do subreg at all, as all values are reformatted
33532 to extended precision. */
33533 if (MAYBE_FLOAT_CLASS_P (regclass
))
33536 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
33538 /* Vector registers do not support QI or HImode loads. If we don't
33539 disallow a change to these modes, reload will assume it's ok to
33540 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33541 the vec_dupv4hi pattern. */
33542 if (GET_MODE_SIZE (from
) < 4)
33545 /* Vector registers do not support subreg with nonzero offsets, which
33546 are otherwise valid for integer registers. Since we can't see
33547 whether we have a nonzero offset from here, prohibit all
33548 nonparadoxical subregs changing size. */
33549 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
33556 /* Return the cost of moving data of mode M between a
33557 register and memory. A value of 2 is the default; this cost is
33558 relative to those in `REGISTER_MOVE_COST'.
33560 This function is used extensively by register_move_cost that is used to
33561 build tables at startup. Make it inline in this case.
33562 When IN is 2, return maximum of in and out move cost.
33564 If moving between registers and memory is more expensive than
33565 between two registers, you should define this macro to express the
33568 Model also increased moving costs of QImode registers in non
33572 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
33576 if (FLOAT_CLASS_P (regclass
))
33594 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
33595 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
33597 if (SSE_CLASS_P (regclass
))
33600 switch (GET_MODE_SIZE (mode
))
33615 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
33616 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
33618 if (MMX_CLASS_P (regclass
))
33621 switch (GET_MODE_SIZE (mode
))
33633 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
33634 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
33636 switch (GET_MODE_SIZE (mode
))
33639 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
33642 return ix86_cost
->int_store
[0];
33643 if (TARGET_PARTIAL_REG_DEPENDENCY
33644 && optimize_function_for_speed_p (cfun
))
33645 cost
= ix86_cost
->movzbl_load
;
33647 cost
= ix86_cost
->int_load
[0];
33649 return MAX (cost
, ix86_cost
->int_store
[0]);
33655 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
33657 return ix86_cost
->movzbl_load
;
33659 return ix86_cost
->int_store
[0] + 4;
33664 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
33665 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
33667 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
33668 if (mode
== TFmode
)
33671 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
33673 cost
= ix86_cost
->int_load
[2];
33675 cost
= ix86_cost
->int_store
[2];
33676 return (cost
* (((int) GET_MODE_SIZE (mode
)
33677 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
33682 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
33685 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
33689 /* Return the cost of moving data from a register in class CLASS1 to
33690 one in class CLASS2.
33692 It is not required that the cost always equal 2 when FROM is the same as TO;
33693 on some machines it is expensive to move between registers if they are not
33694 general registers. */
33697 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
33698 reg_class_t class2_i
)
33700 enum reg_class class1
= (enum reg_class
) class1_i
;
33701 enum reg_class class2
= (enum reg_class
) class2_i
;
33703 /* In case we require secondary memory, compute cost of the store followed
33704 by load. In order to avoid bad register allocation choices, we need
33705 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33707 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
33711 cost
+= inline_memory_move_cost (mode
, class1
, 2);
33712 cost
+= inline_memory_move_cost (mode
, class2
, 2);
33714 /* In case of copying from general_purpose_register we may emit multiple
33715 stores followed by single load causing memory size mismatch stall.
33716 Count this as arbitrarily high cost of 20. */
33717 if (targetm
.class_max_nregs (class1
, mode
)
33718 > targetm
.class_max_nregs (class2
, mode
))
33721 /* In the case of FP/MMX moves, the registers actually overlap, and we
33722 have to switch modes in order to treat them differently. */
33723 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
33724 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
33730 /* Moves between SSE/MMX and integer unit are expensive. */
33731 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
33732 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33734 /* ??? By keeping returned value relatively high, we limit the number
33735 of moves between integer and MMX/SSE registers for all targets.
33736 Additionally, high value prevents problem with x86_modes_tieable_p(),
33737 where integer modes in MMX/SSE registers are not tieable
33738 because of missing QImode and HImode moves to, from or between
33739 MMX/SSE registers. */
33740 return MAX (8, ix86_cost
->mmxsse_to_integer
);
33742 if (MAYBE_FLOAT_CLASS_P (class1
))
33743 return ix86_cost
->fp_move
;
33744 if (MAYBE_SSE_CLASS_P (class1
))
33745 return ix86_cost
->sse_move
;
33746 if (MAYBE_MMX_CLASS_P (class1
))
33747 return ix86_cost
->mmx_move
;
/* NOTE(review): this chunk is a lossy extraction of GCC's config/i386/i386.c.
   Statements are split across lines and the embedded original line numbers
   have gaps (e.g. 33762 -> 33764), so braces, return values and other tokens
   are missing from view.  Text below is preserved verbatim; only comments
   were added.  Verify against the upstream file before editing.  */
/* Target hook body: decides whether hard register REGNO can hold a value of
   machine mode MODE.  Visible checks: CC regs take only MODE_CC; x87 stack
   regs take VALID_FP_MODE_P modes; SSE regs take vector/OImode (AVX) modes;
   MMX regs take MMX/3DNOW modes; general regs handle QImode specially
   (partial-register stalls) plus int/fp/decimal-fp/MMX-cast modes.  */
33751 /* Return TRUE if hard register REGNO can hold a value of machine-mode
33755 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
33757 /* Flags and only flags can only hold CCmode values. */
33758 if (CC_REGNO_P (regno
))
33759 return GET_MODE_CLASS (mode
) == MODE_CC
;
33760 if (GET_MODE_CLASS (mode
) == MODE_CC
33761 || GET_MODE_CLASS (mode
) == MODE_RANDOM
33762 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
33764 if (STACK_REGNO_P (regno
))
33765 return VALID_FP_MODE_P (mode
);
33766 if (SSE_REGNO_P (regno
))
33768 /* We implement the move patterns for all vector modes into and
33769 out of SSE registers, even when no operation instructions
33770 are available. OImode move is available only when AVX is
33772 return ((TARGET_AVX
&& mode
== OImode
)
33773 || VALID_AVX256_REG_MODE (mode
)
33774 || VALID_SSE_REG_MODE (mode
)
33775 || VALID_SSE2_REG_MODE (mode
)
33776 || VALID_MMX_REG_MODE (mode
)
33777 || VALID_MMX_REG_MODE_3DNOW (mode
));
33779 if (MMX_REGNO_P (regno
))
33781 /* We implement the move patterns for 3DNOW modes even in MMX mode,
33782 so if the register is available at all, then we can move data of
33783 the given mode into or out of it. */
33784 return (VALID_MMX_REG_MODE (mode
)
33785 || VALID_MMX_REG_MODE_3DNOW (mode
));
33788 if (mode
== QImode
)
33790 /* Take care for QImode values - they can be in non-QI regs,
33791 but then they do cause partial register stalls. */
33792 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
33794 if (!TARGET_PARTIAL_REG_STALL
)
33796 return !can_create_pseudo_p ();
33798 /* We handle both integer and floats in the general purpose registers. */
33799 else if (VALID_INT_MODE_P (mode
))
33801 else if (VALID_FP_MODE_P (mode
))
33803 else if (VALID_DFP_MODE_P (mode
))
33805 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
33806 on to use that value in smaller contexts, this can easily force a
33807 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
33808 supporting DImode, allow it. */
33809 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
/* Helper predicate for ix86_modes_tieable_p: is MODE a tieable integer mode?
   Visible returns depend on TARGET_64BIT / TARGET_PARTIAL_REG_STALL.
   NOTE(review): lossy extraction — the switch/case structure selecting
   between the two returns is missing from view (line-number gaps
   33819 -> 33828 -> 33831); verify against upstream i386.c.  */
33815 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
33816 tieable integer mode. */
33819 ix86_tieable_integer_mode_p (enum machine_mode mode
)
33828 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
33831 return TARGET_64BIT
;
/* Target hook body: can MODE1 live in every register class that can hold
   MODE2 (so the two modes "tie" without copying)?  Visible logic: identical
   modes tie; tieable integer modes tie; XFmode ties SF/DFmode; DFmode ties
   SFmode; 32-/16-byte SSE-only modes tie same-size SSE modes; 8-byte MMX
   modes tie other 8-byte MMX modes.
   NOTE(review): lossy extraction — early `return true;`/final return lines
   are missing from view (gaps 33845 -> 33848, 33879 -> end).  */
33838 /* Return true if MODE1 is accessible in a register that can hold MODE2
33839 without copying. That is, all register classes that can hold MODE2
33840 can also hold MODE1. */
33843 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
33845 if (mode1
== mode2
)
33848 if (ix86_tieable_integer_mode_p (mode1
)
33849 && ix86_tieable_integer_mode_p (mode2
))
33852 /* MODE2 being XFmode implies fp stack or general regs, which means we
33853 can tie any smaller floating point modes to it. Note that we do not
33854 tie this with TFmode. */
33855 if (mode2
== XFmode
)
33856 return mode1
== SFmode
|| mode1
== DFmode
;
33858 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
33859 that we can tie it with SFmode. */
33860 if (mode2
== DFmode
)
33861 return mode1
== SFmode
;
33863 /* If MODE2 is only appropriate for an SSE register, then tie with
33864 any other mode acceptable to SSE registers. */
33865 if (GET_MODE_SIZE (mode2
) == 32
33866 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33867 return (GET_MODE_SIZE (mode1
) == 32
33868 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33869 if (GET_MODE_SIZE (mode2
) == 16
33870 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33871 return (GET_MODE_SIZE (mode1
) == 16
33872 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33874 /* If MODE2 is appropriate for an MMX register, then tie
33875 with any other mode acceptable to MMX registers. */
33876 if (GET_MODE_SIZE (mode2
) == 8
33877 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
33878 return (GET_MODE_SIZE (mode1
) == 8
33879 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
/* Cost of a register-to-register SET in mode MODE.  Chooses the widest
   "units" a move can be done in (word size by default; full mode size when
   the target has a suitable FP/SSE/MMX unit for the mode), then charges one
   insn per units-sized piece.
   NOTE(review): lossy extraction — the switch's case labels for the first
   arms and the closing braces are missing from view (gaps 33891 -> 33897,
   33905 -> 33908); verify against upstream i386.c.  */
33884 /* Return the cost of moving between two registers of mode MODE. */
33887 ix86_set_reg_reg_cost (enum machine_mode mode
)
33889 unsigned int units
= UNITS_PER_WORD
;
33891 switch (GET_MODE_CLASS (mode
))
33897 units
= GET_MODE_SIZE (CCmode
);
33901 if ((TARGET_SSE
&& mode
== TFmode
)
33902 || (TARGET_80387
&& mode
== XFmode
)
33903 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
33904 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
33905 units
= GET_MODE_SIZE (mode
);
33908 case MODE_COMPLEX_FLOAT
:
33909 if ((TARGET_SSE
&& mode
== TCmode
)
33910 || (TARGET_80387
&& mode
== XCmode
)
33911 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
33912 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
33913 units
= GET_MODE_SIZE (mode
);
33916 case MODE_VECTOR_INT
:
33917 case MODE_VECTOR_FLOAT
:
33918 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
33919 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
33920 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
33921 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
33922 units
= GET_MODE_SIZE (mode
);
33925 /* Return the cost of moving between two registers of mode MODE,
33926 assuming that the move will be in pieces of at most UNITS bytes. */
33927 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
/* TARGET_RTX_COSTS hook body: estimates the cost of rtx X (code CODE, seen
   from OUTER_CODE at operand OPNO), writing the result through TOTAL.  Uses
   the speed-vs-size cost table (`ix86_cost` vs `ix86_size_cost`).  Visible
   arms handle: reg-reg SETs, immediates/symbolic constants, FP constants,
   zero/sign extension, shifts (incl. vector-shift emulation), FMA, MULT
   (widening-multiply detection via SIGN_/ZERO_EXTEND operands), DIV, LEA
   patterns (base+index*scale+disp), PLUS/MINUS, NEG, test-bit COMPAREs,
   ABS/SQRT, UNSPEC_TP, and vector-manipulation codes.
   NOTE(review): lossy extraction — the dispatching `switch (code)` and most
   of its `case` labels, the braces, and many `return true/false;` lines are
   missing from view (dozens of line-number gaps, e.g. 33941 -> 33946,
   34042 -> 34052, 34283 -> 34290).  The text below is preserved verbatim;
   do not treat it as compilable.  Verify against upstream i386.c.  */
33930 /* Compute a (partial) cost for rtx X. Return true if the complete
33931 cost has been computed, and false if subexpressions should be
33932 scanned. In either case, *TOTAL contains the cost result. */
33935 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
33938 enum rtx_code code
= (enum rtx_code
) code_i
;
33939 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
33940 enum machine_mode mode
= GET_MODE (x
);
33941 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
33946 if (register_operand (SET_DEST (x
), VOIDmode
)
33947 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
33949 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
33958 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
33960 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
33962 else if (flag_pic
&& SYMBOLIC_CONST (x
)
33964 || (!GET_CODE (x
) != LABEL_REF
33965 && (GET_CODE (x
) != SYMBOL_REF
33966 || !SYMBOL_REF_LOCAL_P (x
)))))
33973 if (mode
== VOIDmode
)
33978 switch (standard_80387_constant_p (x
))
33983 default: /* Other constants */
33990 if (SSE_FLOAT_MODE_P (mode
))
33993 switch (standard_sse_constant_p (x
))
33997 case 1: /* 0: xor eliminates false dependency */
34000 default: /* -1: cmp contains false dependency */
34005 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34006 it'll probably end up. Add a penalty for size. */
34007 *total
= (COSTS_N_INSNS (1)
34008 + (flag_pic
!= 0 && !TARGET_64BIT
)
34009 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34013 /* The zero extensions is often completely free on x86_64, so make
34014 it as cheap as possible. */
34015 if (TARGET_64BIT
&& mode
== DImode
34016 && GET_MODE (XEXP (x
, 0)) == SImode
)
34018 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34019 *total
= cost
->add
;
34021 *total
= cost
->movzx
;
34025 *total
= cost
->movsx
;
34029 if (SCALAR_INT_MODE_P (mode
)
34030 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34031 && CONST_INT_P (XEXP (x
, 1)))
34033 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34036 *total
= cost
->add
;
34039 if ((value
== 2 || value
== 3)
34040 && cost
->lea
<= cost
->shift_const
)
34042 *total
= cost
->lea
;
34052 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34054 /* ??? Should be SSE vector operation cost. */
34055 /* At least for published AMD latencies, this really is the same
34056 as the latency for a simple fpu operation like fabs. */
34057 /* V*QImode is emulated with 1-11 insns. */
34058 if (mode
== V16QImode
|| mode
== V32QImode
)
34061 if (TARGET_XOP
&& mode
== V16QImode
)
34063 /* For XOP we use vpshab, which requires a broadcast of the
34064 value to the variable shift insn. For constants this
34065 means a V16Q const in mem; even when we can perform the
34066 shift with one insn set the cost to prefer paddb. */
34067 if (CONSTANT_P (XEXP (x
, 1)))
34069 *total
= (cost
->fabs
34070 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34071 + (speed
? 2 : COSTS_N_BYTES (16)));
34076 else if (TARGET_SSSE3
)
34078 *total
= cost
->fabs
* count
;
34081 *total
= cost
->fabs
;
34083 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34085 if (CONST_INT_P (XEXP (x
, 1)))
34087 if (INTVAL (XEXP (x
, 1)) > 32)
34088 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34090 *total
= cost
->shift_const
* 2;
34094 if (GET_CODE (XEXP (x
, 1)) == AND
)
34095 *total
= cost
->shift_var
* 2;
34097 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34102 if (CONST_INT_P (XEXP (x
, 1)))
34103 *total
= cost
->shift_const
;
34105 *total
= cost
->shift_var
;
34113 gcc_assert (FLOAT_MODE_P (mode
));
34114 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34116 /* ??? SSE scalar/vector cost should be used here. */
34117 /* ??? Bald assumption that fma has the same cost as fmul. */
34118 *total
= cost
->fmul
;
34119 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34121 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34123 if (GET_CODE (sub
) == NEG
)
34124 sub
= XEXP (sub
, 0);
34125 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34128 if (GET_CODE (sub
) == NEG
)
34129 sub
= XEXP (sub
, 0);
34130 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34135 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34137 /* ??? SSE scalar cost should be used here. */
34138 *total
= cost
->fmul
;
34141 else if (X87_FLOAT_MODE_P (mode
))
34143 *total
= cost
->fmul
;
34146 else if (FLOAT_MODE_P (mode
))
34148 /* ??? SSE vector cost should be used here. */
34149 *total
= cost
->fmul
;
34152 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34154 /* V*QImode is emulated with 7-13 insns. */
34155 if (mode
== V16QImode
|| mode
== V32QImode
)
34158 if (TARGET_XOP
&& mode
== V16QImode
)
34160 else if (TARGET_SSSE3
)
34162 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34164 /* V*DImode is emulated with 5-8 insns. */
34165 else if (mode
== V2DImode
|| mode
== V4DImode
)
34167 if (TARGET_XOP
&& mode
== V2DImode
)
34168 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34170 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34172 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34173 insns, including two PMULUDQ. */
34174 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34175 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34177 *total
= cost
->fmul
;
34182 rtx op0
= XEXP (x
, 0);
34183 rtx op1
= XEXP (x
, 1);
34185 if (CONST_INT_P (XEXP (x
, 1)))
34187 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34188 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34192 /* This is arbitrary. */
34195 /* Compute costs correctly for widening multiplication. */
34196 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34197 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34198 == GET_MODE_SIZE (mode
))
34200 int is_mulwiden
= 0;
34201 enum machine_mode inner_mode
= GET_MODE (op0
);
34203 if (GET_CODE (op0
) == GET_CODE (op1
))
34204 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34205 else if (CONST_INT_P (op1
))
34207 if (GET_CODE (op0
) == SIGN_EXTEND
)
34208 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34211 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34215 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34218 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34219 + nbits
* cost
->mult_bit
34220 + rtx_cost (op0
, outer_code
, opno
, speed
)
34221 + rtx_cost (op1
, outer_code
, opno
, speed
));
34230 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34231 /* ??? SSE cost should be used here. */
34232 *total
= cost
->fdiv
;
34233 else if (X87_FLOAT_MODE_P (mode
))
34234 *total
= cost
->fdiv
;
34235 else if (FLOAT_MODE_P (mode
))
34236 /* ??? SSE vector cost should be used here. */
34237 *total
= cost
->fdiv
;
34239 *total
= cost
->divide
[MODE_INDEX (mode
)];
34243 if (GET_MODE_CLASS (mode
) == MODE_INT
34244 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34246 if (GET_CODE (XEXP (x
, 0)) == PLUS
34247 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34248 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34249 && CONSTANT_P (XEXP (x
, 1)))
34251 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34252 if (val
== 2 || val
== 4 || val
== 8)
34254 *total
= cost
->lea
;
34255 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34256 outer_code
, opno
, speed
);
34257 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34258 outer_code
, opno
, speed
);
34259 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34263 else if (GET_CODE (XEXP (x
, 0)) == MULT
34264 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34266 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34267 if (val
== 2 || val
== 4 || val
== 8)
34269 *total
= cost
->lea
;
34270 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34271 outer_code
, opno
, speed
);
34272 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34276 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34278 *total
= cost
->lea
;
34279 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34280 outer_code
, opno
, speed
);
34281 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34282 outer_code
, opno
, speed
);
34283 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34290 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34292 /* ??? SSE cost should be used here. */
34293 *total
= cost
->fadd
;
34296 else if (X87_FLOAT_MODE_P (mode
))
34298 *total
= cost
->fadd
;
34301 else if (FLOAT_MODE_P (mode
))
34303 /* ??? SSE vector cost should be used here. */
34304 *total
= cost
->fadd
;
34312 if (GET_MODE_CLASS (mode
) == MODE_INT
34313 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34315 *total
= (cost
->add
* 2
34316 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34317 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34318 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34319 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
34325 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34327 /* ??? SSE cost should be used here. */
34328 *total
= cost
->fchs
;
34331 else if (X87_FLOAT_MODE_P (mode
))
34333 *total
= cost
->fchs
;
34336 else if (FLOAT_MODE_P (mode
))
34338 /* ??? SSE vector cost should be used here. */
34339 *total
= cost
->fchs
;
34345 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34347 /* ??? Should be SSE vector operation cost. */
34348 /* At least for published AMD latencies, this really is the same
34349 as the latency for a simple fpu operation like fabs. */
34350 *total
= cost
->fabs
;
34352 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34353 *total
= cost
->add
* 2;
34355 *total
= cost
->add
;
34359 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34360 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34361 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34362 && XEXP (x
, 1) == const0_rtx
)
34364 /* This kind of construct is implemented using test[bwl].
34365 Treat it as if we had an AND. */
34366 *total
= (cost
->add
34367 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34368 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
34374 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
34379 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34380 /* ??? SSE cost should be used here. */
34381 *total
= cost
->fabs
;
34382 else if (X87_FLOAT_MODE_P (mode
))
34383 *total
= cost
->fabs
;
34384 else if (FLOAT_MODE_P (mode
))
34385 /* ??? SSE vector cost should be used here. */
34386 *total
= cost
->fabs
;
34390 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34391 /* ??? SSE cost should be used here. */
34392 *total
= cost
->fsqrt
;
34393 else if (X87_FLOAT_MODE_P (mode
))
34394 *total
= cost
->fsqrt
;
34395 else if (FLOAT_MODE_P (mode
))
34396 /* ??? SSE vector cost should be used here. */
34397 *total
= cost
->fsqrt
;
34401 if (XINT (x
, 1) == UNSPEC_TP
)
34408 case VEC_DUPLICATE
:
34409 /* ??? Assume all of these vector manipulation patterns are
34410 recognizable. In which case they all pretty much have the
34412 *total
= cost
->fabs
;
/* Darwin (TARGET_MACHO, 32-bit only) support: emit the assembly text of a
   lazy-binding stub for symbol SYMB with stub name STUB into FILE.  Picks
   one of three stub flavours (AT&T self-modifying, PIC via get_pc_thunk,
   or plain), then emits the matching lazy-symbol-pointer section entry.
   `current_machopic_label_num` numbers the per-stub local labels (L%d$lz).
   NOTE(review): lossy extraction — the #if TARGET_MACHO guard, return type,
   braces and some else-arms are missing from view (line-number gaps, e.g.
   34453 -> 34455, 34472 -> 34475); verify against upstream i386.c.  */
34422 static int current_machopic_label_num
;
34424 /* Given a symbol name and its associated stub, write out the
34425 definition of the stub. */
34428 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
34430 unsigned int length
;
34431 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
34432 int label
= ++current_machopic_label_num
;
34434 /* For 64-bit we shouldn't get here. */
34435 gcc_assert (!TARGET_64BIT
);
34437 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34438 symb
= targetm
.strip_name_encoding (symb
);
34440 length
= strlen (stub
);
34441 binder_name
= XALLOCAVEC (char, length
+ 32);
34442 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
34444 length
= strlen (symb
);
34445 symbol_name
= XALLOCAVEC (char, length
+ 32);
34446 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
34448 sprintf (lazy_ptr_name
, "L%d$lz", label
);
34450 if (MACHOPIC_ATT_STUB
)
34451 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
34452 else if (MACHOPIC_PURE
)
34453 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
34455 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
34457 fprintf (file
, "%s:\n", stub
);
34458 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34460 if (MACHOPIC_ATT_STUB
)
34462 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34464 else if (MACHOPIC_PURE
)
34467 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34468 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
34469 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34470 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34471 label
, lazy_ptr_name
, label
);
34472 fprintf (file
, "\tjmp\t*%%ecx\n");
34475 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
34477 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34478 it needs no stub-binding-helper. */
34479 if (MACHOPIC_ATT_STUB
)
34482 fprintf (file
, "%s:\n", binder_name
);
34486 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
34487 fprintf (file
, "\tpushl\t%%ecx\n");
34490 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
34492 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
34494 /* N.B. Keep the correspondence of these
34495 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34496 old-pic/new-pic/non-pic stubs; altering this will break
34497 compatibility with existing dylibs. */
34500 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34501 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
34504 /* 16-byte -mdynamic-no-pic stub. */
34505 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
34507 fprintf (file
, "%s:\n", lazy_ptr_name
);
34508 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34509 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
34511 #endif /* TARGET_MACHO */
/* Fills reg_alloc_order[] with the preferred allocation order: call-used
   general regs first, then call-saved general regs, then x87 stack regs
   (before SSE when not doing SSE math, after SSE when TARGET_SSE_MATH),
   SSE and REX-SSE regs, MMX regs, and finally zero-fill for registers that
   are never allocated.
   NOTE(review): lossy extraction — return type, braces and the declarations
   of `i`/`pos` are missing from view (gaps 34516 -> 34521); verify against
   upstream i386.c.  */
34513 /* Order the registers for register allocator. */
34516 x86_order_regs_for_local_alloc (void)
34521 /* First allocate the local general purpose registers. */
34522 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34523 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
34524 reg_alloc_order
[pos
++] = i
;
34526 /* Global general purpose registers. */
34527 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34528 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
34529 reg_alloc_order
[pos
++] = i
;
34531 /* x87 registers come first in case we are doing FP math
34533 if (!TARGET_SSE_MATH
)
34534 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34535 reg_alloc_order
[pos
++] = i
;
34537 /* SSE registers. */
34538 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
34539 reg_alloc_order
[pos
++] = i
;
34540 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
34541 reg_alloc_order
[pos
++] = i
;
34543 /* x87 registers. */
34544 if (TARGET_SSE_MATH
)
34545 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34546 reg_alloc_order
[pos
++] = i
;
34548 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
34549 reg_alloc_order
[pos
++] = i
;
34551 /* Initialize the rest of array as we do not allocate some registers
34553 while (pos
< FIRST_PSEUDO_REGISTER
)
34554 reg_alloc_order
[pos
++] = 0;
/* Attribute handler for "callee_pop_aggregate_return": warns unless the
   attribute is applied to a function-like node, on 32-bit, with a single
   integer-constant argument equal to 0 or 1; sets *no_add_attrs on any
   rejection.
   NOTE(review): lossy extraction — return type, tree args parameter,
   braces, some warning arguments and returns are missing from view
   (gaps e.g. 34560 -> 34562, 34570 -> 34572); verify against upstream.  */
34557 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34558 in struct attribute_spec handler. */
34560 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
34562 int flags ATTRIBUTE_UNUSED
,
34563 bool *no_add_attrs
)
34565 if (TREE_CODE (*node
) != FUNCTION_TYPE
34566 && TREE_CODE (*node
) != METHOD_TYPE
34567 && TREE_CODE (*node
) != FIELD_DECL
34568 && TREE_CODE (*node
) != TYPE_DECL
)
34570 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34572 *no_add_attrs
= true;
34577 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
34579 *no_add_attrs
= true;
34582 if (is_attribute_p ("callee_pop_aggregate_return", name
))
34586 cst
= TREE_VALUE (args
);
34587 if (TREE_CODE (cst
) != INTEGER_CST
)
34589 warning (OPT_Wattributes
,
34590 "%qE attribute requires an integer constant argument",
34592 *no_add_attrs
= true;
34594 else if (compare_tree_int (cst
, 0) != 0
34595 && compare_tree_int (cst
, 1) != 0)
34597 warning (OPT_Wattributes
,
34598 "argument to %qE attribute is neither zero, nor one",
34600 *no_add_attrs
= true;
/* Attribute handler for "ms_abi" / "sysv_abi": accepts only function-like
   nodes and rejects the combination of both ABI attributes on one type.
   NOTE(review): lossy extraction — return type, braces and return
   statements are missing from view (gaps e.g. 34632 -> 34637); verify
   against upstream i386.c.  */
34609 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34610 struct attribute_spec.handler. */
34612 ix86_handle_abi_attribute (tree
*node
, tree name
,
34613 tree args ATTRIBUTE_UNUSED
,
34614 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34616 if (TREE_CODE (*node
) != FUNCTION_TYPE
34617 && TREE_CODE (*node
) != METHOD_TYPE
34618 && TREE_CODE (*node
) != FIELD_DECL
34619 && TREE_CODE (*node
) != TYPE_DECL
)
34621 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34623 *no_add_attrs
= true;
34627 /* Can combine regparm with all attributes but fastcall. */
34628 if (is_attribute_p ("ms_abi", name
))
34630 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
34632 error ("ms_abi and sysv_abi attributes are not compatible");
34637 else if (is_attribute_p ("sysv_abi", name
))
34639 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
34641 error ("ms_abi and sysv_abi attributes are not compatible");
/* Attribute handler for "ms_struct" / "gcc_struct": accepts only record or
   union types (via a TYPE_DECL or the type itself) and rejects applying
   both mutually-exclusive layout attributes to the same type.
   NOTE(review): lossy extraction — return type, the `tree *type` local's
   declaration/else-arm, warning arguments and braces are missing from view
   (gaps e.g. 34655 -> 34658, 34661 -> 34666); verify against upstream.  */
34650 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34651 struct attribute_spec.handler. */
34653 ix86_handle_struct_attribute (tree
*node
, tree name
,
34654 tree args ATTRIBUTE_UNUSED
,
34655 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34658 if (DECL_P (*node
))
34660 if (TREE_CODE (*node
) == TYPE_DECL
)
34661 type
= &TREE_TYPE (*node
);
34666 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
34668 warning (OPT_Wattributes
, "%qE attribute ignored",
34670 *no_add_attrs
= true;
34673 else if ((is_attribute_p ("ms_struct", name
)
34674 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
34675 || ((is_attribute_p ("gcc_struct", name
)
34676 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
34678 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
34680 *no_add_attrs
= true;
/* Attribute handler that only accepts FUNCTION_DECL nodes; warns and sets
   *no_add_attrs otherwise.
   NOTE(review): lossy extraction — return type, braces, the warning's
   `name` argument line and the return are missing from view; verify
   against upstream i386.c.  */
34687 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
34688 tree args ATTRIBUTE_UNUSED
,
34689 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34691 if (TREE_CODE (*node
) != FUNCTION_DECL
)
34693 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34695 *no_add_attrs
= true;
/* Returns true when RECORD_TYPE should use MS bitfield layout: either the
   target default (TARGET_MS_BITFIELD_LAYOUT) is on and "gcc_struct" is not
   present, or the type carries an explicit "ms_struct" attribute.  */
34701 ix86_ms_bitfield_layout_p (const_tree record_type
)
34703 return ((TARGET_MS_BITFIELD_LAYOUT
34704 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
34705 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
/* Returns the rtx (register or stack MEM) holding the `this` pointer on
   entry to FUNCTION.  On 64-bit it is the first (or second, when the
   return is in-memory, per `aggr`) integer argument register of the ABI in
   use; on 32-bit it depends on regparm/fastcall/thiscall, falling back to
   a stack slot at sp+4 (or deeper).
   NOTE(review): lossy extraction — the TARGET_64BIT conditional structure,
   `regno` declaration, braces and the trailing offset expression are
   missing from view (gaps e.g. 34715 -> 34720, 34738 -> 34742,
   34760 -> 34764); verify against upstream i386.c.  */
34708 /* Returns an expression indicating where the this parameter is
34709 located on entry to the FUNCTION. */
34712 x86_this_parameter (tree function
)
34714 tree type
= TREE_TYPE (function
);
34715 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
34720 const int *parm_regs
;
34722 if (ix86_function_type_abi (type
) == MS_ABI
)
34723 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
34725 parm_regs
= x86_64_int_parameter_registers
;
34726 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
34729 nregs
= ix86_function_regparm (type
, function
);
34731 if (nregs
> 0 && !stdarg_p (type
))
34734 unsigned int ccvt
= ix86_get_callcvt (type
);
34736 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34737 regno
= aggr
? DX_REG
: CX_REG
;
34738 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34742 return gen_rtx_MEM (SImode
,
34743 plus_constant (Pmode
, stack_pointer_rtx
, 4));
34752 return gen_rtx_MEM (SImode
,
34753 plus_constant (Pmode
,
34754 stack_pointer_rtx
, 4));
34757 return gen_rtx_REG (SImode
, regno
);
34760 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
/* Feasibility check for x86_output_mi_thunk: 64-bit always works; 32-bit
   needs a free register, so fail when all three regparm registers are used
   and either a vcall offset or a PIC GOT reference would need one.
   NOTE(review): lossy extraction — return type, braces and the
   `return true/false;` lines are missing from view (gaps 34771 -> 34775,
   34779 -> 34783); verify against upstream i386.c.  */
34764 /* Determine whether x86_output_mi_thunk can succeed. */
34767 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
34768 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
34769 HOST_WIDE_INT vcall_offset
, const_tree function
)
34771 /* 64-bit can handle anything. */
34775 /* For 32-bit, everything's fine if we have one free register. */
34776 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
34779 /* Need a free register for vcall_offset. */
34783 /* Need a free register for GOT references. */
34784 if (flag_pic
&& !targetm
.binds_local_p (function
))
34787 /* Otherwise ok. */
/* TARGET_ASM_OUTPUT_MI_THUNK body: emits a covariant/virtual thunk for
   FUNCTION.  Adjusts the `this` parameter by constant DELTA and, when
   VCALL_OFFSET is nonzero, by *(*this + vcall_offset); then tail-calls
   FUNCTION, materializing the callee address through the GOT when PIC
   requires it (64-bit GOTPCREL, 32-bit GOT via %ecx, Darwin indirection).
   Finally runs a minimal rest_of_compilation (shorten_branches + final)
   to write the insns to FILE.
   NOTE(review): lossy extraction — braces, several #if TARGET_MACHO /
   TARGET_64BIT guard lines and some statements are missing from view
   (gaps e.g. 34804 -> 34807, 34845 -> 34850, 34878 -> 34882,
   34948 -> 34952); verify against upstream i386.c.  */
34791 /* Output the assembler code for a thunk function. THUNK_DECL is the
34792 declaration for the thunk function itself, FUNCTION is the decl for
34793 the target function. DELTA is an immediate constant offset to be
34794 added to THIS. If VCALL_OFFSET is nonzero, the word at
34795 *(*this + vcall_offset) should be added to THIS. */
34798 x86_output_mi_thunk (FILE *file
,
34799 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
34800 HOST_WIDE_INT vcall_offset
, tree function
)
34802 rtx this_param
= x86_this_parameter (function
);
34803 rtx this_reg
, tmp
, fnaddr
;
34804 unsigned int tmp_regno
;
34807 tmp_regno
= R10_REG
;
34810 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
34811 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34812 tmp_regno
= AX_REG
;
34813 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34814 tmp_regno
= DX_REG
;
34816 tmp_regno
= CX_REG
;
34819 emit_note (NOTE_INSN_PROLOGUE_END
);
34821 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
34822 pull it in now and let DELTA benefit. */
34823 if (REG_P (this_param
))
34824 this_reg
= this_param
;
34825 else if (vcall_offset
)
34827 /* Put the this parameter into %eax. */
34828 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
34829 emit_move_insn (this_reg
, this_param
);
34832 this_reg
= NULL_RTX
;
34834 /* Adjust the this parameter by a fixed constant. */
34837 rtx delta_rtx
= GEN_INT (delta
);
34838 rtx delta_dst
= this_reg
? this_reg
: this_param
;
34842 if (!x86_64_general_operand (delta_rtx
, Pmode
))
34844 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34845 emit_move_insn (tmp
, delta_rtx
);
34850 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
34853 /* Adjust the this parameter by a value stored in the vtable. */
34856 rtx vcall_addr
, vcall_mem
, this_mem
;
34858 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34860 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
34861 if (Pmode
!= ptr_mode
)
34862 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
34863 emit_move_insn (tmp
, this_mem
);
34865 /* Adjust the this parameter. */
34866 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
34868 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
34870 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
34871 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
34872 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
34875 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
34876 if (Pmode
!= ptr_mode
)
34877 emit_insn (gen_addsi_1_zext (this_reg
,
34878 gen_rtx_REG (ptr_mode
,
34882 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
34885 /* If necessary, drop THIS back to its stack slot. */
34886 if (this_reg
&& this_reg
!= this_param
)
34887 emit_move_insn (this_param
, this_reg
);
34889 fnaddr
= XEXP (DECL_RTL (function
), 0);
34892 if (!flag_pic
|| targetm
.binds_local_p (function
)
34893 || cfun
->machine
->call_abi
== MS_ABI
)
34897 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
34898 tmp
= gen_rtx_CONST (Pmode
, tmp
);
34899 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
34904 if (!flag_pic
|| targetm
.binds_local_p (function
))
34907 else if (TARGET_MACHO
)
34909 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
34910 fnaddr
= XEXP (fnaddr
, 0);
34912 #endif /* TARGET_MACHO */
34915 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
34916 output_set_got (tmp
, NULL_RTX
);
34918 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
34919 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
34920 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
34924 /* Our sibling call patterns do not allow memories, because we have no
34925 predicate that can distinguish between frame and non-frame memory.
34926 For our purposes here, we can get away with (ab)using a jump pattern,
34927 because we're going to do no optimization. */
34928 if (MEM_P (fnaddr
))
34929 emit_jump_insn (gen_indirect_jump (fnaddr
));
34932 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
34933 fnaddr
= legitimize_pic_address (fnaddr
,
34934 gen_rtx_REG (Pmode
, tmp_regno
));
34936 if (!sibcall_insn_operand (fnaddr
, word_mode
))
34938 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
34939 if (GET_MODE (fnaddr
) != word_mode
)
34940 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
34941 emit_move_insn (tmp
, fnaddr
);
34945 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
34946 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
34947 tmp
= emit_call_insn (tmp
);
34948 SIBLING_CALL_P (tmp
) = 1;
34952 /* Emit just enough of rest_of_compilation to get the insns emitted.
34953 Note that use_thunk calls assemble_start_function et al. */
34954 tmp
= get_insns ();
34955 shorten_branches (tmp
);
34956 final_start_function (tmp
, file
, 1);
34957 final (tmp
, file
, 1);
34958 final_end_function ();
/* TARGET_ASM_FILE_START: emits the standard file prologue plus optional
   `.version`, `__fltused`, and Intel-syntax directives depending on target
   configuration.
   NOTE(review): lossy extraction — return type and the #if TARGET_MACHO
   guard around darwin_file_start are missing from view; verify against
   upstream i386.c.  */
34962 x86_file_start (void)
34964 default_file_start ();
34966 darwin_file_start ();
34968 if (X86_FILE_START_VERSION_DIRECTIVE
)
34969 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
34970 if (X86_FILE_START_FLTUSED
)
34971 fputs ("\t.global\t__fltused\n", asm_out_file
);
34972 if (ix86_asm_dialect
== ASM_INTEL
)
34973 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
/* Caps the alignment of struct FIELD at 32 bits for double/_Complex
   double and integer-class modes on 32-bit targets without
   TARGET_ALIGN_DOUBLE, otherwise leaves COMPUTED unchanged.
   NOTE(review): lossy extraction — return type, the early `return
   computed;` and the final return are missing from view (gaps
   34982 -> 34984, 34988 -> end); verify against upstream i386.c.  */
34977 x86_field_alignment (tree field
, int computed
)
34979 enum machine_mode mode
;
34980 tree type
= TREE_TYPE (field
);
34982 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
34984 mode
= TYPE_MODE (strip_array_types (type
));
34985 if (mode
== DFmode
|| mode
== DCmode
34986 || GET_MODE_CLASS (mode
) == MODE_INT
34987 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
34988 return MIN (32, computed
);
/* Emits the profiler (mcount) call sequence into FILE for profile label
   LABELNO.  Picks the mcount symbol per -mfentry, then emits a 64-bit,
   PIC 32-bit, or plain 32-bit call, with optional per-label counter setup
   under NO_PROFILE_COUNTERS.
   NOTE(review): lossy extraction — the TARGET_64BIT/flag_pic if/else
   scaffolding, #endif lines and some fprintf argument lines are missing
   from view (gaps e.g. 34997 -> 35002, 35014 -> 35017); verify against
   upstream i386.c.  */
34992 /* Output assembler code to FILE to increment profiler label # LABELNO
34993 for profiling a function entry. */
34995 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
34997 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35002 #ifndef NO_PROFILE_COUNTERS
35003 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35006 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
35007 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35009 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35013 #ifndef NO_PROFILE_COUNTERS
35014 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35017 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35021 #ifndef NO_PROFILE_COUNTERS
35022 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35025 fprintf (file
, "\tcall\t%s\n", mcount_name
);
/* Conservative lower bound (in bytes) on the encoded size of INSN, used by
   the K8 jump-mispredict padding pass.  Non-active insns and alignment
   unspecs count as 0; calls with symbolic targets are a known 5 bytes;
   otherwise get_attr_length is trusted for non-jumps, with adjustments for
   inline asm and for jumps whose symbolic address length is uncertain.
   NOTE(review): lossy extraction — return type, `len`/`l` declarations,
   several `return N;` lines and the final return are missing from view
   (gaps e.g. 35039 -> 35042, 35055 -> 35059, 35081 -> end); verify
   against upstream i386.c.  */
35029 /* We don't have exact information about the insn sizes, but we may assume
35030 quite safely that we are informed about all 1 byte insns and memory
35031 address sizes. This is enough to eliminate unnecessary padding in
35035 min_insn_size (rtx insn
)
35039 if (!INSN_P (insn
) || !active_insn_p (insn
))
35042 /* Discard alignments we've emit and jump instructions. */
35043 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35044 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35046 if (JUMP_TABLE_DATA_P (insn
))
35049 /* Important case - calls are always 5 bytes.
35050 It is common to have many calls in the row. */
35052 && symbolic_reference_mentioned_p (PATTERN (insn
))
35053 && !SIBLING_CALL_P (insn
))
35055 len
= get_attr_length (insn
);
35059 /* For normal instructions we rely on get_attr_length being exact,
35060 with a few exceptions. */
35061 if (!JUMP_P (insn
))
35063 enum attr_type type
= get_attr_type (insn
);
35068 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35069 || asm_noperands (PATTERN (insn
)) >= 0)
35076 /* Otherwise trust get_attr_length. */
35080 l
= get_attr_length_address (insn
);
35081 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
35090 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35092 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
35096 ix86_avoid_jump_mispredicts (void)
35098 rtx insn
, start
= get_insns ();
35099 int nbytes
= 0, njumps
= 0;
35102 /* Look for all minimal intervals of instructions containing 4 jumps.
35103 The intervals are bounded by START and INSN. NBYTES is the total
35104 size of instructions in the interval including INSN and not including
35105 START. When the NBYTES is smaller than 16 bytes, it is possible
35106 that the end of START and INSN ends up in the same 16byte page.
35108 The smallest offset in the page INSN can start is the case where START
35109 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35110 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35112 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35116 if (LABEL_P (insn
))
35118 int align
= label_to_alignment (insn
);
35119 int max_skip
= label_to_max_skip (insn
);
35123 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35124 already in the current 16 byte page, because otherwise
35125 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35126 bytes to reach 16 byte boundary. */
35128 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35131 fprintf (dump_file
, "Label %i with max_skip %i\n",
35132 INSN_UID (insn
), max_skip
);
35135 while (nbytes
+ max_skip
>= 16)
35137 start
= NEXT_INSN (start
);
35138 if ((JUMP_P (start
)
35139 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35140 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35142 njumps
--, isjump
= 1;
35145 nbytes
-= min_insn_size (start
);
35151 min_size
= min_insn_size (insn
);
35152 nbytes
+= min_size
;
35154 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35155 INSN_UID (insn
), min_size
);
35157 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
35158 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
35166 start
= NEXT_INSN (start
);
35167 if ((JUMP_P (start
)
35168 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35169 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35171 njumps
--, isjump
= 1;
35174 nbytes
-= min_insn_size (start
);
35176 gcc_assert (njumps
>= 0);
35178 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
35179 INSN_UID (start
), INSN_UID (insn
), nbytes
);
35181 if (njumps
== 3 && isjump
&& nbytes
< 16)
35183 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
35186 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
35187 INSN_UID (insn
), padsize
);
35188 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
35194 /* AMD Athlon works faster
35195 when RET is not destination of conditional jump or directly preceded
35196 by other jump instruction. We avoid the penalty by inserting NOP just
35197 before the RET instructions in such cases. */
35199 ix86_pad_returns (void)
35204 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35206 basic_block bb
= e
->src
;
35207 rtx ret
= BB_END (bb
);
35209 bool replace
= false;
35211 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35212 || optimize_bb_for_size_p (bb
))
35214 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35215 if (active_insn_p (prev
) || LABEL_P (prev
))
35217 if (prev
&& LABEL_P (prev
))
35222 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35223 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35224 && !(e
->flags
& EDGE_FALLTHRU
))
35229 prev
= prev_active_insn (ret
);
35231 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35234 /* Empty functions get branch mispredict even when
35235 the jump destination is not visible to us. */
35236 if (!prev
&& !optimize_function_for_size_p (cfun
))
35241 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35247 /* Count the minimum number of instructions in BB. Return 4 if the
35248 number of instructions >= 4. */
35251 ix86_count_insn_bb (basic_block bb
)
35254 int insn_count
= 0;
35256 /* Count number of instructions in this block. Return 4 if the number
35257 of instructions >= 4. */
35258 FOR_BB_INSNS (bb
, insn
)
35260 /* Only happen in exit blocks. */
35262 && ANY_RETURN_P (PATTERN (insn
)))
35265 if (NONDEBUG_INSN_P (insn
)
35266 && GET_CODE (PATTERN (insn
)) != USE
35267 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35270 if (insn_count
>= 4)
35279 /* Count the minimum number of instructions in code path in BB.
35280 Return 4 if the number of instructions >= 4. */
35283 ix86_count_insn (basic_block bb
)
35287 int min_prev_count
;
35289 /* Only bother counting instructions along paths with no
35290 more than 2 basic blocks between entry and exit. Given
35291 that BB has an edge to exit, determine if a predecessor
35292 of BB has an edge from entry. If so, compute the number
35293 of instructions in the predecessor block. If there
35294 happen to be multiple such blocks, compute the minimum. */
35295 min_prev_count
= 4;
35296 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35299 edge_iterator prev_ei
;
35301 if (e
->src
== ENTRY_BLOCK_PTR
)
35303 min_prev_count
= 0;
35306 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35308 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35310 int count
= ix86_count_insn_bb (e
->src
);
35311 if (count
< min_prev_count
)
35312 min_prev_count
= count
;
35318 if (min_prev_count
< 4)
35319 min_prev_count
+= ix86_count_insn_bb (bb
);
35321 return min_prev_count
;
35324 /* Pad short function to 4 instructions. */
35327 ix86_pad_short_function (void)
35332 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35334 rtx ret
= BB_END (e
->src
);
35335 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35337 int insn_count
= ix86_count_insn (e
->src
);
35339 /* Pad short function. */
35340 if (insn_count
< 4)
35344 /* Find epilogue. */
35347 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35348 insn
= PREV_INSN (insn
);
35353 /* Two NOPs count as one instruction. */
35354 insn_count
= 2 * (4 - insn_count
);
35355 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
35361 /* Implement machine specific optimizations. We implement padding of returns
35362 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
35366 /* We are freeing block_for_insn in the toplev to keep compatibility
35367 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35368 compute_bb_for_insn ();
35370 if (optimize
&& optimize_function_for_speed_p (cfun
))
35372 if (TARGET_PAD_SHORT_FUNCTION
)
35373 ix86_pad_short_function ();
35374 else if (TARGET_PAD_RETURNS
)
35375 ix86_pad_returns ();
35376 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35377 if (TARGET_FOUR_JUMP_LIMIT
)
35378 ix86_avoid_jump_mispredicts ();
35383 /* Return nonzero when QImode register that must be represented via REX prefix
35386 x86_extended_QIreg_mentioned_p (rtx insn
)
35389 extract_insn_cached (insn
);
35390 for (i
= 0; i
< recog_data
.n_operands
; i
++)
35391 if (GENERAL_REG_P (recog_data
.operand
[i
])
35392 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
35397 /* Return nonzero when P points to register encoded via REX prefix.
35398 Called via for_each_rtx. */
35400 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
35402 unsigned int regno
;
35405 regno
= REGNO (*p
);
35406 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
35409 /* Return true when INSN mentions register that must be encoded using REX
35412 x86_extended_reg_mentioned_p (rtx insn
)
35414 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
35415 extended_reg_mentioned_1
, NULL
);
35418 /* If profitable, negate (without causing overflow) integer constant
35419 of mode MODE at location LOC. Return true in this case. */
35421 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
35425 if (!CONST_INT_P (*loc
))
35431 /* DImode x86_64 constants must fit in 32 bits. */
35432 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
35443 gcc_unreachable ();
35446 /* Avoid overflows. */
35447 if (mode_signbit_p (mode
, *loc
))
35450 val
= INTVAL (*loc
);
35452 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35453 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35454 if ((val
< 0 && val
!= -128)
35457 *loc
= GEN_INT (-val
);
35464 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35465 optabs would emit if we didn't have TFmode patterns. */
35468 x86_emit_floatuns (rtx operands
[2])
35470 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
35471 enum machine_mode mode
, inmode
;
35473 inmode
= GET_MODE (operands
[1]);
35474 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
35477 in
= force_reg (inmode
, operands
[1]);
35478 mode
= GET_MODE (out
);
35479 neglab
= gen_label_rtx ();
35480 donelab
= gen_label_rtx ();
35481 f0
= gen_reg_rtx (mode
);
35483 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
35485 expand_float (out
, in
, 0);
35487 emit_jump_insn (gen_jump (donelab
));
35490 emit_label (neglab
);
35492 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
35494 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
35496 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
35498 expand_float (f0
, i0
, 0);
35500 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
35502 emit_label (donelab
);
35505 /* AVX2 does support 32-byte integer vector operations,
35506 thus the longest vector we are faced with is V32QImode. */
35507 #define MAX_VECT_LEN 32
35509 struct expand_vec_perm_d
35511 rtx target
, op0
, op1
;
35512 unsigned char perm
[MAX_VECT_LEN
];
35513 enum machine_mode vmode
;
35514 unsigned char nelt
;
35515 bool one_operand_p
;
35519 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
35520 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
35521 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
35523 /* Get a vector mode of the same size as the original but with elements
35524 twice as wide. This is only guaranteed to apply to integral vectors. */
35526 static inline enum machine_mode
35527 get_mode_wider_vector (enum machine_mode o
)
35529 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35530 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
35531 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
35532 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
35536 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35537 with all elements equal to VAR. Return true if successful. */
35540 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
35541 rtx target
, rtx val
)
35564 /* First attempt to recognize VAL as-is. */
35565 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35566 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
35567 if (recog_memoized (insn
) < 0)
35570 /* If that fails, force VAL into a register. */
35573 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
35574 seq
= get_insns ();
35577 emit_insn_before (seq
, insn
);
35579 ok
= recog_memoized (insn
) >= 0;
35588 if (TARGET_SSE
|| TARGET_3DNOW_A
)
35592 val
= gen_lowpart (SImode
, val
);
35593 x
= gen_rtx_TRUNCATE (HImode
, val
);
35594 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
35595 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35608 struct expand_vec_perm_d dperm
;
35612 memset (&dperm
, 0, sizeof (dperm
));
35613 dperm
.target
= target
;
35614 dperm
.vmode
= mode
;
35615 dperm
.nelt
= GET_MODE_NUNITS (mode
);
35616 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
35617 dperm
.one_operand_p
= true;
35619 /* Extend to SImode using a paradoxical SUBREG. */
35620 tmp1
= gen_reg_rtx (SImode
);
35621 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
35623 /* Insert the SImode value as low element of a V4SImode vector. */
35624 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
35625 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
35627 ok
= (expand_vec_perm_1 (&dperm
)
35628 || expand_vec_perm_broadcast_1 (&dperm
));
35640 /* Replicate the value once into the next wider mode and recurse. */
35642 enum machine_mode smode
, wsmode
, wvmode
;
35645 smode
= GET_MODE_INNER (mode
);
35646 wvmode
= get_mode_wider_vector (mode
);
35647 wsmode
= GET_MODE_INNER (wvmode
);
35649 val
= convert_modes (wsmode
, smode
, val
, true);
35650 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
35651 GEN_INT (GET_MODE_BITSIZE (smode
)),
35652 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35653 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
35655 x
= gen_lowpart (wvmode
, target
);
35656 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
35664 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
35665 rtx x
= gen_reg_rtx (hvmode
);
35667 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
35670 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
35671 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35680 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35681 whose ONE_VAR element is VAR, and other elements are zero. Return true
35685 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
35686 rtx target
, rtx var
, int one_var
)
35688 enum machine_mode vsimode
;
35691 bool use_vector_set
= false;
35696 /* For SSE4.1, we normally use vector set. But if the second
35697 element is zero and inter-unit moves are OK, we use movq
35699 use_vector_set
= (TARGET_64BIT
35701 && !(TARGET_INTER_UNIT_MOVES
35707 use_vector_set
= TARGET_SSE4_1
;
35710 use_vector_set
= TARGET_SSE2
;
35713 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
35720 use_vector_set
= TARGET_AVX
;
35723 /* Use ix86_expand_vector_set in 64bit mode only. */
35724 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
35730 if (use_vector_set
)
35732 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
35733 var
= force_reg (GET_MODE_INNER (mode
), var
);
35734 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35750 var
= force_reg (GET_MODE_INNER (mode
), var
);
35751 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
35752 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35757 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
35758 new_target
= gen_reg_rtx (mode
);
35760 new_target
= target
;
35761 var
= force_reg (GET_MODE_INNER (mode
), var
);
35762 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
35763 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
35764 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
35767 /* We need to shuffle the value to the correct position, so
35768 create a new pseudo to store the intermediate result. */
35770 /* With SSE2, we can use the integer shuffle insns. */
35771 if (mode
!= V4SFmode
&& TARGET_SSE2
)
35773 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
35775 GEN_INT (one_var
== 1 ? 0 : 1),
35776 GEN_INT (one_var
== 2 ? 0 : 1),
35777 GEN_INT (one_var
== 3 ? 0 : 1)));
35778 if (target
!= new_target
)
35779 emit_move_insn (target
, new_target
);
35783 /* Otherwise convert the intermediate result to V4SFmode and
35784 use the SSE1 shuffle instructions. */
35785 if (mode
!= V4SFmode
)
35787 tmp
= gen_reg_rtx (V4SFmode
);
35788 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
35793 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
35795 GEN_INT (one_var
== 1 ? 0 : 1),
35796 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
35797 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
35799 if (mode
!= V4SFmode
)
35800 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
35801 else if (tmp
!= target
)
35802 emit_move_insn (target
, tmp
);
35804 else if (target
!= new_target
)
35805 emit_move_insn (target
, new_target
);
35810 vsimode
= V4SImode
;
35816 vsimode
= V2SImode
;
35822 /* Zero extend the variable element to SImode and recurse. */
35823 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
35825 x
= gen_reg_rtx (vsimode
);
35826 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
35828 gcc_unreachable ();
35830 emit_move_insn (target
, gen_lowpart (mode
, x
));
35838 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35839 consisting of the values in VALS. It is known that all elements
35840 except ONE_VAR are constants. Return true if successful. */
35843 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
35844 rtx target
, rtx vals
, int one_var
)
35846 rtx var
= XVECEXP (vals
, 0, one_var
);
35847 enum machine_mode wmode
;
35850 const_vec
= copy_rtx (vals
);
35851 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
35852 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
35860 /* For the two element vectors, it's just as easy to use
35861 the general case. */
35865 /* Use ix86_expand_vector_set in 64bit mode only. */
35888 /* There's no way to set one QImode entry easily. Combine
35889 the variable value with its adjacent constant value, and
35890 promote to an HImode set. */
35891 x
= XVECEXP (vals
, 0, one_var
^ 1);
35894 var
= convert_modes (HImode
, QImode
, var
, true);
35895 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
35896 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35897 x
= GEN_INT (INTVAL (x
) & 0xff);
35901 var
= convert_modes (HImode
, QImode
, var
, true);
35902 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
35904 if (x
!= const0_rtx
)
35905 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
35906 1, OPTAB_LIB_WIDEN
);
35908 x
= gen_reg_rtx (wmode
);
35909 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
35910 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
35912 emit_move_insn (target
, gen_lowpart (mode
, x
));
35919 emit_move_insn (target
, const_vec
);
35920 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35924 /* A subroutine of ix86_expand_vector_init_general. Use vector
35925 concatenate to handle the most general case: all values variable,
35926 and none identical. */
35929 ix86_expand_vector_init_concat (enum machine_mode mode
,
35930 rtx target
, rtx
*ops
, int n
)
35932 enum machine_mode cmode
, hmode
= VOIDmode
;
35933 rtx first
[8], second
[4];
35973 gcc_unreachable ();
35976 if (!register_operand (ops
[1], cmode
))
35977 ops
[1] = force_reg (cmode
, ops
[1]);
35978 if (!register_operand (ops
[0], cmode
))
35979 ops
[0] = force_reg (cmode
, ops
[0]);
35980 emit_insn (gen_rtx_SET (VOIDmode
, target
,
35981 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36001 gcc_unreachable ();
36017 gcc_unreachable ();
36022 /* FIXME: We process inputs backward to help RA. PR 36222. */
36025 for (; i
> 0; i
-= 2, j
--)
36027 first
[j
] = gen_reg_rtx (cmode
);
36028 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36029 ix86_expand_vector_init (false, first
[j
],
36030 gen_rtx_PARALLEL (cmode
, v
));
36036 gcc_assert (hmode
!= VOIDmode
);
36037 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36039 second
[j
] = gen_reg_rtx (hmode
);
36040 ix86_expand_vector_init_concat (hmode
, second
[j
],
36044 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36047 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36051 gcc_unreachable ();
36055 /* A subroutine of ix86_expand_vector_init_general. Use vector
36056 interleave to handle the most general case: all values variable,
36057 and none identical. */
36060 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36061 rtx target
, rtx
*ops
, int n
)
36063 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36066 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36067 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36068 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36073 gen_load_even
= gen_vec_setv8hi
;
36074 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36075 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36076 inner_mode
= HImode
;
36077 first_imode
= V4SImode
;
36078 second_imode
= V2DImode
;
36079 third_imode
= VOIDmode
;
36082 gen_load_even
= gen_vec_setv16qi
;
36083 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36084 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36085 inner_mode
= QImode
;
36086 first_imode
= V8HImode
;
36087 second_imode
= V4SImode
;
36088 third_imode
= V2DImode
;
36091 gcc_unreachable ();
36094 for (i
= 0; i
< n
; i
++)
36096 /* Extend the odd element to SImode using a paradoxical SUBREG. */
36097 op0
= gen_reg_rtx (SImode
);
36098 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36100 /* Insert the SImode value as low element of V4SImode vector. */
36101 op1
= gen_reg_rtx (V4SImode
);
36102 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36103 gen_rtx_VEC_DUPLICATE (V4SImode
,
36105 CONST0_RTX (V4SImode
),
36107 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36109 /* Cast the V4SImode vector back to a vector in original mode. */
36110 op0
= gen_reg_rtx (mode
);
36111 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36113 /* Load even elements into the second position. */
36114 emit_insn (gen_load_even (op0
,
36115 force_reg (inner_mode
,
36119 /* Cast vector to FIRST_IMODE vector. */
36120 ops
[i
] = gen_reg_rtx (first_imode
);
36121 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36124 /* Interleave low FIRST_IMODE vectors. */
36125 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36127 op0
= gen_reg_rtx (first_imode
);
36128 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36130 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36131 ops
[j
] = gen_reg_rtx (second_imode
);
36132 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36135 /* Interleave low SECOND_IMODE vectors. */
36136 switch (second_imode
)
36139 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36141 op0
= gen_reg_rtx (second_imode
);
36142 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36145 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36147 ops
[j
] = gen_reg_rtx (third_imode
);
36148 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36150 second_imode
= V2DImode
;
36151 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36155 op0
= gen_reg_rtx (second_imode
);
36156 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36159 /* Cast the SECOND_IMODE vector back to a vector on original
36161 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36162 gen_lowpart (mode
, op0
)));
36166 gcc_unreachable ();
36170 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36171 all values variable, and none identical. */
36174 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36175 rtx target
, rtx vals
)
36177 rtx ops
[32], op0
, op1
;
36178 enum machine_mode half_mode
= VOIDmode
;
36185 if (!mmx_ok
&& !TARGET_SSE
)
36197 n
= GET_MODE_NUNITS (mode
);
36198 for (i
= 0; i
< n
; i
++)
36199 ops
[i
] = XVECEXP (vals
, 0, i
);
36200 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36204 half_mode
= V16QImode
;
36208 half_mode
= V8HImode
;
36212 n
= GET_MODE_NUNITS (mode
);
36213 for (i
= 0; i
< n
; i
++)
36214 ops
[i
] = XVECEXP (vals
, 0, i
);
36215 op0
= gen_reg_rtx (half_mode
);
36216 op1
= gen_reg_rtx (half_mode
);
36217 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36219 ix86_expand_vector_init_interleave (half_mode
, op1
,
36220 &ops
[n
>> 1], n
>> 2);
36221 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36222 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36226 if (!TARGET_SSE4_1
)
36234 /* Don't use ix86_expand_vector_init_interleave if we can't
36235 move from GPR to SSE register directly. */
36236 if (!TARGET_INTER_UNIT_MOVES
)
36239 n
= GET_MODE_NUNITS (mode
);
36240 for (i
= 0; i
< n
; i
++)
36241 ops
[i
] = XVECEXP (vals
, 0, i
);
36242 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36250 gcc_unreachable ();
36254 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36255 enum machine_mode inner_mode
;
36256 rtx words
[4], shift
;
36258 inner_mode
= GET_MODE_INNER (mode
);
36259 n_elts
= GET_MODE_NUNITS (mode
);
36260 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36261 n_elt_per_word
= n_elts
/ n_words
;
36262 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36264 for (i
= 0; i
< n_words
; ++i
)
36266 rtx word
= NULL_RTX
;
36268 for (j
= 0; j
< n_elt_per_word
; ++j
)
36270 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36271 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36277 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36278 word
, 1, OPTAB_LIB_WIDEN
);
36279 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36280 word
, 1, OPTAB_LIB_WIDEN
);
36288 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36289 else if (n_words
== 2)
36291 rtx tmp
= gen_reg_rtx (mode
);
36292 emit_clobber (tmp
);
36293 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36294 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36295 emit_move_insn (target
, tmp
);
36297 else if (n_words
== 4)
36299 rtx tmp
= gen_reg_rtx (V4SImode
);
36300 gcc_assert (word_mode
== SImode
);
36301 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36302 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36303 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36306 gcc_unreachable ();
36310 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36311 instructions unless MMX_OK is true. */
36314 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36316 enum machine_mode mode
= GET_MODE (target
);
36317 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36318 int n_elts
= GET_MODE_NUNITS (mode
);
36319 int n_var
= 0, one_var
= -1;
36320 bool all_same
= true, all_const_zero
= true;
36324 for (i
= 0; i
< n_elts
; ++i
)
36326 x
= XVECEXP (vals
, 0, i
);
36327 if (!(CONST_INT_P (x
)
36328 || GET_CODE (x
) == CONST_DOUBLE
36329 || GET_CODE (x
) == CONST_FIXED
))
36330 n_var
++, one_var
= i
;
36331 else if (x
!= CONST0_RTX (inner_mode
))
36332 all_const_zero
= false;
36333 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
36337 /* Constants are best loaded from the constant pool. */
36340 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
36344 /* If all values are identical, broadcast the value. */
36346 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
36347 XVECEXP (vals
, 0, 0)))
36350 /* Values where only one field is non-constant are best loaded from
36351 the pool and overwritten via move later. */
36355 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
36356 XVECEXP (vals
, 0, one_var
),
36360 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
36364 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
36368 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
36370 enum machine_mode mode
= GET_MODE (target
);
36371 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36372 enum machine_mode half_mode
;
36373 bool use_vec_merge
= false;
36375 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
36377 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
36378 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
36379 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
36380 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
36381 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
36382 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
36384 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
36386 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
36387 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
36388 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
36389 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
36390 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
36391 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
36401 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36402 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
36404 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36406 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36407 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36413 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
36417 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36418 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
36420 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36422 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36423 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36430 /* For the two element vectors, we implement a VEC_CONCAT with
36431 the extraction of the other element. */
36433 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
36434 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
36437 op0
= val
, op1
= tmp
;
36439 op0
= tmp
, op1
= val
;
36441 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
36442 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36447 use_vec_merge
= TARGET_SSE4_1
;
36454 use_vec_merge
= true;
36458 /* tmp = target = A B C D */
36459 tmp
= copy_to_reg (target
);
36460 /* target = A A B B */
36461 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
36462 /* target = X A B B */
36463 ix86_expand_vector_set (false, target
, val
, 0);
36464 /* target = A X C D */
36465 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36466 const1_rtx
, const0_rtx
,
36467 GEN_INT (2+4), GEN_INT (3+4)));
36471 /* tmp = target = A B C D */
36472 tmp
= copy_to_reg (target
);
36473 /* tmp = X B C D */
36474 ix86_expand_vector_set (false, tmp
, val
, 0);
36475 /* target = A B X D */
36476 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36477 const0_rtx
, const1_rtx
,
36478 GEN_INT (0+4), GEN_INT (3+4)));
36482 /* tmp = target = A B C D */
36483 tmp
= copy_to_reg (target
);
36484 /* tmp = X B C D */
36485 ix86_expand_vector_set (false, tmp
, val
, 0);
36486 /* target = A B X D */
36487 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36488 const0_rtx
, const1_rtx
,
36489 GEN_INT (2+4), GEN_INT (0+4)));
36493 gcc_unreachable ();
36498 use_vec_merge
= TARGET_SSE4_1
;
36502 /* Element 0 handled by vec_merge below. */
36505 use_vec_merge
= true;
36511 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36512 store into element 0, then shuffle them back. */
36516 order
[0] = GEN_INT (elt
);
36517 order
[1] = const1_rtx
;
36518 order
[2] = const2_rtx
;
36519 order
[3] = GEN_INT (3);
36520 order
[elt
] = const0_rtx
;
36522 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36523 order
[1], order
[2], order
[3]));
36525 ix86_expand_vector_set (false, target
, val
, 0);
36527 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36528 order
[1], order
[2], order
[3]));
36532 /* For SSE1, we have to reuse the V4SF code. */
36533 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
36534 gen_lowpart (SFmode
, val
), elt
);
36539 use_vec_merge
= TARGET_SSE2
;
36542 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36546 use_vec_merge
= TARGET_SSE4_1
;
36553 half_mode
= V16QImode
;
36559 half_mode
= V8HImode
;
36565 half_mode
= V4SImode
;
36571 half_mode
= V2DImode
;
36577 half_mode
= V4SFmode
;
36583 half_mode
= V2DFmode
;
36589 /* Compute offset. */
36593 gcc_assert (i
<= 1);
36595 /* Extract the half. */
36596 tmp
= gen_reg_rtx (half_mode
);
36597 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
36599 /* Put val in tmp at elt. */
36600 ix86_expand_vector_set (false, tmp
, val
, elt
);
36603 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
36612 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36613 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
36614 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36618 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36620 emit_move_insn (mem
, target
);
36622 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36623 emit_move_insn (tmp
, val
);
36625 emit_move_insn (target
, mem
);
36630 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
36632 enum machine_mode mode
= GET_MODE (vec
);
36633 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36634 bool use_vec_extr
= false;
36647 use_vec_extr
= true;
36651 use_vec_extr
= TARGET_SSE4_1
;
36663 tmp
= gen_reg_rtx (mode
);
36664 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
36665 GEN_INT (elt
), GEN_INT (elt
),
36666 GEN_INT (elt
+4), GEN_INT (elt
+4)));
36670 tmp
= gen_reg_rtx (mode
);
36671 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
36675 gcc_unreachable ();
36678 use_vec_extr
= true;
36683 use_vec_extr
= TARGET_SSE4_1
;
36697 tmp
= gen_reg_rtx (mode
);
36698 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
36699 GEN_INT (elt
), GEN_INT (elt
),
36700 GEN_INT (elt
), GEN_INT (elt
)));
36704 tmp
= gen_reg_rtx (mode
);
36705 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
36709 gcc_unreachable ();
36712 use_vec_extr
= true;
36717 /* For SSE1, we have to reuse the V4SF code. */
36718 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
36719 gen_lowpart (V4SFmode
, vec
), elt
);
36725 use_vec_extr
= TARGET_SSE2
;
36728 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36732 use_vec_extr
= TARGET_SSE4_1
;
36738 tmp
= gen_reg_rtx (V4SFmode
);
36740 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
36742 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
36743 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36751 tmp
= gen_reg_rtx (V2DFmode
);
36753 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
36755 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
36756 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36764 tmp
= gen_reg_rtx (V16QImode
);
36766 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
36768 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
36769 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
36777 tmp
= gen_reg_rtx (V8HImode
);
36779 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
36781 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
36782 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
36790 tmp
= gen_reg_rtx (V4SImode
);
36792 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
36794 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
36795 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36803 tmp
= gen_reg_rtx (V2DImode
);
36805 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
36807 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
36808 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36814 /* ??? Could extract the appropriate HImode element and shift. */
36821 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
36822 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
36824 /* Let the rtl optimizers know about the zero extension performed. */
36825 if (inner_mode
== QImode
|| inner_mode
== HImode
)
36827 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
36828 target
= gen_lowpart (SImode
, target
);
36831 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36835 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36837 emit_move_insn (mem
, vec
);
36839 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36840 emit_move_insn (target
, tmp
);
36844 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
36845 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
36846 The upper bits of DEST are undefined, though they shouldn't cause
36847 exceptions (some bits from src or all zeros are ok). */
36850 emit_reduc_half (rtx dest
, rtx src
, int i
)
36853 switch (GET_MODE (src
))
36857 tem
= gen_sse_movhlps (dest
, src
, src
);
36859 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
36860 GEN_INT (1 + 4), GEN_INT (1 + 4));
36863 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
36869 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
36870 gen_lowpart (V1TImode
, src
),
36875 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
36877 tem
= gen_avx_shufps256 (dest
, src
, src
,
36878 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
36882 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
36884 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
36891 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
36892 gen_lowpart (V4DImode
, src
),
36893 gen_lowpart (V4DImode
, src
),
36896 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
36897 gen_lowpart (V2TImode
, src
),
36901 gcc_unreachable ();
36906 /* Expand a vector reduction. FN is the binary pattern to reduce;
36907 DEST is the destination; IN is the input vector. */
36910 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
36912 rtx half
, dst
, vec
= in
;
36913 enum machine_mode mode
= GET_MODE (in
);
36916 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
36918 && mode
== V8HImode
36919 && fn
== gen_uminv8hi3
)
36921 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
36925 for (i
= GET_MODE_BITSIZE (mode
);
36926 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
36929 half
= gen_reg_rtx (mode
);
36930 emit_reduc_half (half
, vec
, i
);
36931 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
36934 dst
= gen_reg_rtx (mode
);
36935 emit_insn (fn (dst
, half
, vec
));
36940 /* Target hook for scalar_mode_supported_p. */
36942 ix86_scalar_mode_supported_p (enum machine_mode mode
)
36944 if (DECIMAL_FLOAT_MODE_P (mode
))
36945 return default_decimal_float_supported_p ();
36946 else if (mode
== TFmode
)
36949 return default_scalar_mode_supported_p (mode
);
36952 /* Implements target hook vector_mode_supported_p. */
36954 ix86_vector_mode_supported_p (enum machine_mode mode
)
36956 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
36958 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
36960 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
36962 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
36964 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
36969 /* Target hook for c_mode_for_suffix. */
36970 static enum machine_mode
36971 ix86_c_mode_for_suffix (char suffix
)
36981 /* Worker function for TARGET_MD_ASM_CLOBBERS.
36983 We do this in the new i386 backend to maintain source compatibility
36984 with the old cc0-based compiler. */
36987 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
36988 tree inputs ATTRIBUTE_UNUSED
,
36991 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
36993 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
36998 /* Implements target vector targetm.asm.encode_section_info. */
37000 static void ATTRIBUTE_UNUSED
37001 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37003 default_encode_section_info (decl
, rtl
, first
);
37005 if (TREE_CODE (decl
) == VAR_DECL
37006 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37007 && ix86_in_large_data_p (decl
))
37008 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37011 /* Worker function for REVERSE_CONDITION. */
37014 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37016 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37017 ? reverse_condition (code
)
37018 : reverse_condition_maybe_unordered (code
));
37021 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37025 output_387_reg_move (rtx insn
, rtx
*operands
)
37027 if (REG_P (operands
[0]))
37029 if (REG_P (operands
[1])
37030 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37032 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37033 return output_387_ffreep (operands
, 0);
37034 return "fstp\t%y0";
37036 if (STACK_TOP_P (operands
[0]))
37037 return "fld%Z1\t%y1";
37040 else if (MEM_P (operands
[0]))
37042 gcc_assert (REG_P (operands
[1]));
37043 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37044 return "fstp%Z0\t%y0";
37047 /* There is no non-popping store to memory for XFmode.
37048 So if we need one, follow the store with a load. */
37049 if (GET_MODE (operands
[0]) == XFmode
)
37050 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37052 return "fst%Z0\t%y0";
37059 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37060 FP status register is set. */
37063 ix86_emit_fp_unordered_jump (rtx label
)
37065 rtx reg
= gen_reg_rtx (HImode
);
37068 emit_insn (gen_x86_fnstsw_1 (reg
));
37070 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37072 emit_insn (gen_x86_sahf_1 (reg
));
37074 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37075 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37079 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37081 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37082 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37085 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37086 gen_rtx_LABEL_REF (VOIDmode
, label
),
37088 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37090 emit_jump_insn (temp
);
37091 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37094 /* Output code to perform a log1p XFmode calculation. */
37096 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37098 rtx label1
= gen_label_rtx ();
37099 rtx label2
= gen_label_rtx ();
37101 rtx tmp
= gen_reg_rtx (XFmode
);
37102 rtx tmp2
= gen_reg_rtx (XFmode
);
37105 emit_insn (gen_absxf2 (tmp
, op1
));
37106 test
= gen_rtx_GE (VOIDmode
, tmp
,
37107 CONST_DOUBLE_FROM_REAL_VALUE (
37108 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37110 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37112 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37113 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37114 emit_jump (label2
);
37116 emit_label (label1
);
37117 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37118 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37119 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37120 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37122 emit_label (label2
);
37125 /* Emit code for round calculation. */
37126 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37128 enum machine_mode inmode
= GET_MODE (op1
);
37129 enum machine_mode outmode
= GET_MODE (op0
);
37130 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37131 rtx scratch
= gen_reg_rtx (HImode
);
37132 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37133 rtx jump_label
= gen_label_rtx ();
37135 rtx (*gen_abs
) (rtx
, rtx
);
37136 rtx (*gen_neg
) (rtx
, rtx
);
37141 gen_abs
= gen_abssf2
;
37144 gen_abs
= gen_absdf2
;
37147 gen_abs
= gen_absxf2
;
37150 gcc_unreachable ();
37156 gen_neg
= gen_negsf2
;
37159 gen_neg
= gen_negdf2
;
37162 gen_neg
= gen_negxf2
;
37165 gen_neg
= gen_neghi2
;
37168 gen_neg
= gen_negsi2
;
37171 gen_neg
= gen_negdi2
;
37174 gcc_unreachable ();
37177 e1
= gen_reg_rtx (inmode
);
37178 e2
= gen_reg_rtx (inmode
);
37179 res
= gen_reg_rtx (outmode
);
37181 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37183 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37185 /* scratch = fxam(op1) */
37186 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37187 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37189 /* e1 = fabs(op1) */
37190 emit_insn (gen_abs (e1
, op1
));
37192 /* e2 = e1 + 0.5 */
37193 half
= force_reg (inmode
, half
);
37194 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37195 gen_rtx_PLUS (inmode
, e1
, half
)));
37197 /* res = floor(e2) */
37198 if (inmode
!= XFmode
)
37200 tmp1
= gen_reg_rtx (XFmode
);
37202 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37203 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
37213 rtx tmp0
= gen_reg_rtx (XFmode
);
37215 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37217 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37218 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37219 UNSPEC_TRUNC_NOOP
)));
37223 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
37226 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37229 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37232 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37235 gcc_unreachable ();
37238 /* flags = signbit(a) */
37239 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37241 /* if (flags) then res = -res */
37242 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37243 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37244 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37246 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37247 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37248 JUMP_LABEL (insn
) = jump_label
;
37250 emit_insn (gen_neg (res
, res
));
37252 emit_label (jump_label
);
37253 LABEL_NUSES (jump_label
) = 1;
37255 emit_move_insn (op0
, res
);
37258 /* Output code to perform a Newton-Rhapson approximation of a single precision
37259 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37261 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37263 rtx x0
, x1
, e0
, e1
;
37265 x0
= gen_reg_rtx (mode
);
37266 e0
= gen_reg_rtx (mode
);
37267 e1
= gen_reg_rtx (mode
);
37268 x1
= gen_reg_rtx (mode
);
37270 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37272 b
= force_reg (mode
, b
);
37274 /* x0 = rcp(b) estimate */
37275 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37276 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
37279 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37280 gen_rtx_MULT (mode
, x0
, b
)));
37283 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37284 gen_rtx_MULT (mode
, x0
, e0
)));
37287 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37288 gen_rtx_PLUS (mode
, x0
, x0
)));
37291 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37292 gen_rtx_MINUS (mode
, e1
, e0
)));
37295 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37296 gen_rtx_MULT (mode
, a
, x1
)));
37299 /* Output code to perform a Newton-Rhapson approximation of a
37300 single precision floating point [reciprocal] square root. */
37302 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37305 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37308 x0
= gen_reg_rtx (mode
);
37309 e0
= gen_reg_rtx (mode
);
37310 e1
= gen_reg_rtx (mode
);
37311 e2
= gen_reg_rtx (mode
);
37312 e3
= gen_reg_rtx (mode
);
37314 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37315 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37317 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37318 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37320 if (VECTOR_MODE_P (mode
))
37322 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37323 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37326 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37327 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37329 a
= force_reg (mode
, a
);
37331 /* x0 = rsqrt(a) estimate */
37332 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37333 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
37336 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
37341 zero
= gen_reg_rtx (mode
);
37342 mask
= gen_reg_rtx (mode
);
37344 zero
= force_reg (mode
, CONST0_RTX(mode
));
37345 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37346 gen_rtx_NE (mode
, zero
, a
)));
37348 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37349 gen_rtx_AND (mode
, x0
, mask
)));
37353 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37354 gen_rtx_MULT (mode
, x0
, a
)));
37356 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37357 gen_rtx_MULT (mode
, e0
, x0
)));
37360 mthree
= force_reg (mode
, mthree
);
37361 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37362 gen_rtx_PLUS (mode
, e1
, mthree
)));
37364 mhalf
= force_reg (mode
, mhalf
);
37366 /* e3 = -.5 * x0 */
37367 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37368 gen_rtx_MULT (mode
, x0
, mhalf
)));
37370 /* e3 = -.5 * e0 */
37371 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37372 gen_rtx_MULT (mode
, e0
, mhalf
)));
37373 /* ret = e2 * e3 */
37374 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37375 gen_rtx_MULT (mode
, e2
, e3
)));
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.
   NOTE(review): the preprocessor structure and DECL parameter were
   missing from the damaged source and were restored from the standard
   GCC implementation -- verify against upstream i386.c.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
37408 /* Return the mangling of TYPE if it is an extended fundamental type. */
37410 static const char *
37411 ix86_mangle_type (const_tree type
)
37413 type
= TYPE_MAIN_VARIANT (type
);
37415 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
37416 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
37419 switch (TYPE_MODE (type
))
37422 /* __float128 is "g". */
37425 /* "long double" or __float80 is "e". */
37432 /* For 32-bit code we can save PIC register setup by using
37433 __stack_chk_fail_local hidden function instead of calling
37434 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
37435 register, so it is better to call __stack_chk_fail directly. */
37437 static tree ATTRIBUTE_UNUSED
37438 ix86_stack_protect_fail (void)
37440 return TARGET_64BIT
37441 ? default_external_stack_protect_fail ()
37442 : default_hidden_stack_protect_fail ();
37445 /* Select a format to encode pointers in exception handling data. CODE
37446 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37447 true if the symbol may be affected by dynamic relocations.
37449 ??? All x86 object file formats are capable of representing this.
37450 After all, the relocation needed is the same as for the call insn.
37451 Whether or not a particular assembler allows us to enter such, I
37452 guess we'll have to see. */
37454 asm_preferred_eh_data_format (int code
, int global
)
37458 int type
= DW_EH_PE_sdata8
;
37460 || ix86_cmodel
== CM_SMALL_PIC
37461 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
37462 type
= DW_EH_PE_sdata4
;
37463 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
37465 if (ix86_cmodel
== CM_SMALL
37466 || (ix86_cmodel
== CM_MEDIUM
&& code
))
37467 return DW_EH_PE_udata4
;
37468 return DW_EH_PE_absptr
;
37471 /* Expand copysign from SIGN to the positive value ABS_VALUE
37472 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
37475 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
37477 enum machine_mode mode
= GET_MODE (sign
);
37478 rtx sgn
= gen_reg_rtx (mode
);
37479 if (mask
== NULL_RTX
)
37481 enum machine_mode vmode
;
37483 if (mode
== SFmode
)
37485 else if (mode
== DFmode
)
37490 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
37491 if (!VECTOR_MODE_P (mode
))
37493 /* We need to generate a scalar mode mask in this case. */
37494 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37495 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37496 mask
= gen_reg_rtx (mode
);
37497 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37501 mask
= gen_rtx_NOT (mode
, mask
);
37502 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
37503 gen_rtx_AND (mode
, mask
, sign
)));
37504 emit_insn (gen_rtx_SET (VOIDmode
, result
,
37505 gen_rtx_IOR (mode
, abs_value
, sgn
)));
37508 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37509 mask for masking out the sign-bit is stored in *SMASK, if that is
37512 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
37514 enum machine_mode vmode
, mode
= GET_MODE (op0
);
37517 xa
= gen_reg_rtx (mode
);
37518 if (mode
== SFmode
)
37520 else if (mode
== DFmode
)
37524 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
37525 if (!VECTOR_MODE_P (mode
))
37527 /* We need to generate a scalar mode mask in this case. */
37528 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37529 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37530 mask
= gen_reg_rtx (mode
);
37531 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37533 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
37534 gen_rtx_AND (mode
, op0
, mask
)));
37542 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37543 swapping the operands if SWAP_OPERANDS is true. The expanded
37544 code is a forward jump to a newly created label in case the
37545 comparison is true. The generated label rtx is returned. */
37547 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
37548 bool swap_operands
)
37559 label
= gen_label_rtx ();
37560 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
37561 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37562 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
37563 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
37564 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
37565 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
37566 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37567 JUMP_LABEL (tmp
) = label
;
37572 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37573 using comparison code CODE. Operands are swapped for the comparison if
37574 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37576 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
37577 bool swap_operands
)
37579 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
37580 enum machine_mode mode
= GET_MODE (op0
);
37581 rtx mask
= gen_reg_rtx (mode
);
37590 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
37592 emit_insn (insn (mask
, op0
, op1
,
37593 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
37597 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37598 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
37600 ix86_gen_TWO52 (enum machine_mode mode
)
37602 REAL_VALUE_TYPE TWO52r
;
37605 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
37606 TWO52
= const_double_from_real_value (TWO52r
, mode
);
37607 TWO52
= force_reg (mode
, TWO52
);
37612 /* Expand SSE sequence for computing lround from OP1 storing
37615 ix86_expand_lround (rtx op0
, rtx op1
)
37617 /* C code for the stuff we're doing below:
37618 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37621 enum machine_mode mode
= GET_MODE (op1
);
37622 const struct real_format
*fmt
;
37623 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37626 /* load nextafter (0.5, 0.0) */
37627 fmt
= REAL_MODE_FORMAT (mode
);
37628 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37629 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37631 /* adj = copysign (0.5, op1) */
37632 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37633 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
37635 /* adj = op1 + adj */
37636 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37638 /* op0 = (imode)adj */
37639 expand_fix (op0
, adj
, 0);
37642 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
37645 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
37647 /* C code for the stuff we're doing below (for do_floor):
37649 xi -= (double)xi > op1 ? 1 : 0;
37652 enum machine_mode fmode
= GET_MODE (op1
);
37653 enum machine_mode imode
= GET_MODE (op0
);
37654 rtx ireg
, freg
, label
, tmp
;
37656 /* reg = (long)op1 */
37657 ireg
= gen_reg_rtx (imode
);
37658 expand_fix (ireg
, op1
, 0);
37660 /* freg = (double)reg */
37661 freg
= gen_reg_rtx (fmode
);
37662 expand_float (freg
, ireg
, 0);
37664 /* ireg = (freg > op1) ? ireg - 1 : ireg */
37665 label
= ix86_expand_sse_compare_and_jump (UNLE
,
37666 freg
, op1
, !do_floor
);
37667 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
37668 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
37669 emit_move_insn (ireg
, tmp
);
37671 emit_label (label
);
37672 LABEL_NUSES (label
) = 1;
37674 emit_move_insn (op0
, ireg
);
37677 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
37678 result in OPERAND0. */
37680 ix86_expand_rint (rtx operand0
, rtx operand1
)
37682 /* C code for the stuff we're doing below:
37683 xa = fabs (operand1);
37684 if (!isless (xa, 2**52))
37686 xa = xa + 2**52 - 2**52;
37687 return copysign (xa, operand1);
37689 enum machine_mode mode
= GET_MODE (operand0
);
37690 rtx res
, xa
, label
, TWO52
, mask
;
37692 res
= gen_reg_rtx (mode
);
37693 emit_move_insn (res
, operand1
);
37695 /* xa = abs (operand1) */
37696 xa
= ix86_expand_sse_fabs (res
, &mask
);
37698 /* if (!isless (xa, TWO52)) goto label; */
37699 TWO52
= ix86_gen_TWO52 (mode
);
37700 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37702 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37703 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37705 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
37707 emit_label (label
);
37708 LABEL_NUSES (label
) = 1;
37710 emit_move_insn (operand0
, res
);
37713 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37716 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
37718 /* C code for the stuff we expand below.
37719 double xa = fabs (x), x2;
37720 if (!isless (xa, TWO52))
37722 xa = xa + TWO52 - TWO52;
37723 x2 = copysign (xa, x);
37732 enum machine_mode mode
= GET_MODE (operand0
);
37733 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
37735 TWO52
= ix86_gen_TWO52 (mode
);
37737 /* Temporary for holding the result, initialized to the input
37738 operand to ease control flow. */
37739 res
= gen_reg_rtx (mode
);
37740 emit_move_insn (res
, operand1
);
37742 /* xa = abs (operand1) */
37743 xa
= ix86_expand_sse_fabs (res
, &mask
);
37745 /* if (!isless (xa, TWO52)) goto label; */
37746 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37748 /* xa = xa + TWO52 - TWO52; */
37749 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37750 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37752 /* xa = copysign (xa, operand1) */
37753 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
37755 /* generate 1.0 or -1.0 */
37756 one
= force_reg (mode
,
37757 const_double_from_real_value (do_floor
37758 ? dconst1
: dconstm1
, mode
));
37760 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37761 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37762 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37763 gen_rtx_AND (mode
, one
, tmp
)));
37764 /* We always need to subtract here to preserve signed zero. */
37765 tmp
= expand_simple_binop (mode
, MINUS
,
37766 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37767 emit_move_insn (res
, tmp
);
37769 emit_label (label
);
37770 LABEL_NUSES (label
) = 1;
37772 emit_move_insn (operand0
, res
);
37775 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37778 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
37780 /* C code for the stuff we expand below.
37781 double xa = fabs (x), x2;
37782 if (!isless (xa, TWO52))
37784 x2 = (double)(long)x;
37791 if (HONOR_SIGNED_ZEROS (mode))
37792 return copysign (x2, x);
37795 enum machine_mode mode
= GET_MODE (operand0
);
37796 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
37798 TWO52
= ix86_gen_TWO52 (mode
);
37800 /* Temporary for holding the result, initialized to the input
37801 operand to ease control flow. */
37802 res
= gen_reg_rtx (mode
);
37803 emit_move_insn (res
, operand1
);
37805 /* xa = abs (operand1) */
37806 xa
= ix86_expand_sse_fabs (res
, &mask
);
37808 /* if (!isless (xa, TWO52)) goto label; */
37809 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37811 /* xa = (double)(long)x */
37812 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37813 expand_fix (xi
, res
, 0);
37814 expand_float (xa
, xi
, 0);
37817 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37819 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37820 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37821 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37822 gen_rtx_AND (mode
, one
, tmp
)));
37823 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
37824 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37825 emit_move_insn (res
, tmp
);
37827 if (HONOR_SIGNED_ZEROS (mode
))
37828 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37830 emit_label (label
);
37831 LABEL_NUSES (label
) = 1;
37833 emit_move_insn (operand0
, res
);
37836 /* Expand SSE sequence for computing round from OPERAND1 storing
37837 into OPERAND0. Sequence that works without relying on DImode truncation
37838 via cvttsd2siq that is only available on 64bit targets. */
37840 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
37842 /* C code for the stuff we expand below.
37843 double xa = fabs (x), xa2, x2;
37844 if (!isless (xa, TWO52))
37846 Using the absolute value and copying back sign makes
37847 -0.0 -> -0.0 correct.
37848 xa2 = xa + TWO52 - TWO52;
37853 else if (dxa > 0.5)
37855 x2 = copysign (xa2, x);
37858 enum machine_mode mode
= GET_MODE (operand0
);
37859 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
37861 TWO52
= ix86_gen_TWO52 (mode
);
37863 /* Temporary for holding the result, initialized to the input
37864 operand to ease control flow. */
37865 res
= gen_reg_rtx (mode
);
37866 emit_move_insn (res
, operand1
);
37868 /* xa = abs (operand1) */
37869 xa
= ix86_expand_sse_fabs (res
, &mask
);
37871 /* if (!isless (xa, TWO52)) goto label; */
37872 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37874 /* xa2 = xa + TWO52 - TWO52; */
37875 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37876 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
37878 /* dxa = xa2 - xa; */
37879 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
37881 /* generate 0.5, 1.0 and -0.5 */
37882 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
37883 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37884 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
37888 tmp
= gen_reg_rtx (mode
);
37889 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
37890 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
37891 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37892 gen_rtx_AND (mode
, one
, tmp
)));
37893 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37894 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
37895 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
37896 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37897 gen_rtx_AND (mode
, one
, tmp
)));
37898 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37900 /* res = copysign (xa2, operand1) */
37901 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
37903 emit_label (label
);
37904 LABEL_NUSES (label
) = 1;
37906 emit_move_insn (operand0
, res
);
37909 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37912 ix86_expand_trunc (rtx operand0
, rtx operand1
)
37914 /* C code for SSE variant we expand below.
37915 double xa = fabs (x), x2;
37916 if (!isless (xa, TWO52))
37918 x2 = (double)(long)x;
37919 if (HONOR_SIGNED_ZEROS (mode))
37920 return copysign (x2, x);
37923 enum machine_mode mode
= GET_MODE (operand0
);
37924 rtx xa
, xi
, TWO52
, label
, res
, mask
;
37926 TWO52
= ix86_gen_TWO52 (mode
);
37928 /* Temporary for holding the result, initialized to the input
37929 operand to ease control flow. */
37930 res
= gen_reg_rtx (mode
);
37931 emit_move_insn (res
, operand1
);
37933 /* xa = abs (operand1) */
37934 xa
= ix86_expand_sse_fabs (res
, &mask
);
37936 /* if (!isless (xa, TWO52)) goto label; */
37937 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37939 /* x = (double)(long)x */
37940 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37941 expand_fix (xi
, res
, 0);
37942 expand_float (res
, xi
, 0);
37944 if (HONOR_SIGNED_ZEROS (mode
))
37945 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37947 emit_label (label
);
37948 LABEL_NUSES (label
) = 1;
37950 emit_move_insn (operand0
, res
);
37953 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37956 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
37958 enum machine_mode mode
= GET_MODE (operand0
);
37959 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
37961 /* C code for SSE variant we expand below.
37962 double xa = fabs (x), x2;
37963 if (!isless (xa, TWO52))
37965 xa2 = xa + TWO52 - TWO52;
37969 x2 = copysign (xa2, x);
37973 TWO52
= ix86_gen_TWO52 (mode
);
37975 /* Temporary for holding the result, initialized to the input
37976 operand to ease control flow. */
37977 res
= gen_reg_rtx (mode
);
37978 emit_move_insn (res
, operand1
);
37980 /* xa = abs (operand1) */
37981 xa
= ix86_expand_sse_fabs (res
, &smask
);
37983 /* if (!isless (xa, TWO52)) goto label; */
37984 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37986 /* res = xa + TWO52 - TWO52; */
37987 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37988 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
37989 emit_move_insn (res
, tmp
);
37992 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37994 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
37995 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
37996 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37997 gen_rtx_AND (mode
, mask
, one
)));
37998 tmp
= expand_simple_binop (mode
, MINUS
,
37999 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38000 emit_move_insn (res
, tmp
);
38002 /* res = copysign (res, operand1) */
38003 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38005 emit_label (label
);
38006 LABEL_NUSES (label
) = 1;
38008 emit_move_insn (operand0
, res
);
38011 /* Expand SSE sequence for computing round from OPERAND1 storing
38014 ix86_expand_round (rtx operand0
, rtx operand1
)
38016 /* C code for the stuff we're doing below:
38017 double xa = fabs (x);
38018 if (!isless (xa, TWO52))
38020 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38021 return copysign (xa, x);
38023 enum machine_mode mode
= GET_MODE (operand0
);
38024 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38025 const struct real_format
*fmt
;
38026 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38028 /* Temporary for holding the result, initialized to the input
38029 operand to ease control flow. */
38030 res
= gen_reg_rtx (mode
);
38031 emit_move_insn (res
, operand1
);
38033 TWO52
= ix86_gen_TWO52 (mode
);
38034 xa
= ix86_expand_sse_fabs (res
, &mask
);
38035 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38037 /* load nextafter (0.5, 0.0) */
38038 fmt
= REAL_MODE_FORMAT (mode
);
38039 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38040 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38042 /* xa = xa + 0.5 */
38043 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38044 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38046 /* xa = (double)(int64_t)xa */
38047 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38048 expand_fix (xi
, xa
, 0);
38049 expand_float (xa
, xi
, 0);
38051 /* res = copysign (xa, operand1) */
38052 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38054 emit_label (label
);
38055 LABEL_NUSES (label
) = 1;
38057 emit_move_insn (operand0
, res
);
38060 /* Expand SSE sequence for computing round
38061 from OP1 storing into OP0 using sse4 round insn. */
38063 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38065 enum machine_mode mode
= GET_MODE (op0
);
38066 rtx e1
, e2
, res
, half
;
38067 const struct real_format
*fmt
;
38068 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38069 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38070 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38075 gen_copysign
= gen_copysignsf3
;
38076 gen_round
= gen_sse4_1_roundsf2
;
38079 gen_copysign
= gen_copysigndf3
;
38080 gen_round
= gen_sse4_1_rounddf2
;
38083 gcc_unreachable ();
38086 /* round (a) = trunc (a + copysign (0.5, a)) */
38088 /* load nextafter (0.5, 0.0) */
38089 fmt
= REAL_MODE_FORMAT (mode
);
38090 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38091 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38092 half
= const_double_from_real_value (pred_half
, mode
);
38094 /* e1 = copysign (0.5, op1) */
38095 e1
= gen_reg_rtx (mode
);
38096 emit_insn (gen_copysign (e1
, half
, op1
));
38098 /* e2 = op1 + e1 */
38099 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38101 /* res = trunc (e2) */
38102 res
= gen_reg_rtx (mode
);
38103 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38105 emit_move_insn (op0
, res
);
38109 /* Table of valid machine attributes. */
38110 static const struct attribute_spec ix86_attribute_table
[] =
38112 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38113 affects_type_identity } */
38114 /* Stdcall attribute says callee is responsible for popping arguments
38115 if they are not variable. */
38116 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38118 /* Fastcall attribute says callee is responsible for popping arguments
38119 if they are not variable. */
38120 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38122 /* Thiscall attribute says callee is responsible for popping arguments
38123 if they are not variable. */
38124 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38126 /* Cdecl attribute says the callee is a normal C declaration */
38127 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38129 /* Regparm attribute specifies how many integer arguments are to be
38130 passed in registers. */
38131 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38133 /* Sseregparm attribute says we are using x86_64 calling conventions
38134 for FP arguments. */
38135 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38137 /* The transactional memory builtins are implicitly regparm or fastcall
38138 depending on the ABI. Override the generic do-nothing attribute that
38139 these builtins were declared with. */
38140 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38142 /* force_align_arg_pointer says this function realigns the stack at entry. */
38143 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38144 false, true, true, ix86_handle_cconv_attribute
, false },
38145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38146 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38147 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38148 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38151 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38153 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38155 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38156 SUBTARGET_ATTRIBUTE_TABLE
,
38158 /* ms_abi and sysv_abi calling convention function attributes. */
38159 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38160 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38161 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38163 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38164 ix86_handle_callee_pop_aggregate_return
, true },
38166 { NULL
, 0, 0, false, false, false, NULL
, false }
38169 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38171 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38173 int misalign ATTRIBUTE_UNUSED
)
38177 switch (type_of_cost
)
38180 return ix86_cost
->scalar_stmt_cost
;
38183 return ix86_cost
->scalar_load_cost
;
38186 return ix86_cost
->scalar_store_cost
;
38189 return ix86_cost
->vec_stmt_cost
;
38192 return ix86_cost
->vec_align_load_cost
;
38195 return ix86_cost
->vec_store_cost
;
38197 case vec_to_scalar
:
38198 return ix86_cost
->vec_to_scalar_cost
;
38200 case scalar_to_vec
:
38201 return ix86_cost
->scalar_to_vec_cost
;
38203 case unaligned_load
:
38204 case unaligned_store
:
38205 return ix86_cost
->vec_unalign_load_cost
;
38207 case cond_branch_taken
:
38208 return ix86_cost
->cond_taken_branch_cost
;
38210 case cond_branch_not_taken
:
38211 return ix86_cost
->cond_not_taken_branch_cost
;
38214 case vec_promote_demote
:
38215 return ix86_cost
->vec_stmt_cost
;
38217 case vec_construct
:
38218 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38219 return elements
/ 2 + 1;
38222 gcc_unreachable ();
38226 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38227 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38228 insn every time. */
38230 static GTY(()) rtx vselect_insn
;
38232 /* Initialize vselect_insn. */
38235 init_vselect_insn (void)
38240 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38241 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38242 XVECEXP (x
, 0, i
) = const0_rtx
;
38243 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38245 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38247 vselect_insn
= emit_insn (x
);
38251 /* Construct (set target (vec_select op0 (parallel perm))) and
38252 return true if that's a valid instruction in the active ISA. */
38255 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38256 unsigned nelt
, bool testing_p
)
38259 rtx x
, save_vconcat
;
38262 if (vselect_insn
== NULL_RTX
)
38263 init_vselect_insn ();
38265 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38266 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38267 for (i
= 0; i
< nelt
; ++i
)
38268 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38269 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38270 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38271 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38272 SET_DEST (PATTERN (vselect_insn
)) = target
;
38273 icode
= recog_memoized (vselect_insn
);
38275 if (icode
>= 0 && !testing_p
)
38276 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38278 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38279 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38280 INSN_CODE (vselect_insn
) = -1;
38285 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38288 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38289 const unsigned char *perm
, unsigned nelt
,
38292 enum machine_mode v2mode
;
38296 if (vselect_insn
== NULL_RTX
)
38297 init_vselect_insn ();
38299 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38300 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38301 PUT_MODE (x
, v2mode
);
38304 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38305 XEXP (x
, 0) = const0_rtx
;
38306 XEXP (x
, 1) = const0_rtx
;
38310 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38311 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38314 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38316 enum machine_mode vmode
= d
->vmode
;
38317 unsigned i
, mask
, nelt
= d
->nelt
;
38318 rtx target
, op0
, op1
, x
;
38319 rtx rperm
[32], vperm
;
38321 if (d
->one_operand_p
)
38323 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38325 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38327 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
38332 /* This is a blend, not a permute. Elements must stay in their
38333 respective lanes. */
38334 for (i
= 0; i
< nelt
; ++i
)
38336 unsigned e
= d
->perm
[i
];
38337 if (!(e
== i
|| e
== i
+ nelt
))
38344 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38345 decision should be extracted elsewhere, so that we only try that
38346 sequence once all budget==3 options have been tried. */
38347 target
= d
->target
;
38360 for (i
= 0; i
< nelt
; ++i
)
38361 mask
|= (d
->perm
[i
] >= nelt
) << i
;
38365 for (i
= 0; i
< 2; ++i
)
38366 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
38371 for (i
= 0; i
< 4; ++i
)
38372 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38377 /* See if bytes move in pairs so we can use pblendw with
38378 an immediate argument, rather than pblendvb with a vector
38380 for (i
= 0; i
< 16; i
+= 2)
38381 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38384 for (i
= 0; i
< nelt
; ++i
)
38385 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
38388 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
38389 vperm
= force_reg (vmode
, vperm
);
38391 if (GET_MODE_SIZE (vmode
) == 16)
38392 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
38394 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
38398 for (i
= 0; i
< 8; ++i
)
38399 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38404 target
= gen_lowpart (vmode
, target
);
38405 op0
= gen_lowpart (vmode
, op0
);
38406 op1
= gen_lowpart (vmode
, op1
);
38410 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38411 for (i
= 0; i
< 32; i
+= 2)
38412 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38414 /* See if bytes move in quadruplets. If yes, vpblendd
38415 with immediate can be used. */
38416 for (i
= 0; i
< 32; i
+= 4)
38417 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
38421 /* See if bytes move the same in both lanes. If yes,
38422 vpblendw with immediate can be used. */
38423 for (i
= 0; i
< 16; i
+= 2)
38424 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
38427 /* Use vpblendw. */
38428 for (i
= 0; i
< 16; ++i
)
38429 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
38434 /* Use vpblendd. */
38435 for (i
= 0; i
< 8; ++i
)
38436 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
38441 /* See if words move in pairs. If yes, vpblendd can be used. */
38442 for (i
= 0; i
< 16; i
+= 2)
38443 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38447 /* See if words move the same in both lanes. If not,
38448 vpblendvb must be used. */
38449 for (i
= 0; i
< 8; i
++)
38450 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
38452 /* Use vpblendvb. */
38453 for (i
= 0; i
< 32; ++i
)
38454 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
38458 target
= gen_lowpart (vmode
, target
);
38459 op0
= gen_lowpart (vmode
, op0
);
38460 op1
= gen_lowpart (vmode
, op1
);
38461 goto finish_pblendvb
;
38464 /* Use vpblendw. */
38465 for (i
= 0; i
< 16; ++i
)
38466 mask
|= (d
->perm
[i
] >= 16) << i
;
38470 /* Use vpblendd. */
38471 for (i
= 0; i
< 8; ++i
)
38472 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38477 /* Use vpblendd. */
38478 for (i
= 0; i
< 4; ++i
)
38479 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38484 gcc_unreachable ();
38487 /* This matches five different patterns with the different modes. */
38488 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
38489 x
= gen_rtx_SET (VOIDmode
, target
, x
);
38495 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38496 in terms of the variable form of vpermilps.
38498 Note that we will have already failed the immediate input vpermilps,
38499 which requires that the high and low part shuffle be identical; the
38500 variable form doesn't require that. */
38503 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
38505 rtx rperm
[8], vperm
;
38508 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
38511 /* We can only permute within the 128-bit lane. */
38512 for (i
= 0; i
< 8; ++i
)
38514 unsigned e
= d
->perm
[i
];
38515 if (i
< 4 ? e
>= 4 : e
< 4)
38522 for (i
= 0; i
< 8; ++i
)
38524 unsigned e
= d
->perm
[i
];
38526 /* Within each 128-bit lane, the elements of op0 are numbered
38527 from 0 and the elements of op1 are numbered from 4. */
38533 rperm
[i
] = GEN_INT (e
);
38536 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
38537 vperm
= force_reg (V8SImode
, vperm
);
38538 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
38543 /* Return true if permutation D can be performed as VMODE permutation
38547 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
38549 unsigned int i
, j
, chunk
;
38551 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
38552 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
38553 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
38556 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
38559 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
38560 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
38561 if (d
->perm
[i
] & (chunk
- 1))
38564 for (j
= 1; j
< chunk
; ++j
)
38565 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

/* NOTE(review): this block was recovered from a mangled extraction;
   several original lines (braces, a few assignments and returns) are
   missing and are flagged below rather than guessed at.  */
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  /* Two-operand case: only XOP vpperm (16 bytes) or, failing that,
     AVX2 vperm2i128 when the permutation moves whole 128-bit halves.  */
  if (!d->one_operand_p)
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	  /* NOTE(review): the `if (TARGET_AVX2' line preceding this
	     condition appears to be missing from the extraction.  */
	  && valid_perm_using_mode_p (V2TImode, d))
	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      /* Immediate selecting which 128-bit half comes from where.
		 NOTE(review): `||' here looks suspicious — a bitwise `|'
		 would be expected when composing the two immediate
		 fields; confirm against the upstream source.  */
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));

  /* One-operand case: pshufb (SSSE3, 16 bytes) or the AVX2 variable
     permutes (32 bytes).  */
      if (GET_MODE_SIZE (d->vmode) == 16)
      else if (GET_MODE_SIZE (d->vmode) == 32)
	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
	      /* Next see if vpermd can be used.
		 NOTE(review): the assignment to vmode on success is
		 missing from the extraction.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
	  /* Or if vpermps can be used.
	     NOTE(review): the statement under this else-if (presumably
	     selecting the vpermps path) is missing; later code tests
	     vmode == V8SFmode, so vmode is presumably set here.  */
	  else if (d->vmode == V8SFmode)

	  if (vmode == V32QImode)
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))

  /* Build the selector constant.  For vpermd a dword index vector;
     otherwise a byte-index vector for pshufb/vpperm.  */
  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      /* Index mask: two operands address 2*nelt elements; a 32-byte
	 one-operand shuffle only addresses its own lane (nelt/2).
	 NOTE(review): the `mask = nelt - 1;' arm for V16QImode is
	 missing from the extraction.  */
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
38713 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
38714 in a single instruction. */
38717 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
38719 unsigned i
, nelt
= d
->nelt
;
38720 unsigned char perm2
[MAX_VECT_LEN
];
38722 /* Check plain VEC_SELECT first, because AVX has instructions that could
38723 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
38724 input where SEL+CONCAT may not. */
38725 if (d
->one_operand_p
)
38727 int mask
= nelt
- 1;
38728 bool identity_perm
= true;
38729 bool broadcast_perm
= true;
38731 for (i
= 0; i
< nelt
; i
++)
38733 perm2
[i
] = d
->perm
[i
] & mask
;
38735 identity_perm
= false;
38737 broadcast_perm
= false;
38743 emit_move_insn (d
->target
, d
->op0
);
38746 else if (broadcast_perm
&& TARGET_AVX2
)
38748 /* Use vpbroadcast{b,w,d}. */
38749 rtx (*gen
) (rtx
, rtx
) = NULL
;
38753 gen
= gen_avx2_pbroadcastv32qi_1
;
38756 gen
= gen_avx2_pbroadcastv16hi_1
;
38759 gen
= gen_avx2_pbroadcastv8si_1
;
38762 gen
= gen_avx2_pbroadcastv16qi
;
38765 gen
= gen_avx2_pbroadcastv8hi
;
38768 gen
= gen_avx2_vec_dupv8sf_1
;
38770 /* For other modes prefer other shuffles this function creates. */
38776 emit_insn (gen (d
->target
, d
->op0
));
38781 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
38784 /* There are plenty of patterns in sse.md that are written for
38785 SEL+CONCAT and are not replicated for a single op. Perhaps
38786 that should be changed, to avoid the nastiness here. */
38788 /* Recognize interleave style patterns, which means incrementing
38789 every other permutation operand. */
38790 for (i
= 0; i
< nelt
; i
+= 2)
38792 perm2
[i
] = d
->perm
[i
] & mask
;
38793 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
38795 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38799 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
38802 for (i
= 0; i
< nelt
; i
+= 4)
38804 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
38805 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
38806 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
38807 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
38810 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38816 /* Finally, try the fully general two operand permute. */
38817 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
38821 /* Recognize interleave style patterns with reversed operands. */
38822 if (!d
->one_operand_p
)
38824 for (i
= 0; i
< nelt
; ++i
)
38826 unsigned e
= d
->perm
[i
];
38834 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
38839 /* Try the SSE4.1 blend variable merge instructions. */
38840 if (expand_vec_perm_blend (d
))
38843 /* Try one of the AVX vpermil variable permutations. */
38844 if (expand_vec_perm_vpermil (d
))
38847 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
38848 vpshufb, vpermd, vpermps or vpermq variable permutation. */
38849 if (expand_vec_perm_pshufb (d
))
38855 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38856 in terms of a pair of pshuflw + pshufhw instructions. */
38859 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
38861 unsigned char perm2
[MAX_VECT_LEN
];
38865 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
38868 /* The two permutations only operate in 64-bit lanes. */
38869 for (i
= 0; i
< 4; ++i
)
38870 if (d
->perm
[i
] >= 4)
38872 for (i
= 4; i
< 8; ++i
)
38873 if (d
->perm
[i
] < 4)
38879 /* Emit the pshuflw. */
38880 memcpy (perm2
, d
->perm
, 4);
38881 for (i
= 4; i
< 8; ++i
)
38883 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
38886 /* Emit the pshufhw. */
38887 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
38888 for (i
= 0; i
< 4; ++i
)
38890 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
38896 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38897 the permutation using the SSSE3 palignr instruction. This succeeds
38898 when all of the elements in PERM fit within one vector and we merely
38899 need to shift them down so that a single vector permutation has a
38900 chance to succeed. */
38903 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
38905 unsigned i
, nelt
= d
->nelt
;
38910 /* Even with AVX, palignr only operates on 128-bit vectors. */
38911 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38914 min
= nelt
, max
= 0;
38915 for (i
= 0; i
< nelt
; ++i
)
38917 unsigned e
= d
->perm
[i
];
38923 if (min
== 0 || max
- min
>= nelt
)
38926 /* Given that we have SSSE3, we know we'll be able to implement the
38927 single operand permutation after the palignr with pshufb. */
38931 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
38932 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
38933 gen_lowpart (TImode
, d
->op1
),
38934 gen_lowpart (TImode
, d
->op0
), shift
));
38936 d
->op0
= d
->op1
= d
->target
;
38937 d
->one_operand_p
= true;
38940 for (i
= 0; i
< nelt
; ++i
)
38942 unsigned e
= d
->perm
[i
] - min
;
38948 /* Test for the degenerate case where the alignment by itself
38949 produces the desired permutation. */
38953 ok
= expand_vec_perm_1 (d
);
38959 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
38961 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38962 a two vector permutation into a single vector permutation by using
38963 an interleave operation to merge the vectors. */
38966 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
38968 struct expand_vec_perm_d dremap
, dfinal
;
38969 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
38970 unsigned HOST_WIDE_INT contents
;
38971 unsigned char remap
[2 * MAX_VECT_LEN
];
38973 bool ok
, same_halves
= false;
38975 if (GET_MODE_SIZE (d
->vmode
) == 16)
38977 if (d
->one_operand_p
)
38980 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38984 /* For 32-byte modes allow even d->one_operand_p.
38985 The lack of cross-lane shuffling in some instructions
38986 might prevent a single insn shuffle. */
38988 dfinal
.testing_p
= true;
38989 /* If expand_vec_perm_interleave3 can expand this into
38990 a 3 insn sequence, give up and let it be expanded as
38991 3 insn sequence. While that is one insn longer,
38992 it doesn't need a memory operand and in the common
38993 case that both interleave low and high permutations
38994 with the same operands are adjacent needs 4 insns
38995 for both after CSE. */
38996 if (expand_vec_perm_interleave3 (&dfinal
))
39002 /* Examine from whence the elements come. */
39004 for (i
= 0; i
< nelt
; ++i
)
39005 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39007 memset (remap
, 0xff, sizeof (remap
));
39010 if (GET_MODE_SIZE (d
->vmode
) == 16)
39012 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39014 /* Split the two input vectors into 4 halves. */
39015 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39020 /* If the elements from the low halves use interleave low, and similarly
39021 for interleave high. If the elements are from mis-matched halves, we
39022 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39023 if ((contents
& (h1
| h3
)) == contents
)
39026 for (i
= 0; i
< nelt2
; ++i
)
39029 remap
[i
+ nelt
] = i
* 2 + 1;
39030 dremap
.perm
[i
* 2] = i
;
39031 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39033 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39034 dremap
.vmode
= V4SFmode
;
39036 else if ((contents
& (h2
| h4
)) == contents
)
39039 for (i
= 0; i
< nelt2
; ++i
)
39041 remap
[i
+ nelt2
] = i
* 2;
39042 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39043 dremap
.perm
[i
* 2] = i
+ nelt2
;
39044 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39046 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39047 dremap
.vmode
= V4SFmode
;
39049 else if ((contents
& (h1
| h4
)) == contents
)
39052 for (i
= 0; i
< nelt2
; ++i
)
39055 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39056 dremap
.perm
[i
] = i
;
39057 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39062 dremap
.vmode
= V2DImode
;
39064 dremap
.perm
[0] = 0;
39065 dremap
.perm
[1] = 3;
39068 else if ((contents
& (h2
| h3
)) == contents
)
39071 for (i
= 0; i
< nelt2
; ++i
)
39073 remap
[i
+ nelt2
] = i
;
39074 remap
[i
+ nelt
] = i
+ nelt2
;
39075 dremap
.perm
[i
] = i
+ nelt2
;
39076 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39081 dremap
.vmode
= V2DImode
;
39083 dremap
.perm
[0] = 1;
39084 dremap
.perm
[1] = 2;
39092 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39093 unsigned HOST_WIDE_INT q
[8];
39094 unsigned int nonzero_halves
[4];
39096 /* Split the two input vectors into 8 quarters. */
39097 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39098 for (i
= 1; i
< 8; ++i
)
39099 q
[i
] = q
[0] << (nelt4
* i
);
39100 for (i
= 0; i
< 4; ++i
)
39101 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39103 nonzero_halves
[nzcnt
] = i
;
39109 gcc_assert (d
->one_operand_p
);
39110 nonzero_halves
[1] = nonzero_halves
[0];
39111 same_halves
= true;
39113 else if (d
->one_operand_p
)
39115 gcc_assert (nonzero_halves
[0] == 0);
39116 gcc_assert (nonzero_halves
[1] == 1);
39121 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39123 /* Attempt to increase the likelihood that dfinal
39124 shuffle will be intra-lane. */
39125 char tmph
= nonzero_halves
[0];
39126 nonzero_halves
[0] = nonzero_halves
[1];
39127 nonzero_halves
[1] = tmph
;
39130 /* vperm2f128 or vperm2i128. */
39131 for (i
= 0; i
< nelt2
; ++i
)
39133 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39134 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39135 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39136 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39139 if (d
->vmode
!= V8SFmode
39140 && d
->vmode
!= V4DFmode
39141 && d
->vmode
!= V8SImode
)
39143 dremap
.vmode
= V8SImode
;
39145 for (i
= 0; i
< 4; ++i
)
39147 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39148 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39152 else if (d
->one_operand_p
)
39154 else if (TARGET_AVX2
39155 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39158 for (i
= 0; i
< nelt4
; ++i
)
39161 remap
[i
+ nelt
] = i
* 2 + 1;
39162 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39163 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39164 dremap
.perm
[i
* 2] = i
;
39165 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39166 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39167 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39170 else if (TARGET_AVX2
39171 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39174 for (i
= 0; i
< nelt4
; ++i
)
39176 remap
[i
+ nelt4
] = i
* 2;
39177 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39178 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39179 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39180 dremap
.perm
[i
* 2] = i
+ nelt4
;
39181 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39182 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39183 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39190 /* Use the remapping array set up above to move the elements from their
39191 swizzled locations into their final destinations. */
39193 for (i
= 0; i
< nelt
; ++i
)
39195 unsigned e
= remap
[d
->perm
[i
]];
39196 gcc_assert (e
< nelt
);
39197 /* If same_halves is true, both halves of the remapped vector are the
39198 same. Avoid cross-lane accesses if possible. */
39199 if (same_halves
&& i
>= nelt2
)
39201 gcc_assert (e
< nelt2
);
39202 dfinal
.perm
[i
] = e
+ nelt2
;
39205 dfinal
.perm
[i
] = e
;
39207 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39208 dfinal
.op1
= dfinal
.op0
;
39209 dfinal
.one_operand_p
= true;
39210 dremap
.target
= dfinal
.op0
;
39212 /* Test if the final remap can be done with a single insn. For V4SFmode or
39213 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39215 ok
= expand_vec_perm_1 (&dfinal
);
39216 seq
= get_insns ();
39225 if (dremap
.vmode
!= dfinal
.vmode
)
39227 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39228 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39229 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39232 ok
= expand_vec_perm_1 (&dremap
);
39239 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39240 a single vector cross-lane permutation into vpermq followed
39241 by any of the single insn permutations. */
39244 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39246 struct expand_vec_perm_d dremap
, dfinal
;
39247 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39248 unsigned contents
[2];
39252 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39253 && d
->one_operand_p
))
39258 for (i
= 0; i
< nelt2
; ++i
)
39260 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39261 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39264 for (i
= 0; i
< 2; ++i
)
39266 unsigned int cnt
= 0;
39267 for (j
= 0; j
< 4; ++j
)
39268 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39276 dremap
.vmode
= V4DImode
;
39278 dremap
.target
= gen_reg_rtx (V4DImode
);
39279 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39280 dremap
.op1
= dremap
.op0
;
39281 dremap
.one_operand_p
= true;
39282 for (i
= 0; i
< 2; ++i
)
39284 unsigned int cnt
= 0;
39285 for (j
= 0; j
< 4; ++j
)
39286 if ((contents
[i
] & (1u << j
)) != 0)
39287 dremap
.perm
[2 * i
+ cnt
++] = j
;
39288 for (; cnt
< 2; ++cnt
)
39289 dremap
.perm
[2 * i
+ cnt
] = 0;
39293 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39294 dfinal
.op1
= dfinal
.op0
;
39295 dfinal
.one_operand_p
= true;
39296 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39300 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39301 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39303 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39304 dfinal
.perm
[i
] |= nelt4
;
39306 gcc_unreachable ();
39309 ok
= expand_vec_perm_1 (&dremap
);
39312 ok
= expand_vec_perm_1 (&dfinal
);
39318 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39319 a vector permutation using two instructions, vperm2f128 resp.
39320 vperm2i128 followed by any single in-lane permutation. */
39323 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39325 struct expand_vec_perm_d dfirst
, dsecond
;
39326 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39330 || GET_MODE_SIZE (d
->vmode
) != 32
39331 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
39335 dsecond
.one_operand_p
= false;
39336 dsecond
.testing_p
= true;
39338 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39339 immediate. For perm < 16 the second permutation uses
39340 d->op0 as first operand, for perm >= 16 it uses d->op1
39341 as first operand. The second operand is the result of
39343 for (perm
= 0; perm
< 32; perm
++)
39345 /* Ignore permutations which do not move anything cross-lane. */
39348 /* The second shuffle for e.g. V4DFmode has
39349 0123 and ABCD operands.
39350 Ignore AB23, as 23 is already in the second lane
39351 of the first operand. */
39352 if ((perm
& 0xc) == (1 << 2)) continue;
39353 /* And 01CD, as 01 is in the first lane of the first
39355 if ((perm
& 3) == 0) continue;
39356 /* And 4567, as then the vperm2[fi]128 doesn't change
39357 anything on the original 4567 second operand. */
39358 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
39362 /* The second shuffle for e.g. V4DFmode has
39363 4567 and ABCD operands.
39364 Ignore AB67, as 67 is already in the second lane
39365 of the first operand. */
39366 if ((perm
& 0xc) == (3 << 2)) continue;
39367 /* And 45CD, as 45 is in the first lane of the first
39369 if ((perm
& 3) == 2) continue;
39370 /* And 0123, as then the vperm2[fi]128 doesn't change
39371 anything on the original 0123 first operand. */
39372 if ((perm
& 0xf) == (1 << 2)) continue;
39375 for (i
= 0; i
< nelt
; i
++)
39377 j
= d
->perm
[i
] / nelt2
;
39378 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
39379 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
39380 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
39381 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
39389 ok
= expand_vec_perm_1 (&dsecond
);
39400 /* Found a usable second shuffle. dfirst will be
39401 vperm2f128 on d->op0 and d->op1. */
39402 dsecond
.testing_p
= false;
39404 dfirst
.target
= gen_reg_rtx (d
->vmode
);
39405 for (i
= 0; i
< nelt
; i
++)
39406 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
39407 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
39409 ok
= expand_vec_perm_1 (&dfirst
);
39412 /* And dsecond is some single insn shuffle, taking
39413 d->op0 and result of vperm2f128 (if perm < 16) or
39414 d->op1 and result of vperm2f128 (otherwise). */
39415 dsecond
.op1
= dfirst
.target
;
39417 dsecond
.op0
= dfirst
.op1
;
39419 ok
= expand_vec_perm_1 (&dsecond
);
39425 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39426 if (d
->one_operand_p
)
39433 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39434 a two vector permutation using 2 intra-lane interleave insns
39435 and cross-lane shuffle for 32-byte vectors. */
39438 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
39441 rtx (*gen
) (rtx
, rtx
, rtx
);
39443 if (d
->one_operand_p
)
39445 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
39447 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
39453 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
39455 for (i
= 0; i
< nelt
; i
+= 2)
39456 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
39457 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
39467 gen
= gen_vec_interleave_highv32qi
;
39469 gen
= gen_vec_interleave_lowv32qi
;
39473 gen
= gen_vec_interleave_highv16hi
;
39475 gen
= gen_vec_interleave_lowv16hi
;
39479 gen
= gen_vec_interleave_highv8si
;
39481 gen
= gen_vec_interleave_lowv8si
;
39485 gen
= gen_vec_interleave_highv4di
;
39487 gen
= gen_vec_interleave_lowv4di
;
39491 gen
= gen_vec_interleave_highv8sf
;
39493 gen
= gen_vec_interleave_lowv8sf
;
39497 gen
= gen_vec_interleave_highv4df
;
39499 gen
= gen_vec_interleave_lowv4df
;
39502 gcc_unreachable ();
39505 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
39509 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39510 a single vector permutation using a single intra-lane vector
39511 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39512 the non-swapped and swapped vectors together. */
39515 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
39517 struct expand_vec_perm_d dfirst
, dsecond
;
39518 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39521 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
39525 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
39526 || !d
->one_operand_p
)
39530 for (i
= 0; i
< nelt
; i
++)
39531 dfirst
.perm
[i
] = 0xff;
39532 for (i
= 0, msk
= 0; i
< nelt
; i
++)
39534 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
39535 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
39537 dfirst
.perm
[j
] = d
->perm
[i
];
39541 for (i
= 0; i
< nelt
; i
++)
39542 if (dfirst
.perm
[i
] == 0xff)
39543 dfirst
.perm
[i
] = i
;
39546 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39549 ok
= expand_vec_perm_1 (&dfirst
);
39550 seq
= get_insns ();
39562 dsecond
.op0
= dfirst
.target
;
39563 dsecond
.op1
= dfirst
.target
;
39564 dsecond
.one_operand_p
= true;
39565 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39566 for (i
= 0; i
< nelt
; i
++)
39567 dsecond
.perm
[i
] = i
^ nelt2
;
39569 ok
= expand_vec_perm_1 (&dsecond
);
39572 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
39573 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
39577 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39578 permutation using two vperm2f128, followed by a vshufpd insn blending
39579 the two vectors together. */
39582 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
39584 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
39587 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
39597 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
39598 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
39599 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
39600 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
39601 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
39602 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
39603 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
39604 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
39605 dthird
.perm
[0] = (d
->perm
[0] % 2);
39606 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
39607 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
39608 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
39610 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39611 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39612 dthird
.op0
= dfirst
.target
;
39613 dthird
.op1
= dsecond
.target
;
39614 dthird
.one_operand_p
= false;
39616 canonicalize_perm (&dfirst
);
39617 canonicalize_perm (&dsecond
);
39619 ok
= expand_vec_perm_1 (&dfirst
)
39620 && expand_vec_perm_1 (&dsecond
)
39621 && expand_vec_perm_1 (&dthird
);
39628 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39629 permutation with two pshufb insns and an ior. We should have already
39630 failed all two instruction sequences. */
39633 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
39635 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
39636 unsigned int i
, nelt
, eltsz
;
39638 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39640 gcc_assert (!d
->one_operand_p
);
39643 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39645 /* Generate two permutation masks. If the required element is within
39646 the given vector it is shuffled into the proper lane. If the required
39647 element is in the other vector, force a zero into the lane by setting
39648 bit 7 in the permutation mask. */
39649 m128
= GEN_INT (-128);
39650 for (i
= 0; i
< nelt
; ++i
)
39652 unsigned j
, e
= d
->perm
[i
];
39653 unsigned which
= (e
>= nelt
);
39657 for (j
= 0; j
< eltsz
; ++j
)
39659 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
39660 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
39664 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
39665 vperm
= force_reg (V16QImode
, vperm
);
39667 l
= gen_reg_rtx (V16QImode
);
39668 op
= gen_lowpart (V16QImode
, d
->op0
);
39669 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
39671 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
39672 vperm
= force_reg (V16QImode
, vperm
);
39674 h
= gen_reg_rtx (V16QImode
);
39675 op
= gen_lowpart (V16QImode
, d
->op1
);
39676 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
39678 op
= gen_lowpart (V16QImode
, d
->target
);
39679 emit_insn (gen_iorv16qi3 (op
, l
, h
));
39684 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
39685 with two vpshufb insns, vpermq and vpor. We should have already failed
39686 all two or three instruction sequences. */
39689 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
39691 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
39692 unsigned int i
, nelt
, eltsz
;
39695 || !d
->one_operand_p
39696 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39703 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39705 /* Generate two permutation masks. If the required element is within
39706 the same lane, it is shuffled in. If the required element from the
39707 other lane, force a zero by setting bit 7 in the permutation mask.
39708 In the other mask the mask has non-negative elements if element
39709 is requested from the other lane, but also moved to the other lane,
39710 so that the result of vpshufb can have the two V2TImode halves
39712 m128
= GEN_INT (-128);
39713 for (i
= 0; i
< nelt
; ++i
)
39715 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39716 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39718 for (j
= 0; j
< eltsz
; ++j
)
39720 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
39721 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
39725 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39726 vperm
= force_reg (V32QImode
, vperm
);
39728 h
= gen_reg_rtx (V32QImode
);
39729 op
= gen_lowpart (V32QImode
, d
->op0
);
39730 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39732 /* Swap the 128-byte lanes of h into hp. */
39733 hp
= gen_reg_rtx (V4DImode
);
39734 op
= gen_lowpart (V4DImode
, h
);
39735 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
39738 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39739 vperm
= force_reg (V32QImode
, vperm
);
39741 l
= gen_reg_rtx (V32QImode
);
39742 op
= gen_lowpart (V32QImode
, d
->op0
);
39743 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39745 op
= gen_lowpart (V32QImode
, d
->target
);
39746 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
39751 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
39752 and extract-odd permutations of two V32QImode and V16QImode operand
39753 with two vpshufb insns, vpor and vpermq. We should have already
39754 failed all two or three instruction sequences. */
39757 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
39759 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
39760 unsigned int i
, nelt
, eltsz
;
39763 || d
->one_operand_p
39764 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39767 for (i
= 0; i
< d
->nelt
; ++i
)
39768 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
39775 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39777 /* Generate two permutation masks. In the first permutation mask
39778 the first quarter will contain indexes for the first half
39779 of the op0, the second quarter will contain bit 7 set, third quarter
39780 will contain indexes for the second half of the op0 and the
39781 last quarter bit 7 set. In the second permutation mask
39782 the first quarter will contain bit 7 set, the second quarter
39783 indexes for the first half of the op1, the third quarter bit 7 set
39784 and last quarter indexes for the second half of the op1.
39785 I.e. the first mask e.g. for V32QImode extract even will be:
39786 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
39787 (all values masked with 0xf except for -128) and second mask
39788 for extract even will be
39789 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
39790 m128
= GEN_INT (-128);
39791 for (i
= 0; i
< nelt
; ++i
)
39793 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39794 unsigned which
= d
->perm
[i
] >= nelt
;
39795 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
39797 for (j
= 0; j
< eltsz
; ++j
)
39799 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
39800 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
39804 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39805 vperm
= force_reg (V32QImode
, vperm
);
39807 l
= gen_reg_rtx (V32QImode
);
39808 op
= gen_lowpart (V32QImode
, d
->op0
);
39809 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39811 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39812 vperm
= force_reg (V32QImode
, vperm
);
39814 h
= gen_reg_rtx (V32QImode
);
39815 op
= gen_lowpart (V32QImode
, d
->op1
);
39816 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39818 ior
= gen_reg_rtx (V32QImode
);
39819 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
39821 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
39822 op
= gen_lowpart (V4DImode
, d
->target
);
39823 ior
= gen_lowpart (V4DImode
, ior
);
39824 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
39825 const1_rtx
, GEN_INT (3)));
39830 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
39831 and extract-odd permutations. */
39834 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
39841 t1
= gen_reg_rtx (V4DFmode
);
39842 t2
= gen_reg_rtx (V4DFmode
);
39844 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39845 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39846 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39848 /* Now an unpck[lh]pd will produce the result required. */
39850 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
39852 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
39858 int mask
= odd
? 0xdd : 0x88;
39860 t1
= gen_reg_rtx (V8SFmode
);
39861 t2
= gen_reg_rtx (V8SFmode
);
39862 t3
= gen_reg_rtx (V8SFmode
);
39864 /* Shuffle within the 128-bit lanes to produce:
39865 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
39866 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
39869 /* Shuffle the lanes around to produce:
39870 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
39871 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
39874 /* Shuffle within the 128-bit lanes to produce:
39875 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
39876 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
39878 /* Shuffle within the 128-bit lanes to produce:
39879 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
39880 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
39882 /* Shuffle the lanes around to produce:
39883 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
39884 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
39893 /* These are always directly implementable by expand_vec_perm_1. */
39894 gcc_unreachable ();
39898 return expand_vec_perm_pshufb2 (d
);
39901 /* We need 2*log2(N)-1 operations to achieve odd/even
39902 with interleave. */
39903 t1
= gen_reg_rtx (V8HImode
);
39904 t2
= gen_reg_rtx (V8HImode
);
39905 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
39906 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
39907 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
39908 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
39910 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
39912 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
39919 return expand_vec_perm_pshufb2 (d
);
39922 t1
= gen_reg_rtx (V16QImode
);
39923 t2
= gen_reg_rtx (V16QImode
);
39924 t3
= gen_reg_rtx (V16QImode
);
39925 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
39926 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
39927 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
39928 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
39929 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
39930 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
39932 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
39934 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
39941 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
39946 struct expand_vec_perm_d d_copy
= *d
;
39947 d_copy
.vmode
= V4DFmode
;
39948 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
39949 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
39950 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
39951 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39954 t1
= gen_reg_rtx (V4DImode
);
39955 t2
= gen_reg_rtx (V4DImode
);
39957 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39958 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39959 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39961 /* Now an vpunpck[lh]qdq will produce the result required. */
39963 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
39965 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
39972 struct expand_vec_perm_d d_copy
= *d
;
39973 d_copy
.vmode
= V8SFmode
;
39974 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
39975 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
39976 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
39977 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39980 t1
= gen_reg_rtx (V8SImode
);
39981 t2
= gen_reg_rtx (V8SImode
);
39983 /* Shuffle the lanes around into
39984 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
39985 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
39986 gen_lowpart (V4DImode
, d
->op0
),
39987 gen_lowpart (V4DImode
, d
->op1
),
39989 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
39990 gen_lowpart (V4DImode
, d
->op0
),
39991 gen_lowpart (V4DImode
, d
->op1
),
39994 /* Swap the 2nd and 3rd position in each lane into
39995 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
39996 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
39997 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39998 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
39999 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40001 /* Now an vpunpck[lh]qdq will produce
40002 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40004 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40005 gen_lowpart (V4DImode
, t1
),
40006 gen_lowpart (V4DImode
, t2
));
40008 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40009 gen_lowpart (V4DImode
, t1
),
40010 gen_lowpart (V4DImode
, t2
));
40015 gcc_unreachable ();
40021 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40022 extract-even and extract-odd permutations. */
40025 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40027 unsigned i
, odd
, nelt
= d
->nelt
;
40030 if (odd
!= 0 && odd
!= 1)
40033 for (i
= 1; i
< nelt
; ++i
)
40034 if (d
->perm
[i
] != 2 * i
+ odd
)
40037 return expand_vec_perm_even_odd_1 (d
, odd
);
40040 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40041 permutations. We assume that expand_vec_perm_1 has already failed. */
40044 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40046 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40047 enum machine_mode vmode
= d
->vmode
;
40048 unsigned char perm2
[4];
40056 /* These are special-cased in sse.md so that we can optionally
40057 use the vbroadcast instruction. They expand to two insns
40058 if the input happens to be in a register. */
40059 gcc_unreachable ();
40065 /* These are always implementable using standard shuffle patterns. */
40066 gcc_unreachable ();
40070 /* These can be implemented via interleave. We save one insn by
40071 stopping once we have promoted to V4SImode and then use pshufd. */
40075 rtx (*gen
) (rtx
, rtx
, rtx
)
40076 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40077 : gen_vec_interleave_lowv8hi
;
40081 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40082 : gen_vec_interleave_highv8hi
;
40087 dest
= gen_reg_rtx (vmode
);
40088 emit_insn (gen (dest
, op0
, op0
));
40089 vmode
= get_mode_wider_vector (vmode
);
40090 op0
= gen_lowpart (vmode
, dest
);
40092 while (vmode
!= V4SImode
);
40094 memset (perm2
, elt
, 4);
40095 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40104 /* For AVX2 broadcasts of the first element vpbroadcast* or
40105 vpermq should be used by expand_vec_perm_1. */
40106 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40110 gcc_unreachable ();
40114 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40115 broadcast permutations. */
40118 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40120 unsigned i
, elt
, nelt
= d
->nelt
;
40122 if (!d
->one_operand_p
)
40126 for (i
= 1; i
< nelt
; ++i
)
40127 if (d
->perm
[i
] != elt
)
40130 return expand_vec_perm_broadcast_1 (d
);
40133 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40134 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40135 all the shorter instruction sequences. */
40138 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40140 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40141 unsigned int i
, nelt
, eltsz
;
40145 || d
->one_operand_p
40146 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40153 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40155 /* Generate 4 permutation masks. If the required element is within
40156 the same lane, it is shuffled in. If the required element from the
40157 other lane, force a zero by setting bit 7 in the permutation mask.
40158 In the other mask the mask has non-negative elements if element
40159 is requested from the other lane, but also moved to the other lane,
40160 so that the result of vpshufb can have the two V2TImode halves
40162 m128
= GEN_INT (-128);
40163 for (i
= 0; i
< 32; ++i
)
40165 rperm
[0][i
] = m128
;
40166 rperm
[1][i
] = m128
;
40167 rperm
[2][i
] = m128
;
40168 rperm
[3][i
] = m128
;
40174 for (i
= 0; i
< nelt
; ++i
)
40176 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40177 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40178 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40180 for (j
= 0; j
< eltsz
; ++j
)
40181 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40182 used
[which
] = true;
40185 for (i
= 0; i
< 2; ++i
)
40187 if (!used
[2 * i
+ 1])
40192 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40193 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40194 vperm
= force_reg (V32QImode
, vperm
);
40195 h
[i
] = gen_reg_rtx (V32QImode
);
40196 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40197 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40200 /* Swap the 128-byte lanes of h[X]. */
40201 for (i
= 0; i
< 2; ++i
)
40203 if (h
[i
] == NULL_RTX
)
40205 op
= gen_reg_rtx (V4DImode
);
40206 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40207 const2_rtx
, GEN_INT (3), const0_rtx
,
40209 h
[i
] = gen_lowpart (V32QImode
, op
);
40212 for (i
= 0; i
< 2; ++i
)
40219 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40220 vperm
= force_reg (V32QImode
, vperm
);
40221 l
[i
] = gen_reg_rtx (V32QImode
);
40222 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40223 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40226 for (i
= 0; i
< 2; ++i
)
40230 op
= gen_reg_rtx (V32QImode
);
40231 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40238 gcc_assert (l
[0] && l
[1]);
40239 op
= gen_lowpart (V32QImode
, d
->target
);
40240 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40244 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40245 With all of the interface bits taken care of, perform the expansion
40246 in D and return true on success. */
40249 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40251 /* Try a single instruction expansion. */
40252 if (expand_vec_perm_1 (d
))
40255 /* Try sequences of two instructions. */
40257 if (expand_vec_perm_pshuflw_pshufhw (d
))
40260 if (expand_vec_perm_palignr (d
))
40263 if (expand_vec_perm_interleave2 (d
))
40266 if (expand_vec_perm_broadcast (d
))
40269 if (expand_vec_perm_vpermq_perm_1 (d
))
40272 if (expand_vec_perm_vperm2f128 (d
))
40275 /* Try sequences of three instructions. */
40277 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40280 if (expand_vec_perm_pshufb2 (d
))
40283 if (expand_vec_perm_interleave3 (d
))
40286 if (expand_vec_perm_vperm2f128_vblend (d
))
40289 /* Try sequences of four instructions. */
40291 if (expand_vec_perm_vpshufb2_vpermq (d
))
40294 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40297 /* ??? Look for narrow permutations whose element orderings would
40298 allow the promotion to a wider mode. */
40300 /* ??? Look for sequences of interleave or a wider permute that place
40301 the data into the correct lanes for a half-vector shuffle like
40302 pshuf[lh]w or vpermilps. */
40304 /* ??? Look for sequences of interleave that produce the desired results.
40305 The combinatorics of punpck[lh] get pretty ugly... */
40307 if (expand_vec_perm_even_odd (d
))
40310 /* Even longer sequences. */
40311 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40317 /* If a permutation only uses one operand, make it clear. Returns true
40318 if the permutation references both operands. */
40321 canonicalize_perm (struct expand_vec_perm_d
*d
)
40323 int i
, which
, nelt
= d
->nelt
;
40325 for (i
= which
= 0; i
< nelt
; ++i
)
40326 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40328 d
->one_operand_p
= true;
40335 if (!rtx_equal_p (d
->op0
, d
->op1
))
40337 d
->one_operand_p
= false;
40340 /* The elements of PERM do not suggest that only the first operand
40341 is used, but both operands are identical. Allow easier matching
40342 of the permutation by folding the permutation into the single
40347 for (i
= 0; i
< nelt
; ++i
)
40348 d
->perm
[i
] &= nelt
- 1;
40357 return (which
== 3);
40361 ix86_expand_vec_perm_const (rtx operands
[4])
40363 struct expand_vec_perm_d d
;
40364 unsigned char perm
[MAX_VECT_LEN
];
40369 d
.target
= operands
[0];
40370 d
.op0
= operands
[1];
40371 d
.op1
= operands
[2];
40374 d
.vmode
= GET_MODE (d
.target
);
40375 gcc_assert (VECTOR_MODE_P (d
.vmode
));
40376 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40377 d
.testing_p
= false;
40379 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
40380 gcc_assert (XVECLEN (sel
, 0) == nelt
);
40381 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
40383 for (i
= 0; i
< nelt
; ++i
)
40385 rtx e
= XVECEXP (sel
, 0, i
);
40386 int ei
= INTVAL (e
) & (2 * nelt
- 1);
40391 two_args
= canonicalize_perm (&d
);
40393 if (ix86_expand_vec_perm_const_1 (&d
))
40396 /* If the selector says both arguments are needed, but the operands are the
40397 same, the above tried to expand with one_operand_p and flattened selector.
40398 If that didn't work, retry without one_operand_p; we succeeded with that
40400 if (two_args
&& d
.one_operand_p
)
40402 d
.one_operand_p
= false;
40403 memcpy (d
.perm
, perm
, sizeof (perm
));
40404 return ix86_expand_vec_perm_const_1 (&d
);
40410 /* Implement targetm.vectorize.vec_perm_const_ok. */
40413 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
40414 const unsigned char *sel
)
40416 struct expand_vec_perm_d d
;
40417 unsigned int i
, nelt
, which
;
40421 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40422 d
.testing_p
= true;
40424 /* Given sufficient ISA support we can just return true here
40425 for selected vector modes. */
40426 if (GET_MODE_SIZE (d
.vmode
) == 16)
40428 /* All implementable with a single vpperm insn. */
40431 /* All implementable with 2 pshufb + 1 ior. */
40434 /* All implementable with shufpd or unpck[lh]pd. */
40439 /* Extract the values from the vector CST into the permutation
40441 memcpy (d
.perm
, sel
, nelt
);
40442 for (i
= which
= 0; i
< nelt
; ++i
)
40444 unsigned char e
= d
.perm
[i
];
40445 gcc_assert (e
< 2 * nelt
);
40446 which
|= (e
< nelt
? 1 : 2);
40449 /* For all elements from second vector, fold the elements to first. */
40451 for (i
= 0; i
< nelt
; ++i
)
40454 /* Check whether the mask can be applied to the vector type. */
40455 d
.one_operand_p
= (which
!= 3);
40457 /* Implementable with shufps or pshufd. */
40458 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
40461 /* Otherwise we have to go through the motions and see if we can
40462 figure out how to generate the requested permutation. */
40463 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
40464 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
40465 if (!d
.one_operand_p
)
40466 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
40469 ret
= ix86_expand_vec_perm_const_1 (&d
);
40476 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
40478 struct expand_vec_perm_d d
;
40484 d
.vmode
= GET_MODE (targ
);
40485 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40486 d
.one_operand_p
= false;
40487 d
.testing_p
= false;
40489 for (i
= 0; i
< nelt
; ++i
)
40490 d
.perm
[i
] = i
* 2 + odd
;
40492 /* We'll either be able to implement the permutation directly... */
40493 if (expand_vec_perm_1 (&d
))
40496 /* ... or we use the special-case patterns. */
40497 expand_vec_perm_even_odd_1 (&d
, odd
);
40501 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
40503 struct expand_vec_perm_d d
;
40504 unsigned i
, nelt
, base
;
40510 d
.vmode
= GET_MODE (targ
);
40511 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40512 d
.one_operand_p
= false;
40513 d
.testing_p
= false;
40515 base
= high_p
? nelt
/ 2 : 0;
40516 for (i
= 0; i
< nelt
/ 2; ++i
)
40518 d
.perm
[i
* 2] = i
+ base
;
40519 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
40522 /* Note that for AVX this isn't one instruction. */
40523 ok
= ix86_expand_vec_perm_const_1 (&d
);
40528 /* Expand a vector operation CODE for a V*QImode in terms of the
40529 same operation on V*HImode. */
40532 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
40534 enum machine_mode qimode
= GET_MODE (dest
);
40535 enum machine_mode himode
;
40536 rtx (*gen_il
) (rtx
, rtx
, rtx
);
40537 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
40538 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
40539 struct expand_vec_perm_d d
;
40540 bool ok
, full_interleave
;
40541 bool uns_p
= false;
40548 gen_il
= gen_vec_interleave_lowv16qi
;
40549 gen_ih
= gen_vec_interleave_highv16qi
;
40552 himode
= V16HImode
;
40553 gen_il
= gen_avx2_interleave_lowv32qi
;
40554 gen_ih
= gen_avx2_interleave_highv32qi
;
40557 gcc_unreachable ();
40560 op2_l
= op2_h
= op2
;
40564 /* Unpack data such that we've got a source byte in each low byte of
40565 each word. We don't care what goes into the high byte of each word.
40566 Rather than trying to get zero in there, most convenient is to let
40567 it be a copy of the low byte. */
40568 op2_l
= gen_reg_rtx (qimode
);
40569 op2_h
= gen_reg_rtx (qimode
);
40570 emit_insn (gen_il (op2_l
, op2
, op2
));
40571 emit_insn (gen_ih (op2_h
, op2
, op2
));
40574 op1_l
= gen_reg_rtx (qimode
);
40575 op1_h
= gen_reg_rtx (qimode
);
40576 emit_insn (gen_il (op1_l
, op1
, op1
));
40577 emit_insn (gen_ih (op1_h
, op1
, op1
));
40578 full_interleave
= qimode
== V16QImode
;
40586 op1_l
= gen_reg_rtx (himode
);
40587 op1_h
= gen_reg_rtx (himode
);
40588 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
40589 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
40590 full_interleave
= true;
40593 gcc_unreachable ();
40596 /* Perform the operation. */
40597 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
40599 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
40601 gcc_assert (res_l
&& res_h
);
40603 /* Merge the data back into the right place. */
40605 d
.op0
= gen_lowpart (qimode
, res_l
);
40606 d
.op1
= gen_lowpart (qimode
, res_h
);
40608 d
.nelt
= GET_MODE_NUNITS (qimode
);
40609 d
.one_operand_p
= false;
40610 d
.testing_p
= false;
40612 if (full_interleave
)
40614 /* For SSE2, we used an full interleave, so the desired
40615 results are in the even elements. */
40616 for (i
= 0; i
< 32; ++i
)
40621 /* For AVX, the interleave used above was not cross-lane. So the
40622 extraction is evens but with the second and third quarter swapped.
40623 Happily, that is even one insn shorter than even extraction. */
40624 for (i
= 0; i
< 32; ++i
)
40625 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
40628 ok
= ix86_expand_vec_perm_const_1 (&d
);
40631 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40632 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
40636 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
40637 bool uns_p
, bool odd_p
)
40639 enum machine_mode mode
= GET_MODE (op1
);
40640 enum machine_mode wmode
= GET_MODE (dest
);
40643 /* We only play even/odd games with vectors of SImode. */
40644 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
40646 /* If we're looking for the odd results, shift those members down to
40647 the even slots. For some cpus this is faster than a PSHUFD. */
40650 if (TARGET_XOP
&& mode
== V4SImode
)
40652 x
= force_reg (wmode
, CONST0_RTX (wmode
));
40653 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
40657 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
40658 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
40659 x
, NULL
, 1, OPTAB_DIRECT
);
40660 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
40661 x
, NULL
, 1, OPTAB_DIRECT
);
40662 op1
= gen_lowpart (mode
, op1
);
40663 op2
= gen_lowpart (mode
, op2
);
40666 if (mode
== V8SImode
)
40669 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
40671 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
40674 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
40675 else if (TARGET_SSE4_1
)
40676 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
40679 rtx s1
, s2
, t0
, t1
, t2
;
40681 /* The easiest way to implement this without PMULDQ is to go through
40682 the motions as if we are performing a full 64-bit multiply. With
40683 the exception that we need to do less shuffling of the elements. */
40685 /* Compute the sign-extension, aka highparts, of the two operands. */
40686 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40687 op1
, pc_rtx
, pc_rtx
);
40688 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40689 op2
, pc_rtx
, pc_rtx
);
40691 /* Multiply LO(A) * HI(B), and vice-versa. */
40692 t1
= gen_reg_rtx (wmode
);
40693 t2
= gen_reg_rtx (wmode
);
40694 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
40695 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
40697 /* Multiply LO(A) * LO(B). */
40698 t0
= gen_reg_rtx (wmode
);
40699 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
40701 /* Combine and shift the highparts into place. */
40702 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
40703 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
40706 /* Combine high and low parts. */
40707 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
40714 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
40715 bool uns_p
, bool high_p
)
40717 enum machine_mode wmode
= GET_MODE (dest
);
40718 enum machine_mode mode
= GET_MODE (op1
);
40719 rtx t1
, t2
, t3
, t4
, mask
;
40724 t1
= gen_reg_rtx (mode
);
40725 t2
= gen_reg_rtx (mode
);
40726 if (TARGET_XOP
&& !uns_p
)
40728 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
40729 shuffle the elements once so that all elements are in the right
40730 place for immediate use: { A C B D }. */
40731 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
40732 const1_rtx
, GEN_INT (3)));
40733 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
40734 const1_rtx
, GEN_INT (3)));
40738 /* Put the elements into place for the multiply. */
40739 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
40740 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
40743 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
40747 /* Shuffle the elements between the lanes. After this we
40748 have { A B E F | C D G H } for each operand. */
40749 t1
= gen_reg_rtx (V4DImode
);
40750 t2
= gen_reg_rtx (V4DImode
);
40751 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
40752 const0_rtx
, const2_rtx
,
40753 const1_rtx
, GEN_INT (3)));
40754 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
40755 const0_rtx
, const2_rtx
,
40756 const1_rtx
, GEN_INT (3)));
40758 /* Shuffle the elements within the lanes. After this we
40759 have { A A B B | C C D D } or { E E F F | G G H H }. */
40760 t3
= gen_reg_rtx (V8SImode
);
40761 t4
= gen_reg_rtx (V8SImode
);
40762 mask
= GEN_INT (high_p
40763 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
40764 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
40765 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
40766 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
40768 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
40773 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
40774 uns_p
, OPTAB_DIRECT
);
40775 t2
= expand_binop (mode
,
40776 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
40777 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
40778 gcc_assert (t1
&& t2
);
40780 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
40785 t1
= gen_reg_rtx (wmode
);
40786 t2
= gen_reg_rtx (wmode
);
40787 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
40788 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
40790 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
40794 gcc_unreachable ();
40799 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
40803 res_1
= gen_reg_rtx (V4SImode
);
40804 res_2
= gen_reg_rtx (V4SImode
);
40805 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
40806 op1
, op2
, true, false);
40807 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
40808 op1
, op2
, true, true);
40810 /* Move the results in element 2 down to element 1; we don't care
40811 what goes in elements 2 and 3. Then we can merge the parts
40812 back together with an interleave.
40814 Note that two other sequences were tried:
40815 (1) Use interleaves at the start instead of psrldq, which allows
40816 us to use a single shufps to merge things back at the end.
40817 (2) Use shufps here to combine the two vectors, then pshufd to
40818 put the elements in the correct order.
40819 In both cases the cost of the reformatting stall was too high
40820 and the overall sequence slower. */
40822 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
40823 const0_rtx
, const0_rtx
));
40824 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
40825 const0_rtx
, const0_rtx
));
40826 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
40828 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
40832 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
40834 enum machine_mode mode
= GET_MODE (op0
);
40835 rtx t1
, t2
, t3
, t4
, t5
, t6
;
40837 if (TARGET_XOP
&& mode
== V2DImode
)
40839 /* op1: A,B,C,D, op2: E,F,G,H */
40840 op1
= gen_lowpart (V4SImode
, op1
);
40841 op2
= gen_lowpart (V4SImode
, op2
);
40843 t1
= gen_reg_rtx (V4SImode
);
40844 t2
= gen_reg_rtx (V4SImode
);
40845 t3
= gen_reg_rtx (V2DImode
);
40846 t4
= gen_reg_rtx (V2DImode
);
40849 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
40855 /* t2: (B*E),(A*F),(D*G),(C*H) */
40856 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
40858 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
40859 emit_insn (gen_xop_phadddq (t3
, t2
));
40861 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
40862 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
40864 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
40865 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
40869 enum machine_mode nmode
;
40870 rtx (*umul
) (rtx
, rtx
, rtx
);
40872 if (mode
== V2DImode
)
40874 umul
= gen_vec_widen_umult_even_v4si
;
40877 else if (mode
== V4DImode
)
40879 umul
= gen_vec_widen_umult_even_v8si
;
40883 gcc_unreachable ();
40886 /* Multiply low parts. */
40887 t1
= gen_reg_rtx (mode
);
40888 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
40890 /* Shift input vectors right 32 bits so we can multiply high parts. */
40892 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
40893 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
40895 /* Multiply high parts by low parts. */
40896 t4
= gen_reg_rtx (mode
);
40897 t5
= gen_reg_rtx (mode
);
40898 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
40899 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
40901 /* Combine and shift the highparts back. */
40902 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
40903 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
40905 /* Combine high and low parts. */
40906 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
40909 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40910 gen_rtx_MULT (mode
, op1
, op2
));
40913 /* Expand an insert into a vector register through pinsr insn.
40914 Return true if successful. */
40917 ix86_expand_pinsr (rtx
*operands
)
40919 rtx dst
= operands
[0];
40920 rtx src
= operands
[3];
40922 unsigned int size
= INTVAL (operands
[1]);
40923 unsigned int pos
= INTVAL (operands
[2]);
40925 if (GET_CODE (dst
) == SUBREG
)
40927 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
40928 dst
= SUBREG_REG (dst
);
40931 if (GET_CODE (src
) == SUBREG
)
40932 src
= SUBREG_REG (src
);
40934 switch (GET_MODE (dst
))
40941 enum machine_mode srcmode
, dstmode
;
40942 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
40944 srcmode
= mode_for_size (size
, MODE_INT
, 0);
40949 if (!TARGET_SSE4_1
)
40951 dstmode
= V16QImode
;
40952 pinsr
= gen_sse4_1_pinsrb
;
40958 dstmode
= V8HImode
;
40959 pinsr
= gen_sse2_pinsrw
;
40963 if (!TARGET_SSE4_1
)
40965 dstmode
= V4SImode
;
40966 pinsr
= gen_sse4_1_pinsrd
;
40970 gcc_assert (TARGET_64BIT
);
40971 if (!TARGET_SSE4_1
)
40973 dstmode
= V2DImode
;
40974 pinsr
= gen_sse4_1_pinsrq
;
40981 dst
= gen_lowpart (dstmode
, dst
);
40982 src
= gen_lowpart (srcmode
, src
);
40986 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
40995 /* This function returns the calling abi specific va_list type node.
40996 It returns the FNDECL specific va_list type. */
40999 ix86_fn_abi_va_list (tree fndecl
)
41002 return va_list_type_node
;
41003 gcc_assert (fndecl
!= NULL_TREE
);
41005 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41006 return ms_va_list_type_node
;
41008 return sysv_va_list_type_node
;
41011 /* Returns the canonical va_list type specified by TYPE. If there
41012 is no valid TYPE provided, it return NULL_TREE. */
41015 ix86_canonical_va_list_type (tree type
)
41019 /* Resolve references and pointers to va_list type. */
41020 if (TREE_CODE (type
) == MEM_REF
)
41021 type
= TREE_TYPE (type
);
41022 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41023 type
= TREE_TYPE (type
);
41024 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41025 type
= TREE_TYPE (type
);
41027 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41029 wtype
= va_list_type_node
;
41030 gcc_assert (wtype
!= NULL_TREE
);
41032 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41034 /* If va_list is an array type, the argument may have decayed
41035 to a pointer type, e.g. by being passed to another function.
41036 In that case, unwrap both types so that we can compare the
41037 underlying records. */
41038 if (TREE_CODE (htype
) == ARRAY_TYPE
41039 || POINTER_TYPE_P (htype
))
41041 wtype
= TREE_TYPE (wtype
);
41042 htype
= TREE_TYPE (htype
);
41045 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41046 return va_list_type_node
;
41047 wtype
= sysv_va_list_type_node
;
41048 gcc_assert (wtype
!= NULL_TREE
);
41050 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41052 /* If va_list is an array type, the argument may have decayed
41053 to a pointer type, e.g. by being passed to another function.
41054 In that case, unwrap both types so that we can compare the
41055 underlying records. */
41056 if (TREE_CODE (htype
) == ARRAY_TYPE
41057 || POINTER_TYPE_P (htype
))
41059 wtype
= TREE_TYPE (wtype
);
41060 htype
= TREE_TYPE (htype
);
41063 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41064 return sysv_va_list_type_node
;
41065 wtype
= ms_va_list_type_node
;
41066 gcc_assert (wtype
!= NULL_TREE
);
41068 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41070 /* If va_list is an array type, the argument may have decayed
41071 to a pointer type, e.g. by being passed to another function.
41072 In that case, unwrap both types so that we can compare the
41073 underlying records. */
41074 if (TREE_CODE (htype
) == ARRAY_TYPE
41075 || POINTER_TYPE_P (htype
))
41077 wtype
= TREE_TYPE (wtype
);
41078 htype
= TREE_TYPE (htype
);
41081 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41082 return ms_va_list_type_node
;
41085 return std_canonical_va_list_type (type
);
41088 /* Iterate through the target-specific builtin types for va_list.
41089 IDX denotes the iterator, *PTREE is set to the result type of
41090 the va_list builtin, and *PNAME to its internal type.
41091 Returns zero if there is no element for this index, otherwise
41092 IDX should be increased upon the next call.
41093 Note, do not iterate a base builtin's name like __builtin_va_list.
41094 Used from c_common_nodes_and_builtins. */
41097 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41107 *ptree
= ms_va_list_type_node
;
41108 *pname
= "__builtin_ms_va_list";
41112 *ptree
= sysv_va_list_type_node
;
41113 *pname
= "__builtin_sysv_va_list";
41121 #undef TARGET_SCHED_DISPATCH
41122 #define TARGET_SCHED_DISPATCH has_dispatch
41123 #undef TARGET_SCHED_DISPATCH_DO
41124 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41125 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41126 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41127 #undef TARGET_SCHED_REORDER
41128 #define TARGET_SCHED_REORDER ix86_sched_reorder
41129 #undef TARGET_SCHED_ADJUST_PRIORITY
41130 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41131 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41132 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
41134 /* The size of the dispatch window is the total number of bytes of
41135 object code allowed in a window. */
41136 #define DISPATCH_WINDOW_SIZE 16
41138 /* Number of dispatch windows considered for scheduling. */
41139 #define MAX_DISPATCH_WINDOWS 3
41141 /* Maximum number of instructions in a window. */
41144 /* Maximum number of immediate operands in a window. */
41147 /* Maximum number of immediate bits allowed in a window. */
41148 #define MAX_IMM_SIZE 128
41150 /* Maximum number of 32 bit immediates allowed in a window. */
41151 #define MAX_IMM_32 4
41153 /* Maximum number of 64 bit immediates allowed in a window. */
41154 #define MAX_IMM_64 2
41156 /* Maximum total of loads or prefetches allowed in a window. */
41159 /* Maximum total of stores allowed in a window. */
41160 #define MAX_STORE 1
41166 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
41167 enum dispatch_group
{
41182 /* Number of allowable groups in a dispatch window. It is an array
41183 indexed by dispatch_group enum. 100 is used as a big number,
41184 because the number of these kind of operations does not have any
41185 effect in dispatch window, but we need them for other reasons in
41187 static unsigned int num_allowable_groups
[disp_last
] = {
41188 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41191 char group_name
[disp_last
+ 1][16] = {
41192 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41193 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41194 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41197 /* Instruction path. */
41200 path_single
, /* Single micro op. */
41201 path_double
, /* Double micro op. */
41202 path_multi
, /* Instructions with more than 2 micro op.. */
41206 /* sched_insn_info defines a window to the instructions scheduled in
41207 the basic block. It contains a pointer to the insn_info table and
41208 the instruction scheduled.
41210 Windows are allocated for each basic block and are linked
41212 typedef struct sched_insn_info_s
{
41214 enum dispatch_group group
;
41215 enum insn_path path
;
41220 /* Linked list of dispatch windows. This is a two way list of
41221 dispatch windows of a basic block. It contains information about
41222 the number of uops in the window and the total number of
41223 instructions and of bytes in the object code for this dispatch
41225 typedef struct dispatch_windows_s
{
41226 int num_insn
; /* Number of insn in the window. */
41227 int num_uops
; /* Number of uops in the window. */
41228 int window_size
; /* Number of bytes in the window. */
41229 int window_num
; /* Window number between 0 or 1. */
41230 int num_imm
; /* Number of immediates in an insn. */
41231 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41232 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41233 int imm_size
; /* Total immediates in the window. */
41234 int num_loads
; /* Total memory loads in the window. */
41235 int num_stores
; /* Total memory stores in the window. */
41236 int violation
; /* Violation exists in window. */
41237 sched_insn_info
*window
; /* Pointer to the window. */
41238 struct dispatch_windows_s
*next
;
41239 struct dispatch_windows_s
*prev
;
41240 } dispatch_windows
;
41242 /* Immediate valuse used in an insn. */
41243 typedef struct imm_info_s
41250 static dispatch_windows
*dispatch_window_list
;
41251 static dispatch_windows
*dispatch_window_list1
;
41253 /* Get dispatch group of insn. */
41255 static enum dispatch_group
41256 get_mem_group (rtx insn
)
41258 enum attr_memory memory
;
41260 if (INSN_CODE (insn
) < 0)
41261 return disp_no_group
;
41262 memory
= get_attr_memory (insn
);
41263 if (memory
== MEMORY_STORE
)
41266 if (memory
== MEMORY_LOAD
)
41269 if (memory
== MEMORY_BOTH
)
41270 return disp_load_store
;
41272 return disp_no_group
;
41275 /* Return true if insn is a compare instruction. */
41280 enum attr_type type
;
41282 type
= get_attr_type (insn
);
41283 return (type
== TYPE_TEST
41284 || type
== TYPE_ICMP
41285 || type
== TYPE_FCMP
41286 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41289 /* Return true if a dispatch violation encountered. */
41292 dispatch_violation (void)
41294 if (dispatch_window_list
->next
)
41295 return dispatch_window_list
->next
->violation
;
41296 return dispatch_window_list
->violation
;
41299 /* Return true if insn is a branch instruction. */
41302 is_branch (rtx insn
)
41304 return (CALL_P (insn
) || JUMP_P (insn
));
41307 /* Return true if insn is a prefetch instruction. */
41310 is_prefetch (rtx insn
)
41312 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
41315 /* This function initializes a dispatch window and the list container holding a
41316 pointer to the window. */
41319 init_window (int window_num
)
41322 dispatch_windows
*new_list
;
41324 if (window_num
== 0)
41325 new_list
= dispatch_window_list
;
41327 new_list
= dispatch_window_list1
;
41329 new_list
->num_insn
= 0;
41330 new_list
->num_uops
= 0;
41331 new_list
->window_size
= 0;
41332 new_list
->next
= NULL
;
41333 new_list
->prev
= NULL
;
41334 new_list
->window_num
= window_num
;
41335 new_list
->num_imm
= 0;
41336 new_list
->num_imm_32
= 0;
41337 new_list
->num_imm_64
= 0;
41338 new_list
->imm_size
= 0;
41339 new_list
->num_loads
= 0;
41340 new_list
->num_stores
= 0;
41341 new_list
->violation
= false;
41343 for (i
= 0; i
< MAX_INSN
; i
++)
41345 new_list
->window
[i
].insn
= NULL
;
41346 new_list
->window
[i
].group
= disp_no_group
;
41347 new_list
->window
[i
].path
= no_path
;
41348 new_list
->window
[i
].byte_len
= 0;
41349 new_list
->window
[i
].imm_bytes
= 0;
41354 /* This function allocates and initializes a dispatch window and the
41355 list container holding a pointer to the window. */
41357 static dispatch_windows
*
41358 allocate_window (void)
41360 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
41361 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
41366 /* This routine initializes the dispatch scheduling information. It
41367 initiates building dispatch scheduler tables and constructs the
41368 first dispatch window. */
41371 init_dispatch_sched (void)
41373 /* Allocate a dispatch list and a window. */
41374 dispatch_window_list
= allocate_window ();
41375 dispatch_window_list1
= allocate_window ();
41380 /* This function returns true if a branch is detected. End of a basic block
41381 does not have to be a branch, but here we assume only branches end a
41385 is_end_basic_block (enum dispatch_group group
)
41387 return group
== disp_branch
;
41390 /* This function is called when the end of a window processing is reached. */
41393 process_end_window (void)
41395 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
41396 if (dispatch_window_list
->next
)
41398 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
41399 gcc_assert (dispatch_window_list
->window_size
41400 + dispatch_window_list1
->window_size
<= 48);
41406 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41407 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41408 for 48 bytes of instructions. Note that these windows are not dispatch
41409 windows that their sizes are DISPATCH_WINDOW_SIZE. */
41411 static dispatch_windows
*
41412 allocate_next_window (int window_num
)
41414 if (window_num
== 0)
41416 if (dispatch_window_list
->next
)
41419 return dispatch_window_list
;
41422 dispatch_window_list
->next
= dispatch_window_list1
;
41423 dispatch_window_list1
->prev
= dispatch_window_list
;
41425 return dispatch_window_list1
;
41428 /* Increment the number of immediate operands of an instruction. */
41431 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
41436 switch ( GET_CODE (*in_rtx
))
41441 (imm_values
->imm
)++;
41442 if (x86_64_immediate_operand (*in_rtx
, SImode
))
41443 (imm_values
->imm32
)++;
41445 (imm_values
->imm64
)++;
41449 (imm_values
->imm
)++;
41450 (imm_values
->imm64
)++;
41454 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
41456 (imm_values
->imm
)++;
41457 (imm_values
->imm32
)++;
41468 /* Compute number of immediate operands of an instruction. */
41471 find_constant (rtx in_rtx
, imm_info
*imm_values
)
41473 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
41474 (rtx_function
) find_constant_1
, (void *) imm_values
);
41477 /* Return total size of immediate operands of an instruction along with number
41478 of corresponding immediate-operands. It initializes its parameters to zero
41479 befor calling FIND_CONSTANT.
41480 INSN is the input instruction. IMM is the total of immediates.
41481 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41485 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
41487 imm_info imm_values
= {0, 0, 0};
41489 find_constant (insn
, &imm_values
);
41490 *imm
= imm_values
.imm
;
41491 *imm32
= imm_values
.imm32
;
41492 *imm64
= imm_values
.imm64
;
41493 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
41496 /* This function indicates if an operand of an instruction is an
41500 has_immediate (rtx insn
)
41502 int num_imm_operand
;
41503 int num_imm32_operand
;
41504 int num_imm64_operand
;
41507 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41508 &num_imm64_operand
);
41512 /* Return single or double path for instructions. */
41514 static enum insn_path
41515 get_insn_path (rtx insn
)
41517 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
41519 if ((int)path
== 0)
41520 return path_single
;
41522 if ((int)path
== 1)
41523 return path_double
;
41528 /* Return insn dispatch group. */
41530 static enum dispatch_group
41531 get_insn_group (rtx insn
)
41533 enum dispatch_group group
= get_mem_group (insn
);
41537 if (is_branch (insn
))
41538 return disp_branch
;
41543 if (has_immediate (insn
))
41546 if (is_prefetch (insn
))
41547 return disp_prefetch
;
41549 return disp_no_group
;
41552 /* Count number of GROUP restricted instructions in a dispatch
41553 window WINDOW_LIST. */
41556 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
41558 enum dispatch_group group
= get_insn_group (insn
);
41560 int num_imm_operand
;
41561 int num_imm32_operand
;
41562 int num_imm64_operand
;
41564 if (group
== disp_no_group
)
41567 if (group
== disp_imm
)
41569 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41570 &num_imm64_operand
);
41571 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
41572 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
41573 || (num_imm32_operand
> 0
41574 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
41575 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
41576 || (num_imm64_operand
> 0
41577 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
41578 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
41579 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
41580 && num_imm64_operand
> 0
41581 && ((window_list
->num_imm_64
> 0
41582 && window_list
->num_insn
>= 2)
41583 || window_list
->num_insn
>= 3)))
41589 if ((group
== disp_load_store
41590 && (window_list
->num_loads
>= MAX_LOAD
41591 || window_list
->num_stores
>= MAX_STORE
))
41592 || ((group
== disp_load
41593 || group
== disp_prefetch
)
41594 && window_list
->num_loads
>= MAX_LOAD
)
41595 || (group
== disp_store
41596 && window_list
->num_stores
>= MAX_STORE
))
41602 /* This function returns true if insn satisfies dispatch rules on the
41603 last window scheduled. */
41606 fits_dispatch_window (rtx insn
)
41608 dispatch_windows
*window_list
= dispatch_window_list
;
41609 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
41610 unsigned int num_restrict
;
41611 enum dispatch_group group
= get_insn_group (insn
);
41612 enum insn_path path
= get_insn_path (insn
);
41615 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
41616 instructions should be given the lowest priority in the
41617 scheduling process in Haifa scheduler to make sure they will be
41618 scheduled in the same dispatch window as the reference to them. */
41619 if (group
== disp_jcc
|| group
== disp_cmp
)
41622 /* Check nonrestricted. */
41623 if (group
== disp_no_group
|| group
== disp_branch
)
41626 /* Get last dispatch window. */
41627 if (window_list_next
)
41628 window_list
= window_list_next
;
41630 if (window_list
->window_num
== 1)
41632 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
41635 || (min_insn_size (insn
) + sum
) >= 48)
41636 /* Window 1 is full. Go for next window. */
41640 num_restrict
= count_num_restricted (insn
, window_list
);
41642 if (num_restrict
> num_allowable_groups
[group
])
41645 /* See if it fits in the first window. */
41646 if (window_list
->window_num
== 0)
41648 /* The first widow should have only single and double path
41650 if (path
== path_double
41651 && (window_list
->num_uops
+ 2) > MAX_INSN
)
41653 else if (path
!= path_single
)
41659 /* Add an instruction INSN with NUM_UOPS micro-operations to the
41660 dispatch window WINDOW_LIST. */
41663 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
41665 int byte_len
= min_insn_size (insn
);
41666 int num_insn
= window_list
->num_insn
;
41668 sched_insn_info
*window
= window_list
->window
;
41669 enum dispatch_group group
= get_insn_group (insn
);
41670 enum insn_path path
= get_insn_path (insn
);
41671 int num_imm_operand
;
41672 int num_imm32_operand
;
41673 int num_imm64_operand
;
41675 if (!window_list
->violation
&& group
!= disp_cmp
41676 && !fits_dispatch_window (insn
))
41677 window_list
->violation
= true;
41679 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41680 &num_imm64_operand
);
41682 /* Initialize window with new instruction. */
41683 window
[num_insn
].insn
= insn
;
41684 window
[num_insn
].byte_len
= byte_len
;
41685 window
[num_insn
].group
= group
;
41686 window
[num_insn
].path
= path
;
41687 window
[num_insn
].imm_bytes
= imm_size
;
41689 window_list
->window_size
+= byte_len
;
41690 window_list
->num_insn
= num_insn
+ 1;
41691 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
41692 window_list
->imm_size
+= imm_size
;
41693 window_list
->num_imm
+= num_imm_operand
;
41694 window_list
->num_imm_32
+= num_imm32_operand
;
41695 window_list
->num_imm_64
+= num_imm64_operand
;
41697 if (group
== disp_store
)
41698 window_list
->num_stores
+= 1;
41699 else if (group
== disp_load
41700 || group
== disp_prefetch
)
41701 window_list
->num_loads
+= 1;
41702 else if (group
== disp_load_store
)
41704 window_list
->num_stores
+= 1;
41705 window_list
->num_loads
+= 1;
41709 /* Adds a scheduled instruction, INSN, to the current dispatch window.
41710 If the total bytes of instructions or the number of instructions in
41711 the window exceed allowable, it allocates a new window. */
41714 add_to_dispatch_window (rtx insn
)
41717 dispatch_windows
*window_list
;
41718 dispatch_windows
*next_list
;
41719 dispatch_windows
*window0_list
;
41720 enum insn_path path
;
41721 enum dispatch_group insn_group
;
41729 if (INSN_CODE (insn
) < 0)
41732 byte_len
= min_insn_size (insn
);
41733 window_list
= dispatch_window_list
;
41734 next_list
= window_list
->next
;
41735 path
= get_insn_path (insn
);
41736 insn_group
= get_insn_group (insn
);
41738 /* Get the last dispatch window. */
41740 window_list
= dispatch_window_list
->next
;
41742 if (path
== path_single
)
41744 else if (path
== path_double
)
41747 insn_num_uops
= (int) path
;
41749 /* If current window is full, get a new window.
41750 Window number zero is full, if MAX_INSN uops are scheduled in it.
41751 Window number one is full, if window zero's bytes plus window
41752 one's bytes is 32, or if the bytes of the new instruction added
41753 to the total makes it greater than 48, or it has already MAX_INSN
41754 instructions in it. */
41755 num_insn
= window_list
->num_insn
;
41756 num_uops
= window_list
->num_uops
;
41757 window_num
= window_list
->window_num
;
41758 insn_fits
= fits_dispatch_window (insn
);
41760 if (num_insn
>= MAX_INSN
41761 || num_uops
+ insn_num_uops
> MAX_INSN
41764 window_num
= ~window_num
& 1;
41765 window_list
= allocate_next_window (window_num
);
41768 if (window_num
== 0)
41770 add_insn_window (insn
, window_list
, insn_num_uops
);
41771 if (window_list
->num_insn
>= MAX_INSN
41772 && insn_group
== disp_branch
)
41774 process_end_window ();
41778 else if (window_num
== 1)
41780 window0_list
= window_list
->prev
;
41781 sum
= window0_list
->window_size
+ window_list
->window_size
;
41783 || (byte_len
+ sum
) >= 48)
41785 process_end_window ();
41786 window_list
= dispatch_window_list
;
41789 add_insn_window (insn
, window_list
, insn_num_uops
);
41792 gcc_unreachable ();
41794 if (is_end_basic_block (insn_group
))
41796 /* End of basic block is reached do end-basic-block process. */
41797 process_end_window ();
41802 /* Print the dispatch window, WINDOW_NUM, to FILE. */
41804 DEBUG_FUNCTION
static void
41805 debug_dispatch_window_file (FILE *file
, int window_num
)
41807 dispatch_windows
*list
;
41810 if (window_num
== 0)
41811 list
= dispatch_window_list
;
41813 list
= dispatch_window_list1
;
41815 fprintf (file
, "Window #%d:\n", list
->window_num
);
41816 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
41817 list
->num_insn
, list
->num_uops
, list
->window_size
);
41818 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41819 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
41821 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
41823 fprintf (file
, " insn info:\n");
41825 for (i
= 0; i
< MAX_INSN
; i
++)
41827 if (!list
->window
[i
].insn
)
41829 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
41830 i
, group_name
[list
->window
[i
].group
],
41831 i
, (void *)list
->window
[i
].insn
,
41832 i
, list
->window
[i
].path
,
41833 i
, list
->window
[i
].byte_len
,
41834 i
, list
->window
[i
].imm_bytes
);
41838 /* Print to stdout a dispatch window. */
41840 DEBUG_FUNCTION
void
41841 debug_dispatch_window (int window_num
)
41843 debug_dispatch_window_file (stdout
, window_num
);
41846 /* Print INSN dispatch information to FILE. */
41848 DEBUG_FUNCTION
static void
41849 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
41852 enum insn_path path
;
41853 enum dispatch_group group
;
41855 int num_imm_operand
;
41856 int num_imm32_operand
;
41857 int num_imm64_operand
;
41859 if (INSN_CODE (insn
) < 0)
41862 byte_len
= min_insn_size (insn
);
41863 path
= get_insn_path (insn
);
41864 group
= get_insn_group (insn
);
41865 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41866 &num_imm64_operand
);
41868 fprintf (file
, " insn info:\n");
41869 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
41870 group_name
[group
], path
, byte_len
);
41871 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41872 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
41875 /* Print to STDERR the status of the ready list with respect to
41876 dispatch windows. */
41878 DEBUG_FUNCTION
void
41879 debug_ready_dispatch (void)
41882 int no_ready
= number_in_ready ();
41884 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
41886 for (i
= 0; i
< no_ready
; i
++)
41887 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
41890 /* This routine is the driver of the dispatch scheduler. */
41893 do_dispatch (rtx insn
, int mode
)
41895 if (mode
== DISPATCH_INIT
)
41896 init_dispatch_sched ();
41897 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
41898 add_to_dispatch_window (insn
);
41901 /* Return TRUE if Dispatch Scheduling is supported. */
41904 has_dispatch (rtx insn
, int action
)
41906 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
41907 && flag_dispatch_scheduler
)
41913 case IS_DISPATCH_ON
:
41918 return is_cmp (insn
);
41920 case DISPATCH_VIOLATION
:
41921 return dispatch_violation ();
41923 case FITS_DISPATCH_WINDOW
:
41924 return fits_dispatch_window (insn
);
41930 /* Implementation of reassociation_width target hook used by
41931 reassoc phase to identify parallelism level in reassociated
41932 tree. Statements tree_code is passed in OPC. Arguments type
41935 Currently parallel reassociation is enabled for Atom
41936 processors only and we set reassociation width to be 2
41937 because Atom may issue up to 2 instructions per cycle.
41939 Return value should be fixed if parallel reassociation is
41940 enabled for other processors. */
41943 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
41944 enum machine_mode mode
)
41948 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
41950 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
41956 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
41957 place emms and femms instructions. */
41959 static enum machine_mode
41960 ix86_preferred_simd_mode (enum machine_mode mode
)
41968 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
41970 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
41972 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
41974 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
41977 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41983 if (!TARGET_VECTORIZE_DOUBLE
)
41985 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41987 else if (TARGET_SSE2
)
41996 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
41999 static unsigned int
42000 ix86_autovectorize_vector_sizes (void)
42002 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42007 /* Return class of registers which could be used for pseudo of MODE
42008 and of class RCLASS for spilling instead of memory. Return NO_REGS
42009 if it is not possible or non-profitable. */
42011 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42013 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42014 && hard_reg_set_subset_p (reg_class_contents
[rclass
],
42015 reg_class_contents
[GENERAL_REGS
])
42016 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
)))
42021 /* Implement targetm.vectorize.init_cost. */
42024 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42026 unsigned *cost
= XNEWVEC (unsigned, 3);
42027 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42031 /* Implement targetm.vectorize.add_stmt_cost. */
42034 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42035 struct _stmt_vec_info
*stmt_info
, int misalign
,
42036 enum vect_cost_model_location where
)
42038 unsigned *cost
= (unsigned *) data
;
42039 unsigned retval
= 0;
42041 if (flag_vect_cost_model
)
42043 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42044 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42046 /* Statements in an inner loop relative to the loop being
42047 vectorized are weighted more heavily. The value here is
42048 arbitrary and could potentially be improved with analysis. */
42049 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42050 count
*= 50; /* FIXME. */
42052 retval
= (unsigned) (count
* stmt_cost
);
42053 cost
[where
] += retval
;
42059 /* Implement targetm.vectorize.finish_cost. */
42062 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42063 unsigned *body_cost
, unsigned *epilogue_cost
)
42065 unsigned *cost
= (unsigned *) data
;
42066 *prologue_cost
= cost
[vect_prologue
];
42067 *body_cost
= cost
[vect_body
];
42068 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.
   Release the accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42079 /* Validate target specific memory model bits in VAL. */
42081 static unsigned HOST_WIDE_INT
42082 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42084 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42087 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42089 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42091 warning (OPT_Winvalid_memory_model
,
42092 "Unknown architecture specific memory model");
42093 return MEMMODEL_SEQ_CST
;
42095 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42096 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42098 warning (OPT_Winvalid_memory_model
,
42099 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42100 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42102 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42104 warning (OPT_Winvalid_memory_model
,
42105 "HLE_RELEASE not used with RELEASE or stronger memory model");
42106 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42111 /* Initialize the GCC target structure. */
42112 #undef TARGET_RETURN_IN_MEMORY
42113 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42115 #undef TARGET_LEGITIMIZE_ADDRESS
42116 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42118 #undef TARGET_ATTRIBUTE_TABLE
42119 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42120 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42121 # undef TARGET_MERGE_DECL_ATTRIBUTES
42122 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42125 #undef TARGET_COMP_TYPE_ATTRIBUTES
42126 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42128 #undef TARGET_INIT_BUILTINS
42129 #define TARGET_INIT_BUILTINS ix86_init_builtins
42130 #undef TARGET_BUILTIN_DECL
42131 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42132 #undef TARGET_EXPAND_BUILTIN
42133 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42135 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42136 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42137 ix86_builtin_vectorized_function
42139 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42140 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42142 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42143 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42145 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42146 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42148 #undef TARGET_BUILTIN_RECIPROCAL
42149 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42151 #undef TARGET_ASM_FUNCTION_EPILOGUE
42152 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42154 #undef TARGET_ENCODE_SECTION_INFO
42155 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42156 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42158 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42161 #undef TARGET_ASM_OPEN_PAREN
42162 #define TARGET_ASM_OPEN_PAREN ""
42163 #undef TARGET_ASM_CLOSE_PAREN
42164 #define TARGET_ASM_CLOSE_PAREN ""
42166 #undef TARGET_ASM_BYTE_OP
42167 #define TARGET_ASM_BYTE_OP ASM_BYTE
42169 #undef TARGET_ASM_ALIGNED_HI_OP
42170 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42171 #undef TARGET_ASM_ALIGNED_SI_OP
42172 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42174 #undef TARGET_ASM_ALIGNED_DI_OP
42175 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42178 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42179 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42181 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42182 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42184 #undef TARGET_ASM_UNALIGNED_HI_OP
42185 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42186 #undef TARGET_ASM_UNALIGNED_SI_OP
42187 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42188 #undef TARGET_ASM_UNALIGNED_DI_OP
42189 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42191 #undef TARGET_PRINT_OPERAND
42192 #define TARGET_PRINT_OPERAND ix86_print_operand
42193 #undef TARGET_PRINT_OPERAND_ADDRESS
42194 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42195 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42196 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42197 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42198 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42200 #undef TARGET_SCHED_INIT_GLOBAL
42201 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42202 #undef TARGET_SCHED_ADJUST_COST
42203 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42204 #undef TARGET_SCHED_ISSUE_RATE
42205 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42206 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42207 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42208 ia32_multipass_dfa_lookahead
42210 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42211 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42213 #undef TARGET_MEMMODEL_CHECK
42214 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42217 #undef TARGET_HAVE_TLS
42218 #define TARGET_HAVE_TLS true
42220 #undef TARGET_CANNOT_FORCE_CONST_MEM
42221 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42222 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42223 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42225 #undef TARGET_DELEGITIMIZE_ADDRESS
42226 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42228 #undef TARGET_MS_BITFIELD_LAYOUT_P
42229 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42232 #undef TARGET_BINDS_LOCAL_P
42233 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42235 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42236 #undef TARGET_BINDS_LOCAL_P
42237 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42240 #undef TARGET_ASM_OUTPUT_MI_THUNK
42241 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42242 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42243 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42245 #undef TARGET_ASM_FILE_START
42246 #define TARGET_ASM_FILE_START x86_file_start
42248 #undef TARGET_OPTION_OVERRIDE
42249 #define TARGET_OPTION_OVERRIDE ix86_option_override
42251 #undef TARGET_REGISTER_MOVE_COST
42252 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42253 #undef TARGET_MEMORY_MOVE_COST
42254 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42255 #undef TARGET_RTX_COSTS
42256 #define TARGET_RTX_COSTS ix86_rtx_costs
42257 #undef TARGET_ADDRESS_COST
42258 #define TARGET_ADDRESS_COST ix86_address_cost
42260 #undef TARGET_FIXED_CONDITION_CODE_REGS
42261 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42262 #undef TARGET_CC_MODES_COMPATIBLE
42263 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42265 #undef TARGET_MACHINE_DEPENDENT_REORG
42266 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42268 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42269 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42271 #undef TARGET_BUILD_BUILTIN_VA_LIST
42272 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42274 #undef TARGET_FOLD_BUILTIN
42275 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42277 #undef TARGET_COMPARE_VERSION_PRIORITY
42278 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42280 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42281 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42282 ix86_generate_version_dispatcher_body
42284 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42285 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42286 ix86_get_function_versions_dispatcher
42288 #undef TARGET_ENUM_VA_LIST_P
42289 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42291 #undef TARGET_FN_ABI_VA_LIST
42292 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42294 #undef TARGET_CANONICAL_VA_LIST_TYPE
42295 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42297 #undef TARGET_EXPAND_BUILTIN_VA_START
42298 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42300 #undef TARGET_MD_ASM_CLOBBERS
42301 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42303 #undef TARGET_PROMOTE_PROTOTYPES
42304 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42305 #undef TARGET_STRUCT_VALUE_RTX
42306 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42307 #undef TARGET_SETUP_INCOMING_VARARGS
42308 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42309 #undef TARGET_MUST_PASS_IN_STACK
42310 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42311 #undef TARGET_FUNCTION_ARG_ADVANCE
42312 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42313 #undef TARGET_FUNCTION_ARG
42314 #define TARGET_FUNCTION_ARG ix86_function_arg
42315 #undef TARGET_FUNCTION_ARG_BOUNDARY
42316 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42317 #undef TARGET_PASS_BY_REFERENCE
42318 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42319 #undef TARGET_INTERNAL_ARG_POINTER
42320 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42321 #undef TARGET_UPDATE_STACK_BOUNDARY
42322 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42323 #undef TARGET_GET_DRAP_RTX
42324 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42325 #undef TARGET_STRICT_ARGUMENT_NAMING
42326 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42327 #undef TARGET_STATIC_CHAIN
42328 #define TARGET_STATIC_CHAIN ix86_static_chain
42329 #undef TARGET_TRAMPOLINE_INIT
42330 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42331 #undef TARGET_RETURN_POPS_ARGS
42332 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42334 #undef TARGET_LEGITIMATE_COMBINED_INSN
42335 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42337 #undef TARGET_ASAN_SHADOW_OFFSET
42338 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42349 #undef TARGET_C_MODE_FOR_SUFFIX
42350 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42353 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42354 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42357 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42358 #undef TARGET_INSERT_ATTRIBUTES
42359 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42362 #undef TARGET_MANGLE_TYPE
42363 #define TARGET_MANGLE_TYPE ix86_mangle_type
42366 #undef TARGET_STACK_PROTECT_FAIL
42367 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42370 #undef TARGET_FUNCTION_VALUE
42371 #define TARGET_FUNCTION_VALUE ix86_function_value
42373 #undef TARGET_FUNCTION_VALUE_REGNO_P
42374 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42376 #undef TARGET_PROMOTE_FUNCTION_MODE
42377 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42379 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42380 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42382 #undef TARGET_INSTANTIATE_DECLS
42383 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42385 #undef TARGET_SECONDARY_RELOAD
42386 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42388 #undef TARGET_CLASS_MAX_NREGS
42389 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42391 #undef TARGET_PREFERRED_RELOAD_CLASS
42392 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42393 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42394 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42395 #undef TARGET_CLASS_LIKELY_SPILLED_P
42396 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42398 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42399 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42400 ix86_builtin_vectorization_cost
42401 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42402 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42403 ix86_vectorize_vec_perm_const_ok
42404 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42405 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42406 ix86_preferred_simd_mode
42407 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42408 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42409 ix86_autovectorize_vector_sizes
42410 #undef TARGET_VECTORIZE_INIT_COST
42411 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42412 #undef TARGET_VECTORIZE_ADD_STMT_COST
42413 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42414 #undef TARGET_VECTORIZE_FINISH_COST
42415 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42416 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42417 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42419 #undef TARGET_SET_CURRENT_FUNCTION
42420 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42422 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42423 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42425 #undef TARGET_OPTION_SAVE
42426 #define TARGET_OPTION_SAVE ix86_function_specific_save
42428 #undef TARGET_OPTION_RESTORE
42429 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42431 #undef TARGET_OPTION_PRINT
42432 #define TARGET_OPTION_PRINT ix86_function_specific_print
42434 #undef TARGET_OPTION_FUNCTION_VERSIONS
42435 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42437 #undef TARGET_OPTION_SUPPORTS_FUNCTION_VERSIONS
42438 #define TARGET_OPTION_SUPPORTS_FUNCTION_VERSIONS \
42439 ix86_supports_function_versions
42441 #undef TARGET_CAN_INLINE_P
42442 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42444 #undef TARGET_EXPAND_TO_RTL_HOOK
42445 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42447 #undef TARGET_LEGITIMATE_ADDRESS_P
42448 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42450 #undef TARGET_LRA_P
42451 #define TARGET_LRA_P hook_bool_void_true
42453 #undef TARGET_REGISTER_PRIORITY
42454 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42456 #undef TARGET_LEGITIMATE_CONSTANT_P
42457 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42459 #undef TARGET_FRAME_POINTER_REQUIRED
42460 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42462 #undef TARGET_CAN_ELIMINATE
42463 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42465 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42466 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42468 #undef TARGET_ASM_CODE_END
42469 #define TARGET_ASM_CODE_END ix86_code_end
42471 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42472 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42475 #undef TARGET_INIT_LIBFUNCS
42476 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42479 #undef TARGET_SPILL_CLASS
42480 #define TARGET_SPILL_CLASS ix86_spill_class
42482 struct gcc_target targetm
= TARGET_INITIALIZER
;
42484 #include "gt-i386.h"